usse/funda-scraper/venv/lib/python3.10/site-packages/mypyc/analysis/dataflow.py

565 lines
19 KiB
Python
Raw Normal View History

2023-02-20 22:38:24 +00:00
"""Data-flow analyses."""
from abc import abstractmethod
from typing import Dict, Tuple, List, Set, TypeVar, Iterator, Generic, Optional, Iterable, Union
from mypyc.ir.ops import (
Value, ControlOp,
BasicBlock, OpVisitor, Assign, AssignMulti, Integer, LoadErrorValue, RegisterOp, Goto, Branch,
Return, Call, Box, Unbox, Cast, Op, Unreachable, TupleGet, TupleSet, GetAttr, SetAttr,
LoadLiteral, LoadStatic, InitStatic, MethodCall, RaiseStandardError, CallC, LoadGlobal,
Truncate, IntOp, LoadMem, GetElementPtr, LoadAddress, ComparisonOp, SetMem, KeepAlive, Extend
)
from mypyc.ir.func_ir import all_values
class CFG:
"""Control-flow graph.
Node 0 is always assumed to be the entry point. There must be a
non-empty set of exits.
"""
def __init__(self,
succ: Dict[BasicBlock, List[BasicBlock]],
pred: Dict[BasicBlock, List[BasicBlock]],
exits: Set[BasicBlock]) -> None:
assert exits
self.succ = succ
self.pred = pred
self.exits = exits
def __str__(self) -> str:
lines = []
lines.append('exits: %s' % sorted(self.exits, key=lambda e: e.label))
lines.append('succ: %s' % self.succ)
lines.append('pred: %s' % self.pred)
return '\n'.join(lines)
def get_cfg(blocks: List[BasicBlock]) -> CFG:
"""Calculate basic block control-flow graph.
The result is a dictionary like this:
basic block index -> (successors blocks, predecesssor blocks)
"""
succ_map = {}
pred_map: Dict[BasicBlock, List[BasicBlock]] = {}
exits = set()
for block in blocks:
assert not any(isinstance(op, ControlOp) for op in block.ops[:-1]), (
"Control-flow ops must be at the end of blocks")
succ = list(block.terminator.targets())
if not succ:
exits.add(block)
# Errors can occur anywhere inside a block, which means that
# we can't assume that the entire block has executed before
# jumping to the error handler. In our CFG construction, we
# model this as saying that a block can jump to its error
# handler or the error handlers of any of its normal
# successors (to represent an error before that next block
# completes). This works well for analyses like "must
# defined", where it implies that registers assigned in a
# block may be undefined in its error handler, but is in
# general not a precise representation of reality; any
# analyses that require more fidelity must wait until after
# exception insertion.
for error_point in [block] + succ:
if error_point.error_handler:
succ.append(error_point.error_handler)
succ_map[block] = succ
pred_map[block] = []
for prev, nxt in succ_map.items():
for label in nxt:
pred_map[label].append(prev)
return CFG(succ_map, pred_map, exits)
def get_real_target(label: BasicBlock) -> BasicBlock:
if len(label.ops) == 1 and isinstance(label.ops[-1], Goto):
label = label.ops[-1].label
return label
def cleanup_cfg(blocks: List[BasicBlock]) -> None:
"""Cleanup the control flow graph.
This eliminates obviously dead basic blocks and eliminates blocks that contain
nothing but a single jump.
There is a lot more that could be done.
"""
changed = True
while changed:
# First collapse any jumps to basic block that only contain a goto
for block in blocks:
for i, tgt in enumerate(block.terminator.targets()):
block.terminator.set_target(i, get_real_target(tgt))
# Then delete any blocks that have no predecessors
changed = False
cfg = get_cfg(blocks)
orig_blocks = blocks[:]
blocks.clear()
for i, block in enumerate(orig_blocks):
if i == 0 or cfg.pred[block]:
blocks.append(block)
else:
changed = True
T = TypeVar('T')
AnalysisDict = Dict[Tuple[BasicBlock, int], Set[T]]
class AnalysisResult(Generic[T]):
def __init__(self, before: 'AnalysisDict[T]', after: 'AnalysisDict[T]') -> None:
self.before = before
self.after = after
def __str__(self) -> str:
return f'before: {self.before}\nafter: {self.after}\n'
GenAndKill = Tuple[Set[T], Set[T]]
class BaseAnalysisVisitor(OpVisitor[GenAndKill[T]]):
def visit_goto(self, op: Goto) -> GenAndKill[T]:
return set(), set()
@abstractmethod
def visit_register_op(self, op: RegisterOp) -> GenAndKill[T]:
raise NotImplementedError
@abstractmethod
def visit_assign(self, op: Assign) -> GenAndKill[T]:
raise NotImplementedError
@abstractmethod
def visit_assign_multi(self, op: AssignMulti) -> GenAndKill[T]:
raise NotImplementedError
@abstractmethod
def visit_set_mem(self, op: SetMem) -> GenAndKill[T]:
raise NotImplementedError
def visit_call(self, op: Call) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_method_call(self, op: MethodCall) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_load_error_value(self, op: LoadErrorValue) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_load_literal(self, op: LoadLiteral) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_get_attr(self, op: GetAttr) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_set_attr(self, op: SetAttr) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_load_static(self, op: LoadStatic) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_init_static(self, op: InitStatic) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_tuple_get(self, op: TupleGet) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_tuple_set(self, op: TupleSet) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_box(self, op: Box) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_unbox(self, op: Unbox) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_cast(self, op: Cast) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_raise_standard_error(self, op: RaiseStandardError) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_call_c(self, op: CallC) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_truncate(self, op: Truncate) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_extend(self, op: Extend) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_load_global(self, op: LoadGlobal) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_int_op(self, op: IntOp) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_comparison_op(self, op: ComparisonOp) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_load_mem(self, op: LoadMem) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_get_element_ptr(self, op: GetElementPtr) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_load_address(self, op: LoadAddress) -> GenAndKill[T]:
return self.visit_register_op(op)
def visit_keep_alive(self, op: KeepAlive) -> GenAndKill[T]:
return self.visit_register_op(op)
class DefinedVisitor(BaseAnalysisVisitor[Value]):
"""Visitor for finding defined registers.
Note that this only deals with registers and not temporaries, on
the assumption that we never access temporaries when they might be
undefined.
If strict_errors is True, then we regard any use of LoadErrorValue
as making a register undefined. Otherwise we only do if
`undefines` is set on the error value.
This lets us only consider the things we care about during
uninitialized variable checking while capturing all possibly
undefined things for refcounting.
"""
def __init__(self, strict_errors: bool = False) -> None:
self.strict_errors = strict_errors
def visit_branch(self, op: Branch) -> GenAndKill[Value]:
return set(), set()
def visit_return(self, op: Return) -> GenAndKill[Value]:
return set(), set()
def visit_unreachable(self, op: Unreachable) -> GenAndKill[Value]:
return set(), set()
def visit_register_op(self, op: RegisterOp) -> GenAndKill[Value]:
return set(), set()
def visit_assign(self, op: Assign) -> GenAndKill[Value]:
# Loading an error value may undefine the register.
if (isinstance(op.src, LoadErrorValue)
and (op.src.undefines or self.strict_errors)):
return set(), {op.dest}
else:
return {op.dest}, set()
def visit_assign_multi(self, op: AssignMulti) -> GenAndKill[Value]:
# Array registers are special and we don't track the definedness of them.
return set(), set()
def visit_set_mem(self, op: SetMem) -> GenAndKill[Value]:
return set(), set()
def analyze_maybe_defined_regs(blocks: List[BasicBlock],
cfg: CFG,
initial_defined: Set[Value]) -> AnalysisResult[Value]:
"""Calculate potentially defined registers at each CFG location.
A register is defined if it has a value along some path from the initial location.
"""
return run_analysis(blocks=blocks,
cfg=cfg,
gen_and_kill=DefinedVisitor(),
initial=initial_defined,
backward=False,
kind=MAYBE_ANALYSIS)
def analyze_must_defined_regs(
blocks: List[BasicBlock],
cfg: CFG,
initial_defined: Set[Value],
regs: Iterable[Value],
strict_errors: bool = False) -> AnalysisResult[Value]:
"""Calculate always defined registers at each CFG location.
This analysis can work before exception insertion, since it is a
sound assumption that registers defined in a block might not be
initialized in its error handler.
A register is defined if it has a value along all paths from the
initial location.
"""
return run_analysis(blocks=blocks,
cfg=cfg,
gen_and_kill=DefinedVisitor(strict_errors=strict_errors),
initial=initial_defined,
backward=False,
kind=MUST_ANALYSIS,
universe=set(regs))
class BorrowedArgumentsVisitor(BaseAnalysisVisitor[Value]):
def __init__(self, args: Set[Value]) -> None:
self.args = args
def visit_branch(self, op: Branch) -> GenAndKill[Value]:
return set(), set()
def visit_return(self, op: Return) -> GenAndKill[Value]:
return set(), set()
def visit_unreachable(self, op: Unreachable) -> GenAndKill[Value]:
return set(), set()
def visit_register_op(self, op: RegisterOp) -> GenAndKill[Value]:
return set(), set()
def visit_assign(self, op: Assign) -> GenAndKill[Value]:
if op.dest in self.args:
return set(), {op.dest}
return set(), set()
def visit_assign_multi(self, op: AssignMulti) -> GenAndKill[Value]:
return set(), set()
def visit_set_mem(self, op: SetMem) -> GenAndKill[Value]:
return set(), set()
def analyze_borrowed_arguments(
blocks: List[BasicBlock],
cfg: CFG,
borrowed: Set[Value]) -> AnalysisResult[Value]:
"""Calculate arguments that can use references borrowed from the caller.
When assigning to an argument, it no longer is borrowed.
"""
return run_analysis(blocks=blocks,
cfg=cfg,
gen_and_kill=BorrowedArgumentsVisitor(borrowed),
initial=borrowed,
backward=False,
kind=MUST_ANALYSIS,
universe=borrowed)
class UndefinedVisitor(BaseAnalysisVisitor[Value]):
def visit_branch(self, op: Branch) -> GenAndKill[Value]:
return set(), set()
def visit_return(self, op: Return) -> GenAndKill[Value]:
return set(), set()
def visit_unreachable(self, op: Unreachable) -> GenAndKill[Value]:
return set(), set()
def visit_register_op(self, op: RegisterOp) -> GenAndKill[Value]:
return set(), {op} if not op.is_void else set()
def visit_assign(self, op: Assign) -> GenAndKill[Value]:
return set(), {op.dest}
def visit_assign_multi(self, op: AssignMulti) -> GenAndKill[Value]:
return set(), {op.dest}
def visit_set_mem(self, op: SetMem) -> GenAndKill[Value]:
return set(), set()
def analyze_undefined_regs(blocks: List[BasicBlock],
cfg: CFG,
initial_defined: Set[Value]) -> AnalysisResult[Value]:
"""Calculate potentially undefined registers at each CFG location.
A register is undefined if there is some path from initial block
where it has an undefined value.
Function arguments are assumed to be always defined.
"""
initial_undefined = set(all_values([], blocks)) - initial_defined
return run_analysis(blocks=blocks,
cfg=cfg,
gen_and_kill=UndefinedVisitor(),
initial=initial_undefined,
backward=False,
kind=MAYBE_ANALYSIS)
def non_trivial_sources(op: Op) -> Set[Value]:
result = set()
for source in op.sources():
if not isinstance(source, Integer):
result.add(source)
return result
class LivenessVisitor(BaseAnalysisVisitor[Value]):
def visit_branch(self, op: Branch) -> GenAndKill[Value]:
return non_trivial_sources(op), set()
def visit_return(self, op: Return) -> GenAndKill[Value]:
if not isinstance(op.value, Integer):
return {op.value}, set()
else:
return set(), set()
def visit_unreachable(self, op: Unreachable) -> GenAndKill[Value]:
return set(), set()
def visit_register_op(self, op: RegisterOp) -> GenAndKill[Value]:
gen = non_trivial_sources(op)
if not op.is_void:
return gen, {op}
else:
return gen, set()
def visit_assign(self, op: Assign) -> GenAndKill[Value]:
return non_trivial_sources(op), {op.dest}
def visit_assign_multi(self, op: AssignMulti) -> GenAndKill[Value]:
return non_trivial_sources(op), {op.dest}
def visit_set_mem(self, op: SetMem) -> GenAndKill[Value]:
return non_trivial_sources(op), set()
def analyze_live_regs(blocks: List[BasicBlock],
cfg: CFG) -> AnalysisResult[Value]:
"""Calculate live registers at each CFG location.
A register is live at a location if it can be read along some CFG path starting
from the location.
"""
return run_analysis(blocks=blocks,
cfg=cfg,
gen_and_kill=LivenessVisitor(),
initial=set(),
backward=True,
kind=MAYBE_ANALYSIS)
# Analysis kinds
MUST_ANALYSIS = 0
MAYBE_ANALYSIS = 1
def run_analysis(blocks: List[BasicBlock],
cfg: CFG,
gen_and_kill: OpVisitor[GenAndKill[T]],
initial: Set[T],
kind: int,
backward: bool,
universe: Optional[Set[T]] = None) -> AnalysisResult[T]:
"""Run a general set-based data flow analysis.
Args:
blocks: All basic blocks
cfg: Control-flow graph for the code
gen_and_kill: Implementation of gen and kill functions for each op
initial: Value of analysis for the entry points (for a forward analysis) or the
exit points (for a backward analysis)
kind: MUST_ANALYSIS or MAYBE_ANALYSIS
backward: If False, the analysis is a forward analysis; it's backward otherwise
universe: For a must analysis, the set of all possible values. This is the starting
value for the work list algorithm, which will narrow this down until reaching a
fixed point. For a maybe analysis the iteration always starts from an empty set
and this argument is ignored.
Return analysis results: (before, after)
"""
block_gen = {}
block_kill = {}
# Calculate kill and gen sets for entire basic blocks.
for block in blocks:
gen: Set[T] = set()
kill: Set[T] = set()
ops = block.ops
if backward:
ops = list(reversed(ops))
for op in ops:
opgen, opkill = op.accept(gen_and_kill)
gen = ((gen - opkill) | opgen)
kill = ((kill - opgen) | opkill)
block_gen[block] = gen
block_kill[block] = kill
# Set up initial state for worklist algorithm.
worklist = list(blocks)
if not backward:
worklist = worklist[::-1] # Reverse for a small performance improvement
workset = set(worklist)
before: Dict[BasicBlock, Set[T]] = {}
after: Dict[BasicBlock, Set[T]] = {}
for block in blocks:
if kind == MAYBE_ANALYSIS:
before[block] = set()
after[block] = set()
else:
assert universe is not None, "Universe must be defined for a must analysis"
before[block] = set(universe)
after[block] = set(universe)
if backward:
pred_map = cfg.succ
succ_map = cfg.pred
else:
pred_map = cfg.pred
succ_map = cfg.succ
# Run work list algorithm to generate in and out sets for each basic block.
while worklist:
label = worklist.pop()
workset.remove(label)
if pred_map[label]:
new_before: Union[Set[T], None] = None
for pred in pred_map[label]:
if new_before is None:
new_before = set(after[pred])
elif kind == MAYBE_ANALYSIS:
new_before |= after[pred]
else:
new_before &= after[pred]
assert new_before is not None
else:
new_before = set(initial)
before[label] = new_before
new_after = (new_before - block_kill[label]) | block_gen[label]
if new_after != after[label]:
for succ in succ_map[label]:
if succ not in workset:
worklist.append(succ)
workset.add(succ)
after[label] = new_after
# Run algorithm for each basic block to generate opcode-level sets.
op_before: Dict[Tuple[BasicBlock, int], Set[T]] = {}
op_after: Dict[Tuple[BasicBlock, int], Set[T]] = {}
for block in blocks:
label = block
cur = before[label]
ops_enum: Iterator[Tuple[int, Op]] = enumerate(block.ops)
if backward:
ops_enum = reversed(list(ops_enum))
for idx, op in ops_enum:
op_before[label, idx] = cur
opgen, opkill = op.accept(gen_and_kill)
cur = (cur - opkill) | opgen
op_after[label, idx] = cur
if backward:
op_after, op_before = op_before, op_after
return AnalysisResult(op_before, op_after)