548 lines
19 KiB
Python
548 lines
19 KiB
Python
from contextlib import contextmanager
|
|
from typing import Dict, Iterator, List, Set
|
|
from typing_extensions import Final
|
|
|
|
from mypy.nodes import (
|
|
Block, AssignmentStmt, NameExpr, MypyFile, FuncDef, Lvalue, ListExpr, TupleExpr,
|
|
WhileStmt, ForStmt, BreakStmt, ContinueStmt, TryStmt, WithStmt, MatchStmt, StarExpr,
|
|
ImportFrom, MemberExpr, IndexExpr, Import, ImportAll, ClassDef
|
|
)
|
|
from mypy.patterns import AsPattern
|
|
from mypy.traverser import TraverserVisitor
|
|
|
|
# Scope kinds
|
|
FILE: Final = 0
|
|
FUNCTION: Final = 1
|
|
CLASS: Final = 2
|
|
|
|
|
|
class VariableRenameVisitor(TraverserVisitor):
|
|
"""Rename variables to allow redefinition of variables.
|
|
|
|
For example, consider this code:
|
|
|
|
x = 0
|
|
f(x)
|
|
|
|
x = "a"
|
|
g(x)
|
|
|
|
It will be transformed like this:
|
|
|
|
x' = 0
|
|
f(x')
|
|
|
|
x = "a"
|
|
g(x)
|
|
|
|
There will be two independent variables (x' and x) that will have separate
|
|
inferred types. The publicly exposed variant will get the non-suffixed name.
|
|
This is the last definition at module top level and the first definition
|
|
(argument) within a function.
|
|
|
|
Renaming only happens for assignments within the same block. Renaming is
|
|
performed before semantic analysis, immediately after parsing.
|
|
|
|
The implementation performs a rudimentary static analysis. The analysis is
|
|
overly conservative to keep things simple.
|
|
"""
|
|
|
|
def __init__(self) -> None:
|
|
# Counter for labeling new blocks
|
|
self.block_id = 0
|
|
# Number of surrounding try statements that disallow variable redefinition
|
|
self.disallow_redef_depth = 0
|
|
# Number of surrounding loop statements
|
|
self.loop_depth = 0
|
|
# Map block id to loop depth.
|
|
self.block_loop_depth: Dict[int, int] = {}
|
|
# Stack of block ids being processed.
|
|
self.blocks: List[int] = []
|
|
# List of scopes; each scope maps short (unqualified) name to block id.
|
|
self.var_blocks: List[Dict[str, int]] = []
|
|
|
|
# References to variables that we may need to rename. List of
|
|
# scopes; each scope is a mapping from name to list of collections
|
|
# of names that refer to the same logical variable.
|
|
self.refs: List[Dict[str, List[List[NameExpr]]]] = []
|
|
# Number of reads of the most recent definition of a variable (per scope)
|
|
self.num_reads: List[Dict[str, int]] = []
|
|
# Kinds of nested scopes (FILE, FUNCTION or CLASS)
|
|
self.scope_kinds: List[int] = []
|
|
|
|
def visit_mypy_file(self, file_node: MypyFile) -> None:
|
|
"""Rename variables within a file.
|
|
|
|
This is the main entry point to this class.
|
|
"""
|
|
self.clear()
|
|
with self.enter_scope(FILE), self.enter_block():
|
|
for d in file_node.defs:
|
|
d.accept(self)
|
|
|
|
def visit_func_def(self, fdef: FuncDef) -> None:
|
|
# Conservatively do not allow variable defined before a function to
|
|
# be redefined later, since function could refer to either definition.
|
|
self.reject_redefinition_of_vars_in_scope()
|
|
|
|
with self.enter_scope(FUNCTION), self.enter_block():
|
|
for arg in fdef.arguments:
|
|
name = arg.variable.name
|
|
# 'self' can't be redefined since it's special as it allows definition of
|
|
# attributes. 'cls' can't be used to define attributes so we can ignore it.
|
|
can_be_redefined = name != 'self' # TODO: Proper check
|
|
self.record_assignment(arg.variable.name, can_be_redefined)
|
|
self.handle_arg(name)
|
|
|
|
for stmt in fdef.body.body:
|
|
stmt.accept(self)
|
|
|
|
def visit_class_def(self, cdef: ClassDef) -> None:
|
|
self.reject_redefinition_of_vars_in_scope()
|
|
with self.enter_scope(CLASS):
|
|
super().visit_class_def(cdef)
|
|
|
|
def visit_block(self, block: Block) -> None:
|
|
with self.enter_block():
|
|
super().visit_block(block)
|
|
|
|
def visit_while_stmt(self, stmt: WhileStmt) -> None:
|
|
with self.enter_loop():
|
|
super().visit_while_stmt(stmt)
|
|
|
|
def visit_for_stmt(self, stmt: ForStmt) -> None:
|
|
stmt.expr.accept(self)
|
|
self.analyze_lvalue(stmt.index, True)
|
|
# Also analyze as non-lvalue so that every for loop index variable is assumed to be read.
|
|
stmt.index.accept(self)
|
|
with self.enter_loop():
|
|
stmt.body.accept(self)
|
|
if stmt.else_body:
|
|
stmt.else_body.accept(self)
|
|
|
|
def visit_break_stmt(self, stmt: BreakStmt) -> None:
|
|
self.reject_redefinition_of_vars_in_loop()
|
|
|
|
def visit_continue_stmt(self, stmt: ContinueStmt) -> None:
|
|
self.reject_redefinition_of_vars_in_loop()
|
|
|
|
def visit_try_stmt(self, stmt: TryStmt) -> None:
|
|
# Variables defined by a try statement get special treatment in the
|
|
# type checker which allows them to be always redefined, so no need to
|
|
# do renaming here.
|
|
with self.enter_try():
|
|
super().visit_try_stmt(stmt)
|
|
|
|
def visit_with_stmt(self, stmt: WithStmt) -> None:
|
|
for expr in stmt.expr:
|
|
expr.accept(self)
|
|
for target in stmt.target:
|
|
if target is not None:
|
|
self.analyze_lvalue(target)
|
|
# We allow redefinitions in the body of a with statement for
|
|
# convenience. This is unsafe since with statements can affect control
|
|
# flow by catching exceptions, but this is rare except for
|
|
# assertRaises() and other similar functions, where the exception is
|
|
# raised by the last statement in the body, which usually isn't a
|
|
# problem.
|
|
stmt.body.accept(self)
|
|
|
|
def visit_import(self, imp: Import) -> None:
|
|
for id, as_id in imp.ids:
|
|
self.record_assignment(as_id or id, False)
|
|
|
|
def visit_import_from(self, imp: ImportFrom) -> None:
|
|
for id, as_id in imp.names:
|
|
self.record_assignment(as_id or id, False)
|
|
|
|
def visit_assignment_stmt(self, s: AssignmentStmt) -> None:
|
|
s.rvalue.accept(self)
|
|
for lvalue in s.lvalues:
|
|
self.analyze_lvalue(lvalue)
|
|
|
|
def visit_match_stmt(self, s: MatchStmt) -> None:
|
|
for i in range(len(s.patterns)):
|
|
with self.enter_block():
|
|
s.patterns[i].accept(self)
|
|
guard = s.guards[i]
|
|
if guard is not None:
|
|
guard.accept(self)
|
|
# We already entered a block, so visit this block's statements directly
|
|
for stmt in s.bodies[i].body:
|
|
stmt.accept(self)
|
|
|
|
def visit_capture_pattern(self, p: AsPattern) -> None:
|
|
if p.name is not None:
|
|
self.analyze_lvalue(p.name)
|
|
|
|
def analyze_lvalue(self, lvalue: Lvalue, is_nested: bool = False) -> None:
|
|
"""Process assignment; in particular, keep track of (re)defined names.
|
|
|
|
Args:
|
|
is_nested: True for non-outermost Lvalue in a multiple assignment such as
|
|
"x, y = ..."
|
|
"""
|
|
if isinstance(lvalue, NameExpr):
|
|
name = lvalue.name
|
|
is_new = self.record_assignment(name, True)
|
|
if is_new:
|
|
self.handle_def(lvalue)
|
|
else:
|
|
self.handle_refine(lvalue)
|
|
if is_nested:
|
|
# This allows these to be redefined freely even if never read. Multiple
|
|
# assignment like "x, _ _ = y" defines dummy variables that are never read.
|
|
self.handle_ref(lvalue)
|
|
elif isinstance(lvalue, (ListExpr, TupleExpr)):
|
|
for item in lvalue.items:
|
|
self.analyze_lvalue(item, is_nested=True)
|
|
elif isinstance(lvalue, MemberExpr):
|
|
lvalue.expr.accept(self)
|
|
elif isinstance(lvalue, IndexExpr):
|
|
lvalue.base.accept(self)
|
|
lvalue.index.accept(self)
|
|
elif isinstance(lvalue, StarExpr):
|
|
# Propagate is_nested since in a typical use case like "x, *rest = ..." 'rest' may
|
|
# be freely reused.
|
|
self.analyze_lvalue(lvalue.expr, is_nested=is_nested)
|
|
|
|
def visit_name_expr(self, expr: NameExpr) -> None:
|
|
self.handle_ref(expr)
|
|
|
|
# Helpers for renaming references
|
|
|
|
def handle_arg(self, name: str) -> None:
|
|
"""Store function argument."""
|
|
self.refs[-1][name] = [[]]
|
|
self.num_reads[-1][name] = 0
|
|
|
|
def handle_def(self, expr: NameExpr) -> None:
|
|
"""Store new name definition."""
|
|
name = expr.name
|
|
names = self.refs[-1].setdefault(name, [])
|
|
names.append([expr])
|
|
self.num_reads[-1][name] = 0
|
|
|
|
def handle_refine(self, expr: NameExpr) -> None:
|
|
"""Store assignment to an existing name (that replaces previous value, if any)."""
|
|
name = expr.name
|
|
if name in self.refs[-1]:
|
|
names = self.refs[-1][name]
|
|
if not names:
|
|
names.append([])
|
|
names[-1].append(expr)
|
|
|
|
def handle_ref(self, expr: NameExpr) -> None:
|
|
"""Store reference to defined name."""
|
|
name = expr.name
|
|
if name in self.refs[-1]:
|
|
names = self.refs[-1][name]
|
|
if not names:
|
|
names.append([])
|
|
names[-1].append(expr)
|
|
num_reads = self.num_reads[-1]
|
|
num_reads[name] = num_reads.get(name, 0) + 1
|
|
|
|
def flush_refs(self) -> None:
|
|
"""Rename all references within the current scope.
|
|
|
|
This will be called at the end of a scope.
|
|
"""
|
|
is_func = self.scope_kinds[-1] == FUNCTION
|
|
for name, refs in self.refs[-1].items():
|
|
if len(refs) == 1:
|
|
# Only one definition -- no renaming needed.
|
|
continue
|
|
if is_func:
|
|
# In a function, don't rename the first definition, as it
|
|
# may be an argument that must preserve the name.
|
|
to_rename = refs[1:]
|
|
else:
|
|
# At module top level, don't rename the final definition,
|
|
# as it will be publicly visible outside the module.
|
|
to_rename = refs[:-1]
|
|
for i, item in enumerate(to_rename):
|
|
rename_refs(item, i)
|
|
self.refs.pop()
|
|
|
|
# Helpers for determining which assignments define new variables
|
|
|
|
def clear(self) -> None:
|
|
self.blocks = []
|
|
self.var_blocks = []
|
|
|
|
@contextmanager
|
|
def enter_block(self) -> Iterator[None]:
|
|
self.block_id += 1
|
|
self.blocks.append(self.block_id)
|
|
self.block_loop_depth[self.block_id] = self.loop_depth
|
|
try:
|
|
yield
|
|
finally:
|
|
self.blocks.pop()
|
|
|
|
@contextmanager
|
|
def enter_try(self) -> Iterator[None]:
|
|
self.disallow_redef_depth += 1
|
|
try:
|
|
yield
|
|
finally:
|
|
self.disallow_redef_depth -= 1
|
|
|
|
@contextmanager
|
|
def enter_loop(self) -> Iterator[None]:
|
|
self.loop_depth += 1
|
|
try:
|
|
yield
|
|
finally:
|
|
self.loop_depth -= 1
|
|
|
|
def current_block(self) -> int:
|
|
return self.blocks[-1]
|
|
|
|
@contextmanager
|
|
def enter_scope(self, kind: int) -> Iterator[None]:
|
|
self.var_blocks.append({})
|
|
self.refs.append({})
|
|
self.num_reads.append({})
|
|
self.scope_kinds.append(kind)
|
|
try:
|
|
yield
|
|
finally:
|
|
self.flush_refs()
|
|
self.var_blocks.pop()
|
|
self.num_reads.pop()
|
|
self.scope_kinds.pop()
|
|
|
|
def is_nested(self) -> int:
|
|
return len(self.var_blocks) > 1
|
|
|
|
def reject_redefinition_of_vars_in_scope(self) -> None:
|
|
"""Make it impossible to redefine defined variables in the current scope.
|
|
|
|
This is used if we encounter a function definition that
|
|
can make it ambiguous which definition is live. Example:
|
|
|
|
x = 0
|
|
|
|
def f() -> int:
|
|
return x
|
|
|
|
x = '' # Error -- cannot redefine x across function definition
|
|
"""
|
|
var_blocks = self.var_blocks[-1]
|
|
for key in var_blocks:
|
|
var_blocks[key] = -1
|
|
|
|
def reject_redefinition_of_vars_in_loop(self) -> None:
|
|
"""Reject redefinition of variables in the innermost loop.
|
|
|
|
If there is an early exit from a loop, there may be ambiguity about which
|
|
value may escape the loop. Example where this matters:
|
|
|
|
while f():
|
|
x = 0
|
|
if g():
|
|
break
|
|
x = '' # Error -- not a redefinition
|
|
reveal_type(x) # int
|
|
|
|
This method ensures that the second assignment to 'x' doesn't introduce a new
|
|
variable.
|
|
"""
|
|
var_blocks = self.var_blocks[-1]
|
|
for key, block in var_blocks.items():
|
|
if self.block_loop_depth.get(block) == self.loop_depth:
|
|
var_blocks[key] = -1
|
|
|
|
def record_assignment(self, name: str, can_be_redefined: bool) -> bool:
|
|
"""Record assignment to given name and return True if it defines a new variable.
|
|
|
|
Args:
|
|
can_be_redefined: If True, allows assignment in the same block to redefine
|
|
this name (if this is a new definition)
|
|
"""
|
|
if self.num_reads[-1].get(name, -1) == 0:
|
|
# Only set, not read, so no reason to redefine
|
|
return False
|
|
if self.disallow_redef_depth > 0:
|
|
# Can't redefine within try/with a block.
|
|
can_be_redefined = False
|
|
block = self.current_block()
|
|
var_blocks = self.var_blocks[-1]
|
|
if name not in var_blocks:
|
|
# New definition in this scope.
|
|
if can_be_redefined:
|
|
# Store the block where this was defined to allow redefinition in
|
|
# the same block only.
|
|
var_blocks[name] = block
|
|
else:
|
|
# This doesn't support arbitrary redefinition.
|
|
var_blocks[name] = -1
|
|
return True
|
|
elif var_blocks[name] == block:
|
|
# Redefinition -- defines a new variable with the same name.
|
|
return True
|
|
else:
|
|
# Assigns to an existing variable.
|
|
return False
|
|
|
|
|
|
class LimitedVariableRenameVisitor(TraverserVisitor):
|
|
"""Perform some limited variable renaming in with statements.
|
|
|
|
This allows reusing a variable in multiple with statements with
|
|
different types. For example, the two instances of 'x' can have
|
|
incompatible types:
|
|
|
|
with C() as x:
|
|
f(x)
|
|
with D() as x:
|
|
g(x)
|
|
|
|
The above code gets renamed conceptually into this (not valid Python!):
|
|
|
|
with C() as x':
|
|
f(x')
|
|
with D() as x:
|
|
g(x)
|
|
|
|
If there's a reference to a variable defined in 'with' outside the
|
|
statement, or if there's any trickiness around variable visibility
|
|
(e.g. function definitions), we give up and won't perform renaming.
|
|
|
|
The main use case is to allow binding both readable and writable
|
|
binary files into the same variable. These have different types:
|
|
|
|
with open(fnam, 'rb') as f: ...
|
|
with open(fnam, 'wb') as f: ...
|
|
"""
|
|
|
|
def __init__(self) -> None:
|
|
# Short names of variables bound in with statements using "as"
|
|
# in a surrounding scope
|
|
self.bound_vars: List[str] = []
|
|
# Stack of names that can't be safely renamed, per scope ('*' means that
|
|
# no names can be renamed)
|
|
self.skipped: List[Set[str]] = []
|
|
# References to variables that we may need to rename. Stack of
|
|
# scopes; each scope is a mapping from name to list of collections
|
|
# of names that refer to the same logical variable.
|
|
self.refs: List[Dict[str, List[List[NameExpr]]]] = []
|
|
|
|
def visit_mypy_file(self, file_node: MypyFile) -> None:
|
|
"""Rename variables within a file.
|
|
|
|
This is the main entry point to this class.
|
|
"""
|
|
with self.enter_scope():
|
|
for d in file_node.defs:
|
|
d.accept(self)
|
|
|
|
def visit_func_def(self, fdef: FuncDef) -> None:
|
|
self.reject_redefinition_of_vars_in_scope()
|
|
with self.enter_scope():
|
|
for arg in fdef.arguments:
|
|
self.record_skipped(arg.variable.name)
|
|
super().visit_func_def(fdef)
|
|
|
|
def visit_class_def(self, cdef: ClassDef) -> None:
|
|
self.reject_redefinition_of_vars_in_scope()
|
|
with self.enter_scope():
|
|
super().visit_class_def(cdef)
|
|
|
|
def visit_with_stmt(self, stmt: WithStmt) -> None:
|
|
for expr in stmt.expr:
|
|
expr.accept(self)
|
|
old_len = len(self.bound_vars)
|
|
for target in stmt.target:
|
|
if target is not None:
|
|
self.analyze_lvalue(target)
|
|
for target in stmt.target:
|
|
if target:
|
|
target.accept(self)
|
|
stmt.body.accept(self)
|
|
|
|
while len(self.bound_vars) > old_len:
|
|
self.bound_vars.pop()
|
|
|
|
def analyze_lvalue(self, lvalue: Lvalue) -> None:
|
|
if isinstance(lvalue, NameExpr):
|
|
name = lvalue.name
|
|
if name in self.bound_vars:
|
|
# Name bound in a surrounding with statement, so it can be renamed
|
|
self.visit_name_expr(lvalue)
|
|
else:
|
|
var_info = self.refs[-1]
|
|
if name not in var_info:
|
|
var_info[name] = []
|
|
var_info[name].append([])
|
|
self.bound_vars.append(name)
|
|
elif isinstance(lvalue, (ListExpr, TupleExpr)):
|
|
for item in lvalue.items:
|
|
self.analyze_lvalue(item)
|
|
elif isinstance(lvalue, MemberExpr):
|
|
lvalue.expr.accept(self)
|
|
elif isinstance(lvalue, IndexExpr):
|
|
lvalue.base.accept(self)
|
|
lvalue.index.accept(self)
|
|
elif isinstance(lvalue, StarExpr):
|
|
self.analyze_lvalue(lvalue.expr)
|
|
|
|
def visit_import(self, imp: Import) -> None:
|
|
# We don't support renaming imports
|
|
for id, as_id in imp.ids:
|
|
self.record_skipped(as_id or id)
|
|
|
|
def visit_import_from(self, imp: ImportFrom) -> None:
|
|
# We don't support renaming imports
|
|
for id, as_id in imp.names:
|
|
self.record_skipped(as_id or id)
|
|
|
|
def visit_import_all(self, imp: ImportAll) -> None:
|
|
# Give up, since we don't know all imported names yet
|
|
self.reject_redefinition_of_vars_in_scope()
|
|
|
|
def visit_name_expr(self, expr: NameExpr) -> None:
|
|
name = expr.name
|
|
if name in self.bound_vars:
|
|
# Record reference so that it can be renamed later
|
|
for scope in reversed(self.refs):
|
|
if name in scope:
|
|
scope[name][-1].append(expr)
|
|
else:
|
|
self.record_skipped(name)
|
|
|
|
@contextmanager
|
|
def enter_scope(self) -> Iterator[None]:
|
|
self.skipped.append(set())
|
|
self.refs.append({})
|
|
yield None
|
|
self.flush_refs()
|
|
|
|
def reject_redefinition_of_vars_in_scope(self) -> None:
|
|
self.record_skipped('*')
|
|
|
|
def record_skipped(self, name: str) -> None:
|
|
self.skipped[-1].add(name)
|
|
|
|
def flush_refs(self) -> None:
|
|
ref_dict = self.refs.pop()
|
|
skipped = self.skipped.pop()
|
|
if '*' not in skipped:
|
|
for name, refs in ref_dict.items():
|
|
if len(refs) <= 1 or name in skipped:
|
|
continue
|
|
# At module top level we must not rename the final definition,
|
|
# as it may be publicly visible
|
|
to_rename = refs[:-1]
|
|
for i, item in enumerate(to_rename):
|
|
rename_refs(item, i)
|
|
|
|
|
|
def rename_refs(names: List[NameExpr], index: int) -> None:
|
|
name = names[0].name
|
|
new_name = name + "'" * (index + 1)
|
|
for expr in names:
|
|
expr.name = new_name
|