usse/funda-scraper/venv/lib/python3.10/site-packages/black/nodes.py

849 lines
24 KiB
Python
Raw Normal View History

2023-02-20 22:38:24 +00:00
"""
blib2to3 Node/Leaf transformation-related utility functions.
"""
import sys
from typing import Generic, Iterator, List, Optional, Set, Tuple, TypeVar, Union
if sys.version_info >= (3, 8):
from typing import Final
else:
from typing_extensions import Final
if sys.version_info >= (3, 10):
from typing import TypeGuard
else:
from typing_extensions import TypeGuard
from mypy_extensions import mypyc_attr
from black.cache import CACHE_DIR
from black.strings import has_triple_quotes
from blib2to3 import pygram
from blib2to3.pgen2 import token
from blib2to3.pytree import NL, Leaf, Node, type_repr
pygram.initialize(CACHE_DIR)
syms: Final = pygram.python_symbols
# types
T = TypeVar("T")
LN = Union[Leaf, Node]
LeafID = int
NodeType = int
WHITESPACE: Final = {token.DEDENT, token.INDENT, token.NEWLINE}
STATEMENT: Final = {
syms.if_stmt,
syms.while_stmt,
syms.for_stmt,
syms.try_stmt,
syms.except_clause,
syms.with_stmt,
syms.funcdef,
syms.classdef,
syms.match_stmt,
syms.case_block,
}
STANDALONE_COMMENT: Final = 153
token.tok_name[STANDALONE_COMMENT] = "STANDALONE_COMMENT"
LOGIC_OPERATORS: Final = {"and", "or"}
COMPARATORS: Final = {
token.LESS,
token.GREATER,
token.EQEQUAL,
token.NOTEQUAL,
token.LESSEQUAL,
token.GREATEREQUAL,
}
MATH_OPERATORS: Final = {
token.VBAR,
token.CIRCUMFLEX,
token.AMPER,
token.LEFTSHIFT,
token.RIGHTSHIFT,
token.PLUS,
token.MINUS,
token.STAR,
token.SLASH,
token.DOUBLESLASH,
token.PERCENT,
token.AT,
token.TILDE,
token.DOUBLESTAR,
}
STARS: Final = {token.STAR, token.DOUBLESTAR}
VARARGS_SPECIALS: Final = STARS | {token.SLASH}
VARARGS_PARENTS: Final = {
syms.arglist,
syms.argument, # double star in arglist
syms.trailer, # single argument to call
syms.typedargslist,
syms.varargslist, # lambdas
}
UNPACKING_PARENTS: Final = {
syms.atom, # single element of a list or set literal
syms.dictsetmaker,
syms.listmaker,
syms.testlist_gexp,
syms.testlist_star_expr,
syms.subject_expr,
syms.pattern,
}
TEST_DESCENDANTS: Final = {
syms.test,
syms.lambdef,
syms.or_test,
syms.and_test,
syms.not_test,
syms.comparison,
syms.star_expr,
syms.expr,
syms.xor_expr,
syms.and_expr,
syms.shift_expr,
syms.arith_expr,
syms.trailer,
syms.term,
syms.power,
}
TYPED_NAMES: Final = {syms.tname, syms.tname_star}
ASSIGNMENTS: Final = {
"=",
"+=",
"-=",
"*=",
"@=",
"/=",
"%=",
"&=",
"|=",
"^=",
"<<=",
">>=",
"**=",
"//=",
}
IMPLICIT_TUPLE: Final = {syms.testlist, syms.testlist_star_expr, syms.exprlist}
BRACKET: Final = {
token.LPAR: token.RPAR,
token.LSQB: token.RSQB,
token.LBRACE: token.RBRACE,
}
OPENING_BRACKETS: Final = set(BRACKET.keys())
CLOSING_BRACKETS: Final = set(BRACKET.values())
BRACKETS: Final = OPENING_BRACKETS | CLOSING_BRACKETS
ALWAYS_NO_SPACE: Final = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
RARROW = 55
@mypyc_attr(allow_interpreted_subclasses=True)
class Visitor(Generic[T]):
"""Basic lib2to3 visitor that yields things of type `T` on `visit()`."""
def visit(self, node: LN) -> Iterator[T]:
"""Main method to visit `node` and its children.
It tries to find a `visit_*()` method for the given `node.type`, like
`visit_simple_stmt` for Node objects or `visit_INDENT` for Leaf objects.
If no dedicated `visit_*()` method is found, chooses `visit_default()`
instead.
Then yields objects of type `T` from the selected visitor.
"""
if node.type < 256:
name = token.tok_name[node.type]
else:
name = str(type_repr(node.type))
# We explicitly branch on whether a visitor exists (instead of
# using self.visit_default as the default arg to getattr) in order
# to save needing to create a bound method object and so mypyc can
# generate a native call to visit_default.
visitf = getattr(self, f"visit_{name}", None)
if visitf:
yield from visitf(node)
else:
yield from self.visit_default(node)
def visit_default(self, node: LN) -> Iterator[T]:
"""Default `visit_*()` implementation. Recurses to children of `node`."""
if isinstance(node, Node):
for child in node.children:
yield from self.visit(child)
def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa: C901
"""Return whitespace prefix if needed for the given `leaf`.
`complex_subscript` signals whether the given leaf is part of a subscription
which has non-trivial arguments, like arithmetic expressions or function calls.
"""
NO: Final = ""
SPACE: Final = " "
DOUBLESPACE: Final = " "
t = leaf.type
p = leaf.parent
v = leaf.value
if t in ALWAYS_NO_SPACE:
return NO
if t == token.COMMENT:
return DOUBLESPACE
assert p is not None, f"INTERNAL ERROR: hand-made leaf without parent: {leaf!r}"
if t == token.COLON and p.type not in {
syms.subscript,
syms.subscriptlist,
syms.sliceop,
}:
return NO
prev = leaf.prev_sibling
if not prev:
prevp = preceding_leaf(p)
if not prevp or prevp.type in OPENING_BRACKETS:
return NO
if t == token.COLON:
if prevp.type == token.COLON:
return NO
elif prevp.type != token.COMMA and not complex_subscript:
return NO
return SPACE
if prevp.type == token.EQUAL:
if prevp.parent:
if prevp.parent.type in {
syms.arglist,
syms.argument,
syms.parameters,
syms.varargslist,
}:
return NO
elif prevp.parent.type == syms.typedargslist:
# A bit hacky: if the equal sign has whitespace, it means we
# previously found it's a typed argument. So, we're using
# that, too.
return prevp.prefix
elif (
prevp.type == token.STAR
and parent_type(prevp) == syms.star_expr
and parent_type(prevp.parent) == syms.subscriptlist
):
# No space between typevar tuples.
return NO
elif prevp.type in VARARGS_SPECIALS:
if is_vararg(prevp, within=VARARGS_PARENTS | UNPACKING_PARENTS):
return NO
elif prevp.type == token.COLON:
if prevp.parent and prevp.parent.type in {syms.subscript, syms.sliceop}:
return SPACE if complex_subscript else NO
elif (
prevp.parent
and prevp.parent.type == syms.factor
and prevp.type in MATH_OPERATORS
):
return NO
elif prevp.type == token.AT and p.parent and p.parent.type == syms.decorator:
# no space in decorators
return NO
elif prev.type in OPENING_BRACKETS:
return NO
if p.type in {syms.parameters, syms.arglist}:
# untyped function signatures or calls
if not prev or prev.type != token.COMMA:
return NO
elif p.type == syms.varargslist:
# lambdas
if prev and prev.type != token.COMMA:
return NO
elif p.type == syms.typedargslist:
# typed function signatures
if not prev:
return NO
if t == token.EQUAL:
if prev.type not in TYPED_NAMES:
return NO
elif prev.type == token.EQUAL:
# A bit hacky: if the equal sign has whitespace, it means we
# previously found it's a typed argument. So, we're using that, too.
return prev.prefix
elif prev.type != token.COMMA:
return NO
elif p.type in TYPED_NAMES:
# type names
if not prev:
prevp = preceding_leaf(p)
if not prevp or prevp.type != token.COMMA:
return NO
elif p.type == syms.trailer:
# attributes and calls
if t == token.LPAR or t == token.RPAR:
return NO
if not prev:
if t == token.DOT or t == token.LSQB:
return NO
elif prev.type != token.COMMA:
return NO
elif p.type == syms.argument:
# single argument
if t == token.EQUAL:
return NO
if not prev:
prevp = preceding_leaf(p)
if not prevp or prevp.type == token.LPAR:
return NO
elif prev.type in {token.EQUAL} | VARARGS_SPECIALS:
return NO
elif p.type == syms.decorator:
# decorators
return NO
elif p.type == syms.dotted_name:
if prev:
return NO
prevp = preceding_leaf(p)
if not prevp or prevp.type == token.AT or prevp.type == token.DOT:
return NO
elif p.type == syms.classdef:
if t == token.LPAR:
return NO
if prev and prev.type == token.LPAR:
return NO
elif p.type in {syms.subscript, syms.sliceop}:
# indexing
if not prev:
assert p.parent is not None, "subscripts are always parented"
if p.parent.type == syms.subscriptlist:
return SPACE
return NO
elif not complex_subscript:
return NO
elif p.type == syms.atom:
if prev and t == token.DOT:
# dots, but not the first one.
return NO
elif p.type == syms.dictsetmaker:
# dict unpacking
if prev and prev.type == token.DOUBLESTAR:
return NO
elif p.type in {syms.factor, syms.star_expr}:
# unary ops
if not prev:
prevp = preceding_leaf(p)
if not prevp or prevp.type in OPENING_BRACKETS:
return NO
prevp_parent = prevp.parent
assert prevp_parent is not None
if prevp.type == token.COLON and prevp_parent.type in {
syms.subscript,
syms.sliceop,
}:
return NO
elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
return NO
elif t in {token.NAME, token.NUMBER, token.STRING}:
return NO
elif p.type == syms.import_from:
if t == token.DOT:
if prev and prev.type == token.DOT:
return NO
elif t == token.NAME:
if v == "import":
return SPACE
if prev and prev.type == token.DOT:
return NO
elif p.type == syms.sliceop:
return NO
elif p.type == syms.except_clause:
if t == token.STAR:
return NO
return SPACE
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
"""Return the first leaf that precedes `node`, if any."""
while node:
res = node.prev_sibling
if res:
if isinstance(res, Leaf):
return res
try:
return list(res.leaves())[-1]
except IndexError:
return None
node = node.parent
return None
def prev_siblings_are(node: Optional[LN], tokens: List[Optional[NodeType]]) -> bool:
"""Return if the `node` and its previous siblings match types against the provided
list of tokens; the provided `node`has its type matched against the last element in
the list. `None` can be used as the first element to declare that the start of the
list is anchored at the start of its parent's children."""
if not tokens:
return True
if tokens[-1] is None:
return node is None
if not node:
return False
if node.type != tokens[-1]:
return False
return prev_siblings_are(node.prev_sibling, tokens[:-1])
def parent_type(node: Optional[LN]) -> Optional[NodeType]:
"""
Returns:
@node.parent.type, if @node is not None and has a parent.
OR
None, otherwise.
"""
if node is None or node.parent is None:
return None
return node.parent.type
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
"""Return the child of `ancestor` that contains `descendant`."""
node: Optional[LN] = descendant
while node and node.parent != ancestor:
node = node.parent
return node
def replace_child(old_child: LN, new_child: LN) -> None:
"""
Side Effects:
* If @old_child.parent is set, replace @old_child with @new_child in
@old_child's underlying Node structure.
OR
* Otherwise, this function does nothing.
"""
parent = old_child.parent
if not parent:
return
child_idx = old_child.remove()
if child_idx is not None:
parent.insert_child(child_idx, new_child)
def container_of(leaf: Leaf) -> LN:
"""Return `leaf` or one of its ancestors that is the topmost container of it.
By "container" we mean a node where `leaf` is the very first child.
"""
same_prefix = leaf.prefix
container: LN = leaf
while container:
parent = container.parent
if parent is None:
break
if parent.children[0].prefix != same_prefix:
break
if parent.type == syms.file_input:
break
if parent.prev_sibling is not None and parent.prev_sibling.type in BRACKETS:
break
container = parent
return container
def first_leaf_column(node: Node) -> Optional[int]:
"""Returns the column of the first leaf child of a node."""
for child in node.children:
if isinstance(child, Leaf):
return child.column
return None
def is_arith_like(node: LN) -> bool:
"""Whether node is an arithmetic or a binary arithmetic expression"""
return node.type in {
syms.arith_expr,
syms.shift_expr,
syms.xor_expr,
syms.and_expr,
}
def is_docstring(leaf: Leaf) -> bool:
if prev_siblings_are(
leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
):
return True
# Multiline docstring on the same line as the `def`.
if prev_siblings_are(leaf.parent, [syms.parameters, token.COLON, syms.simple_stmt]):
# `syms.parameters` is only used in funcdefs and async_funcdefs in the Python
# grammar. We're safe to return True without further checks.
return True
return False
def is_empty_tuple(node: LN) -> bool:
"""Return True if `node` holds an empty tuple."""
return (
node.type == syms.atom
and len(node.children) == 2
and node.children[0].type == token.LPAR
and node.children[1].type == token.RPAR
)
def is_one_tuple(node: LN) -> bool:
"""Return True if `node` holds a tuple with one element, with or without parens."""
if node.type == syms.atom:
gexp = unwrap_singleton_parenthesis(node)
if gexp is None or gexp.type != syms.testlist_gexp:
return False
return len(gexp.children) == 2 and gexp.children[1].type == token.COMMA
return (
node.type in IMPLICIT_TUPLE
and len(node.children) == 2
and node.children[1].type == token.COMMA
)
def is_one_sequence_between(
opening: Leaf,
closing: Leaf,
leaves: List[Leaf],
brackets: Tuple[int, int] = (token.LPAR, token.RPAR),
) -> bool:
"""Return True if content between `opening` and `closing` is a one-sequence."""
if (opening.type, closing.type) != brackets:
return False
depth = closing.bracket_depth + 1
for _opening_index, leaf in enumerate(leaves):
if leaf is opening:
break
else:
raise LookupError("Opening paren not found in `leaves`")
commas = 0
_opening_index += 1
for leaf in leaves[_opening_index:]:
if leaf is closing:
break
bracket_depth = leaf.bracket_depth
if bracket_depth == depth and leaf.type == token.COMMA:
commas += 1
if leaf.parent and leaf.parent.type in {
syms.arglist,
syms.typedargslist,
}:
commas += 1
break
return commas < 2
def is_walrus_assignment(node: LN) -> bool:
"""Return True iff `node` is of the shape ( test := test )"""
inner = unwrap_singleton_parenthesis(node)
return inner is not None and inner.type == syms.namedexpr_test
def is_simple_decorator_trailer(node: LN, last: bool = False) -> bool:
"""Return True iff `node` is a trailer valid in a simple decorator"""
return node.type == syms.trailer and (
(
len(node.children) == 2
and node.children[0].type == token.DOT
and node.children[1].type == token.NAME
)
# last trailer can be an argument-less parentheses pair
or (
last
and len(node.children) == 2
and node.children[0].type == token.LPAR
and node.children[1].type == token.RPAR
)
# last trailer can be arguments
or (
last
and len(node.children) == 3
and node.children[0].type == token.LPAR
# and node.children[1].type == syms.argument
and node.children[2].type == token.RPAR
)
)
def is_simple_decorator_expression(node: LN) -> bool:
"""Return True iff `node` could be a 'dotted name' decorator
This function takes the node of the 'namedexpr_test' of the new decorator
grammar and test if it would be valid under the old decorator grammar.
The old grammar was: decorator: @ dotted_name [arguments] NEWLINE
The new grammar is : decorator: @ namedexpr_test NEWLINE
"""
if node.type == token.NAME:
return True
if node.type == syms.power:
if node.children:
return (
node.children[0].type == token.NAME
and all(map(is_simple_decorator_trailer, node.children[1:-1]))
and (
len(node.children) < 2
or is_simple_decorator_trailer(node.children[-1], last=True)
)
)
return False
def is_yield(node: LN) -> bool:
"""Return True if `node` holds a `yield` or `yield from` expression."""
if node.type == syms.yield_expr:
return True
if is_name_token(node) and node.value == "yield":
return True
if node.type != syms.atom:
return False
if len(node.children) != 3:
return False
lpar, expr, rpar = node.children
if lpar.type == token.LPAR and rpar.type == token.RPAR:
return is_yield(expr)
return False
def is_vararg(leaf: Leaf, within: Set[NodeType]) -> bool:
"""Return True if `leaf` is a star or double star in a vararg or kwarg.
If `within` includes VARARGS_PARENTS, this applies to function signatures.
If `within` includes UNPACKING_PARENTS, it applies to right hand-side
extended iterable unpacking (PEP 3132) and additional unpacking
generalizations (PEP 448).
"""
if leaf.type not in VARARGS_SPECIALS or not leaf.parent:
return False
p = leaf.parent
if p.type == syms.star_expr:
# Star expressions are also used as assignment targets in extended
# iterable unpacking (PEP 3132). See what its parent is instead.
if not p.parent:
return False
p = p.parent
return p.type in within
def is_multiline_string(leaf: Leaf) -> bool:
"""Return True if `leaf` is a multiline string that actually spans many lines."""
return has_triple_quotes(leaf.value) and "\n" in leaf.value
def is_stub_suite(node: Node) -> bool:
"""Return True if `node` is a suite with a stub body."""
if (
len(node.children) != 4
or node.children[0].type != token.NEWLINE
or node.children[1].type != token.INDENT
or node.children[3].type != token.DEDENT
):
return False
return is_stub_body(node.children[2])
def is_stub_body(node: LN) -> bool:
"""Return True if `node` is a simple statement containing an ellipsis."""
if not isinstance(node, Node) or node.type != syms.simple_stmt:
return False
if len(node.children) != 2:
return False
child = node.children[0]
return (
child.type == syms.atom
and len(child.children) == 3
and all(leaf == Leaf(token.DOT, ".") for leaf in child.children)
)
def is_atom_with_invisible_parens(node: LN) -> bool:
"""Given a `LN`, determines whether it's an atom `node` with invisible
parens. Useful in dedupe-ing and normalizing parens.
"""
if isinstance(node, Leaf) or node.type != syms.atom:
return False
first, last = node.children[0], node.children[-1]
return (
isinstance(first, Leaf)
and first.type == token.LPAR
and first.value == ""
and isinstance(last, Leaf)
and last.type == token.RPAR
and last.value == ""
)
def is_empty_par(leaf: Leaf) -> bool:
return is_empty_lpar(leaf) or is_empty_rpar(leaf)
def is_empty_lpar(leaf: Leaf) -> bool:
return leaf.type == token.LPAR and leaf.value == ""
def is_empty_rpar(leaf: Leaf) -> bool:
return leaf.type == token.RPAR and leaf.value == ""
def is_import(leaf: Leaf) -> bool:
"""Return True if the given leaf starts an import statement."""
p = leaf.parent
t = leaf.type
v = leaf.value
return bool(
t == token.NAME
and (
(v == "import" and p and p.type == syms.import_name)
or (v == "from" and p and p.type == syms.import_from)
)
)
def is_type_comment(leaf: Leaf, suffix: str = "") -> bool:
"""Return True if the given leaf is a special comment.
Only returns true for type comments for now."""
t = leaf.type
v = leaf.value
return t in {token.COMMENT, STANDALONE_COMMENT} and v.startswith("# type:" + suffix)
def wrap_in_parentheses(parent: Node, child: LN, *, visible: bool = True) -> None:
"""Wrap `child` in parentheses.
This replaces `child` with an atom holding the parentheses and the old
child. That requires moving the prefix.
If `visible` is False, the leaves will be valueless (and thus invisible).
"""
lpar = Leaf(token.LPAR, "(" if visible else "")
rpar = Leaf(token.RPAR, ")" if visible else "")
prefix = child.prefix
child.prefix = ""
index = child.remove() or 0
new_child = Node(syms.atom, [lpar, child, rpar])
new_child.prefix = prefix
parent.insert_child(index, new_child)
def unwrap_singleton_parenthesis(node: LN) -> Optional[LN]:
"""Returns `wrapped` if `node` is of the shape ( wrapped ).
Parenthesis can be optional. Returns None otherwise"""
if len(node.children) != 3:
return None
lpar, wrapped, rpar = node.children
if not (lpar.type == token.LPAR and rpar.type == token.RPAR):
return None
return wrapped
def ensure_visible(leaf: Leaf) -> None:
"""Make sure parentheses are visible.
They could be invisible as part of some statements (see
:func:`normalize_invisible_parens` and :func:`visit_import_from`).
"""
if leaf.type == token.LPAR:
leaf.value = "("
elif leaf.type == token.RPAR:
leaf.value = ")"
def is_name_token(nl: NL) -> TypeGuard[Leaf]:
return nl.type == token.NAME
def is_lpar_token(nl: NL) -> TypeGuard[Leaf]:
return nl.type == token.LPAR
def is_rpar_token(nl: NL) -> TypeGuard[Leaf]:
return nl.type == token.RPAR
def is_string_token(nl: NL) -> TypeGuard[Leaf]:
return nl.type == token.STRING
def is_number_token(nl: NL) -> TypeGuard[Leaf]:
return nl.type == token.NUMBER