336 lines
12 KiB
Python
336 lines
12 KiB
Python
|
import re
|
|||
|
import sys
|
|||
|
from dataclasses import dataclass
|
|||
|
from functools import lru_cache
|
|||
|
from typing import Iterator, List, Optional, Union
|
|||
|
|
|||
|
if sys.version_info >= (3, 8):
|
|||
|
from typing import Final
|
|||
|
else:
|
|||
|
from typing_extensions import Final
|
|||
|
|
|||
|
from black.nodes import (
|
|||
|
CLOSING_BRACKETS,
|
|||
|
STANDALONE_COMMENT,
|
|||
|
WHITESPACE,
|
|||
|
container_of,
|
|||
|
first_leaf_column,
|
|||
|
preceding_leaf,
|
|||
|
)
|
|||
|
from blib2to3.pgen2 import token
|
|||
|
from blib2to3.pytree import Leaf, Node, type_repr
|
|||
|
|
|||
|
# types
# LN ("Leaf or Node") is the alias used in this module's signatures for any
# element of the blib2to3 syntax tree, whether a single token or an inner node.
LN = Union[Leaf, Node]
|
|||
|
|
|||
|
# Marker comments, in their normalized "# text" form, that switch formatting
# off until a matching "on" marker.
FMT_OFF: Final = {
    "# fmt: off",
    "# fmt:off",
    "# yapf: disable",
}
# Marker comments that exclude only the line they trail.
FMT_SKIP: Final = {
    "# fmt: skip",
    "# fmt:skip",
}
# Every marker that excludes code from formatting ("off" or "skip").
FMT_PASS: Final = FMT_OFF | FMT_SKIP
# Marker comments that switch formatting back on.
FMT_ON: Final = {
    "# fmt: on",
    "# fmt:on",
    "# yapf: enable",
}

# Keyed by the `preview` flag: the characters that may directly follow the hash
# sign without `make_comment` inserting a space after it.
COMMENT_EXCEPTIONS = {
    True: " !:#'",
    False: " !:#'%",
}
|
|||
|
|
|||
|
|
|||
|
@dataclass
class ProtoComment:
    """A comment extracted from a prefix, before it is turned into a `Leaf`.

    A plain record is used rather than a :class:`blib2to3.pytree.Leaf` because:

    * a record can be cached, whereas `Leaf` objects store their lineno,
      column, prefix, and parent information and should not be reused;
    * keeping `newlines` and `consumed` apart from `value` simplifies the
      handling of special marker comments like ``# fmt: off/on``.
    """

    # token.COMMENT or STANDALONE_COMMENT
    type: int
    # content of the comment
    value: str
    # how many newlines before the comment
    newlines: int
    # how many characters of the original leaf's prefix did we consume
    consumed: int
|
|||
|
|
|||
|
|
|||
|
def generate_comments(leaf: LN, *, preview: bool) -> Iterator[Leaf]:
    """Yield a parentless comment `Leaf` for each comment in `leaf`'s prefix.

    lib2to3 shoves comments into the whitespace prefix of the token that
    follows them (this happens in `pgen2/driver.py:Driver.parse_tokens()`).
    That keeps the grammar free of every place a comment may appear, but it
    also means that comments don't "belong" anywhere in the tree. The leaves
    produced here therefore have no parent: we simply don't know what the
    correct parent should be.

    What still matters is the difference between inline comments and
    standalone ones, i.e. comments that don't share their line with any code.
    Inline comments come out as regular `token.COMMENT` leaves; standalone
    comments carry the fake `STANDALONE_COMMENT` token identifier.

    The number of newlines that preceded a comment is preserved as the prefix
    of the leaf generated for it.
    """
    at_end_of_file = leaf.type == token.ENDMARKER
    protos = list_comments(leaf.prefix, is_endmarker=at_end_of_file, preview=preview)
    for proto in protos:
        leading_newlines = "\n" * proto.newlines
        yield Leaf(proto.type, proto.value, prefix=leading_newlines)
|
|||
|
|
|||
|
|
|||
|
@lru_cache(maxsize=4096)
def list_comments(
    prefix: str, *, is_endmarker: bool, preview: bool
) -> List[ProtoComment]:
    """Return a list of :class:`ProtoComment` objects parsed from the given `prefix`.

    `prefix` is split into lines on ``\\n`` or ``\\r\\n``. Every line whose first
    non-blank character is ``#`` becomes one `ProtoComment`:

    * `type` is `token.COMMENT` only for a comment on the first line of the
      prefix (not counting lines that end in a backslash) and only when
      `is_endmarker` is false; every other comment is a `STANDALONE_COMMENT`;
    * `value` is the text normalized by `make_comment`;
    * `newlines` is the number of blank lines seen since the previous comment;
    * `consumed` is the offset in `prefix` just past the comment's line,
      including its line terminator.

    The result is memoized by `lru_cache`, so the same list object is handed to
    every caller that passes equal arguments; treat it as read-only.
    """
    result: List[ProtoComment] = []
    if not prefix or "#" not in prefix:
        return result

    # The capturing group keeps each terminator in the split result, so that
    # `consumed` can count "\r\n" as two characters instead of assuming that
    # every line ends in a lone "\n".
    pieces = re.split(r"(\r?\n)", prefix)
    lines = pieces[0::2]
    terminators = pieces[1::2]

    consumed = 0
    nlines = 0
    ignored_lines = 0
    for index, line in enumerate(lines):
        # The last piece has no terminator. One character is still counted for
        # it so that offsets for "\n"-terminated prefixes stay exactly as they
        # were; slicing past the end of `prefix` is harmless.
        terminator_len = len(terminators[index]) if index < len(terminators) else 1
        consumed += len(line) + terminator_len
        line = line.lstrip()
        if not line:
            nlines += 1
        if not line.startswith("#"):
            # Escaped newlines outside of a comment are not really newlines at
            # all. We treat a single-line comment following an escaped newline
            # as a simple trailing comment.
            if line.endswith("\\"):
                ignored_lines += 1
            continue

        if index == ignored_lines and not is_endmarker:
            comment_type = token.COMMENT  # simple trailing comment
        else:
            comment_type = STANDALONE_COMMENT
        comment = make_comment(line, preview=preview)
        result.append(
            ProtoComment(
                type=comment_type, value=comment, newlines=nlines, consumed=consumed
            )
        )
        nlines = 0
    return result
|
|||
|
|
|||
|
|
|||
|
def make_comment(content: str, *, preview: bool) -> str:
    """Return a consistently formatted comment from the given `content` string.

    Trailing whitespace is removed. All comments should have a single space
    between the hash sign and the content, except when the content starts with
    one of the characters listed in ``COMMENT_EXCEPTIONS[preview]`` (this is
    what leaves "##", "#!", "#:" and "#'" untouched).

    A non-breaking space directly after the hash sign is replaced by a regular
    space, unless the comment is a ``type:`` comment.

    If `content` didn't start with a hash sign, one is provided. Empty content
    yields a bare "#".
    """
    content = content.rstrip()
    if not content:
        return "#"

    if content[0] == "#":
        content = content[1:]
    # Spelled as an escape on purpose: a literal U+00A0 in the source cannot be
    # told apart from an ordinary space and is easily lost to editors or tools
    # that normalize whitespace, which would silently disable the check below.
    NON_BREAKING_SPACE = "\u00a0"
    if (
        content
        and content[0] == NON_BREAKING_SPACE
        and not content.lstrip().startswith("type:")
    ):
        content = " " + content[1:]  # Replace NBSP by a simple space
    if content and content[0] not in COMMENT_EXCEPTIONS[preview]:
        content = " " + content
    return "#" + content
|
|||
|
|
|||
|
|
|||
|
def normalize_fmt_off(node: Node, *, preview: bool) -> None:
    """Convert content between `# fmt: off`/`# fmt: on` into standalone comments.

    Pairs are converted one at a time; the loop ends as soon as a pass over
    `node` reports that nothing was converted.
    """
    while convert_one_fmt_off_pair(node, preview=preview):
        pass
|
|||
|
|
|||
|
|
|||
|
def convert_one_fmt_off_pair(node: Node, *, preview: bool) -> bool:
    """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.

    Walks the leaves of `node` looking for the first usable `# fmt: off` or
    `# fmt: skip` marker in a leaf prefix. The nodes that the marker covers are
    removed from the tree and a single `STANDALONE_COMMENT` leaf holding their
    verbatim source text is inserted where the first of them used to be.

    Returns True if a pair was converted. The tree has been modified in that
    case, so the search is not continued; call again to convert the next pair.
    """
    for leaf in node.leaves():
        # Offset in the prefix just past the last ordinary (non-marker) comment.
        # Everything before it stays in front of the replacement leaf.
        previous_consumed = 0
        for comment in list_comments(leaf.prefix, is_endmarker=False, preview=preview):
            if comment.value not in FMT_PASS:
                previous_consumed = comment.consumed
                continue
            # We only want standalone comments. If there's no previous leaf or
            # the previous leaf is indentation, it's a standalone comment in
            # disguise.
            if comment.value in FMT_PASS and comment.type != STANDALONE_COMMENT:
                prev = preceding_leaf(leaf)
                if prev:
                    # A trailing `# fmt: off` that follows real code is ignored.
                    if comment.value in FMT_OFF and prev.type not in WHITESPACE:
                        continue
                    # A `# fmt: skip` that only follows whitespace has no code
                    # on its line to protect.
                    if comment.value in FMT_SKIP and prev.type in WHITESPACE:
                        continue

            ignored_nodes = list(generate_ignored_nodes(leaf, comment, preview=preview))
            if not ignored_nodes:
                continue

            first = ignored_nodes[0]  # Can be a container node with the `leaf`.
            parent = first.parent
            # Read the prefix before it is rewritten below; the replacement
            # leaf's prefix is derived from this original text.
            prefix = first.prefix
            if comment.value in FMT_OFF:
                # Drop everything up to and including the marker's line, so it
                # is not repeated when `first` is rendered into `hidden_value`.
                first.prefix = prefix[comment.consumed :]
            if comment.value in FMT_SKIP:
                first.prefix = ""
                standalone_comment_prefix = prefix
            else:
                standalone_comment_prefix = (
                    prefix[:previous_consumed] + "\n" * comment.newlines
                )
            # The exact source text of the ignored nodes becomes the value of
            # the replacement leaf.
            hidden_value = "".join(str(n) for n in ignored_nodes)
            if comment.value in FMT_OFF:
                hidden_value = comment.value + "\n" + hidden_value
            if comment.value in FMT_SKIP:
                hidden_value += " " + comment.value
            if hidden_value.endswith("\n"):
                # That happens when one of the `ignored_nodes` ended with a NEWLINE
                # leaf (possibly followed by a DEDENT).
                hidden_value = hidden_value[:-1]
            # Detach every ignored node, remembering the index that the first
            # removal reported; the replacement leaf is inserted at that index.
            first_idx: Optional[int] = None
            for ignored in ignored_nodes:
                index = ignored.remove()
                if first_idx is None:
                    first_idx = index
            assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
            assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
            parent.insert_child(
                first_idx,
                Leaf(
                    STANDALONE_COMMENT,
                    hidden_value,
                    prefix=standalone_comment_prefix,
                ),
            )
            return True

    return False
|
|||
|
|
|||
|
|
|||
|
def generate_ignored_nodes(
    leaf: Leaf, comment: ProtoComment, *, preview: bool
) -> Iterator[LN]:
    """Starting from the container of `leaf`, generate all leaves until `# fmt: on`.

    If `comment` is a `# fmt: skip` marker, the nodes are produced by
    `_generate_ignored_nodes_from_fmt_skip` instead.
    Stops at the end of the block.
    """
    if comment.value in FMT_SKIP:
        yield from _generate_ignored_nodes_from_fmt_skip(leaf, comment, preview=preview)
        return
    container: Optional[LN] = container_of(leaf)
    # Walk the container and its following siblings until there are none left
    # or the end of the file is reached.
    while container is not None and container.type != token.ENDMARKER:
        # `# fmt: on` in this container's own prefix ends the ignored region
        # before the container itself.
        if is_fmt_on(container, preview=preview):
            return

        # fix for fmt: on in children
        if contains_fmt_on_at_column(container, leaf.column, preview=preview):
            # The region ends somewhere inside this container, so only the
            # children that come before the `# fmt: on` are yielded.
            for child in container.children:
                if isinstance(child, Leaf) and is_fmt_on(child, preview=preview):
                    if child.type in CLOSING_BRACKETS:
                        # This means `# fmt: on` is placed at a different bracket level
                        # than `# fmt: off`. This is an invalid use, but as a courtesy,
                        # we include this closing bracket in the ignored nodes.
                        # The alternative is to fail the formatting.
                        yield child
                    return
                if contains_fmt_on_at_column(child, leaf.column, preview=preview):
                    return
                yield child
        else:
            yield container
            container = container.next_sibling
|
|||
|
|
|||
|
|
|||
|
def _generate_ignored_nodes_from_fmt_skip(
    leaf: Leaf, comment: ProtoComment, *, preview: bool
) -> Iterator[LN]:
    """Generate all leaves that should be ignored by the `# fmt: skip` from `leaf`.

    Nothing is generated unless `comment` is the first comment in `leaf`'s
    prefix. Otherwise the prefix of `leaf` is cleared and the nodes of the
    skipped line are yielded in source order.
    """
    # `comment.value` is already normalized, so the leaf prefix has to go
    # through the same normalization before the two can be compared.
    own_comments = list_comments(leaf.prefix, is_endmarker=False, preview=preview)
    if not own_comments or own_comments[0].value != comment.value:
        return

    sibling = leaf.prev_sibling
    if sibling is not None:
        leaf.prefix = ""
        # Collect siblings right-to-left up to the one whose prefix contains a
        # newline (or the leftmost sibling), then emit them left-to-right.
        line_nodes = [sibling]
        while "\n" not in sibling.prefix and sibling.prev_sibling is not None:
            sibling = sibling.prev_sibling
            line_nodes.append(sibling)
        yield from reversed(line_nodes)
        return

    suite = leaf.parent
    if (
        suite is None
        or type_repr(suite.type) != "suite"
        or leaf.type != token.NEWLINE
    ):
        return

    # The `# fmt: skip` is on the colon line of the if/while/def/class/...
    # statements. The ignored nodes should be previous siblings of the
    # parent suite node.
    leaf.prefix = ""
    header_nodes: List[LN] = []
    cursor = suite.prev_sibling
    while cursor is not None and type_repr(cursor.type) != "suite":
        header_nodes.append(cursor)
        cursor = cursor.prev_sibling
    # Special case for `async_stmt` where the ASYNC token is on the
    # grandparent node.
    statement = suite.parent
    if statement is not None:
        before_statement = statement.prev_sibling
        if before_statement is not None and before_statement.type == token.ASYNC:
            header_nodes.append(before_statement)
    # Nodes were gathered right-to-left; hand them out in source order.
    yield from reversed(header_nodes)
|
|||
|
|
|||
|
|
|||
|
def is_fmt_on(container: LN, preview: bool) -> bool:
    """Determine whether formatting is switched on within a container.

    The last `# fmt:` on/off comment in the container's prefix decides. When
    the prefix holds no such comment, formatting is reported as not switched
    on.
    """
    directives = list_comments(container.prefix, is_endmarker=False, preview=preview)
    # Scanning backwards, the first on/off marker found is the last one written.
    for directive in reversed(directives):
        if directive.value in FMT_ON:
            return True
        if directive.value in FMT_OFF:
            return False
    return False
|
|||
|
|
|||
|
|
|||
|
def contains_fmt_on_at_column(container: LN, column: int, *, preview: bool) -> bool:
    """Determine if children at a given column have formatting switched on.

    A `Node` child is located by the column of its first leaf, a `Leaf` child
    by its own column. Returns True as soon as one child at `column` reports
    `is_fmt_on`, and False when no child does.
    """
    for candidate in container.children:
        if isinstance(candidate, Node):
            starts_at = first_leaf_column(candidate)
        elif isinstance(candidate, Leaf):
            starts_at = candidate.column
        else:
            continue
        if starts_at == column and is_fmt_on(candidate, preview=preview):
            return True

    return False
|
|||
|
|
|||
|
|
|||
|
def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
    """
    Returns:
        True iff one of the comments in @comment_list is a pragma used by one
        of the more common static analysis tools for python (e.g. mypy, flake8,
        pylint).
    """
    # `str.startswith` accepts a tuple and matches if any of the prefixes does.
    pragma_prefixes = ("# type:", "# noqa", "# pylint:")
    return any(leaf.value.startswith(pragma_prefixes) for leaf in comment_list)
|