usse/funda-scraper/venv/lib/python3.10/site-packages/black/comments.py

336 lines
12 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
import sys
from dataclasses import dataclass
from functools import lru_cache
from typing import Iterator, List, Optional, Union
if sys.version_info >= (3, 8):
from typing import Final
else:
from typing_extensions import Final
from black.nodes import (
CLOSING_BRACKETS,
STANDALONE_COMMENT,
WHITESPACE,
container_of,
first_leaf_column,
preceding_leaf,
)
from blib2to3.pgen2 import token
from blib2to3.pytree import Leaf, Node, type_repr
# types
LN = Union[Leaf, Node]
FMT_OFF: Final = {"# fmt: off", "# fmt:off", "# yapf: disable"}
FMT_SKIP: Final = {"# fmt: skip", "# fmt:skip"}
FMT_PASS: Final = {*FMT_OFF, *FMT_SKIP}
FMT_ON: Final = {"# fmt: on", "# fmt:on", "# yapf: enable"}
COMMENT_EXCEPTIONS = {True: " !:#'", False: " !:#'%"}
@dataclass
class ProtoComment:
"""Describes a piece of syntax that is a comment.
It's not a :class:`blib2to3.pytree.Leaf` so that:
* it can be cached (`Leaf` objects should not be reused more than once as
they store their lineno, column, prefix, and parent information);
* `newlines` and `consumed` fields are kept separate from the `value`. This
simplifies handling of special marker comments like ``# fmt: off/on``.
"""
type: int # token.COMMENT or STANDALONE_COMMENT
value: str # content of the comment
newlines: int # how many newlines before the comment
consumed: int # how many characters of the original leaf's prefix did we consume
def generate_comments(leaf: LN, *, preview: bool) -> Iterator[Leaf]:
"""Clean the prefix of the `leaf` and generate comments from it, if any.
Comments in lib2to3 are shoved into the whitespace prefix. This happens
in `pgen2/driver.py:Driver.parse_tokens()`. This was a brilliant implementation
move because it does away with modifying the grammar to include all the
possible places in which comments can be placed.
The sad consequence for us though is that comments don't "belong" anywhere.
This is why this function generates simple parentless Leaf objects for
comments. We simply don't know what the correct parent should be.
No matter though, we can live without this. We really only need to
differentiate between inline and standalone comments. The latter don't
share the line with any code.
Inline comments are emitted as regular token.COMMENT leaves. Standalone
are emitted with a fake STANDALONE_COMMENT token identifier.
"""
for pc in list_comments(
leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER, preview=preview
):
yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines)
@lru_cache(maxsize=4096)
def list_comments(
prefix: str, *, is_endmarker: bool, preview: bool
) -> List[ProtoComment]:
"""Return a list of :class:`ProtoComment` objects parsed from the given `prefix`."""
result: List[ProtoComment] = []
if not prefix or "#" not in prefix:
return result
consumed = 0
nlines = 0
ignored_lines = 0
for index, line in enumerate(re.split("\r?\n", prefix)):
consumed += len(line) + 1 # adding the length of the split '\n'
line = line.lstrip()
if not line:
nlines += 1
if not line.startswith("#"):
# Escaped newlines outside of a comment are not really newlines at
# all. We treat a single-line comment following an escaped newline
# as a simple trailing comment.
if line.endswith("\\"):
ignored_lines += 1
continue
if index == ignored_lines and not is_endmarker:
comment_type = token.COMMENT # simple trailing comment
else:
comment_type = STANDALONE_COMMENT
comment = make_comment(line, preview=preview)
result.append(
ProtoComment(
type=comment_type, value=comment, newlines=nlines, consumed=consumed
)
)
nlines = 0
return result
def make_comment(content: str, *, preview: bool) -> str:
"""Return a consistently formatted comment from the given `content` string.
All comments (except for "##", "#!", "#:", '#'") should have a single
space between the hash sign and the content.
If `content` didn't start with a hash sign, one is provided.
"""
content = content.rstrip()
if not content:
return "#"
if content[0] == "#":
content = content[1:]
NON_BREAKING_SPACE = " "
if (
content
and content[0] == NON_BREAKING_SPACE
and not content.lstrip().startswith("type:")
):
content = " " + content[1:] # Replace NBSP by a simple space
if content and content[0] not in COMMENT_EXCEPTIONS[preview]:
content = " " + content
return "#" + content
def normalize_fmt_off(node: Node, *, preview: bool) -> None:
"""Convert content between `# fmt: off`/`# fmt: on` into standalone comments."""
try_again = True
while try_again:
try_again = convert_one_fmt_off_pair(node, preview=preview)
def convert_one_fmt_off_pair(node: Node, *, preview: bool) -> bool:
"""Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment.
Returns True if a pair was converted.
"""
for leaf in node.leaves():
previous_consumed = 0
for comment in list_comments(leaf.prefix, is_endmarker=False, preview=preview):
if comment.value not in FMT_PASS:
previous_consumed = comment.consumed
continue
# We only want standalone comments. If there's no previous leaf or
# the previous leaf is indentation, it's a standalone comment in
# disguise.
if comment.value in FMT_PASS and comment.type != STANDALONE_COMMENT:
prev = preceding_leaf(leaf)
if prev:
if comment.value in FMT_OFF and prev.type not in WHITESPACE:
continue
if comment.value in FMT_SKIP and prev.type in WHITESPACE:
continue
ignored_nodes = list(generate_ignored_nodes(leaf, comment, preview=preview))
if not ignored_nodes:
continue
first = ignored_nodes[0] # Can be a container node with the `leaf`.
parent = first.parent
prefix = first.prefix
if comment.value in FMT_OFF:
first.prefix = prefix[comment.consumed :]
if comment.value in FMT_SKIP:
first.prefix = ""
standalone_comment_prefix = prefix
else:
standalone_comment_prefix = (
prefix[:previous_consumed] + "\n" * comment.newlines
)
hidden_value = "".join(str(n) for n in ignored_nodes)
if comment.value in FMT_OFF:
hidden_value = comment.value + "\n" + hidden_value
if comment.value in FMT_SKIP:
hidden_value += " " + comment.value
if hidden_value.endswith("\n"):
# That happens when one of the `ignored_nodes` ended with a NEWLINE
# leaf (possibly followed by a DEDENT).
hidden_value = hidden_value[:-1]
first_idx: Optional[int] = None
for ignored in ignored_nodes:
index = ignored.remove()
if first_idx is None:
first_idx = index
assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)"
assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)"
parent.insert_child(
first_idx,
Leaf(
STANDALONE_COMMENT,
hidden_value,
prefix=standalone_comment_prefix,
),
)
return True
return False
def generate_ignored_nodes(
leaf: Leaf, comment: ProtoComment, *, preview: bool
) -> Iterator[LN]:
"""Starting from the container of `leaf`, generate all leaves until `# fmt: on`.
If comment is skip, returns leaf only.
Stops at the end of the block.
"""
if comment.value in FMT_SKIP:
yield from _generate_ignored_nodes_from_fmt_skip(leaf, comment, preview=preview)
return
container: Optional[LN] = container_of(leaf)
while container is not None and container.type != token.ENDMARKER:
if is_fmt_on(container, preview=preview):
return
# fix for fmt: on in children
if contains_fmt_on_at_column(container, leaf.column, preview=preview):
for child in container.children:
if isinstance(child, Leaf) and is_fmt_on(child, preview=preview):
if child.type in CLOSING_BRACKETS:
# This means `# fmt: on` is placed at a different bracket level
# than `# fmt: off`. This is an invalid use, but as a courtesy,
# we include this closing bracket in the ignored nodes.
# The alternative is to fail the formatting.
yield child
return
if contains_fmt_on_at_column(child, leaf.column, preview=preview):
return
yield child
else:
yield container
container = container.next_sibling
def _generate_ignored_nodes_from_fmt_skip(
leaf: Leaf, comment: ProtoComment, *, preview: bool
) -> Iterator[LN]:
"""Generate all leaves that should be ignored by the `# fmt: skip` from `leaf`."""
prev_sibling = leaf.prev_sibling
parent = leaf.parent
# Need to properly format the leaf prefix to compare it to comment.value,
# which is also formatted
comments = list_comments(leaf.prefix, is_endmarker=False, preview=preview)
if not comments or comment.value != comments[0].value:
return
if prev_sibling is not None:
leaf.prefix = ""
siblings = [prev_sibling]
while "\n" not in prev_sibling.prefix and prev_sibling.prev_sibling is not None:
prev_sibling = prev_sibling.prev_sibling
siblings.insert(0, prev_sibling)
for sibling in siblings:
yield sibling
elif (
parent is not None
and type_repr(parent.type) == "suite"
and leaf.type == token.NEWLINE
):
# The `# fmt: skip` is on the colon line of the if/while/def/class/...
# statements. The ignored nodes should be previous siblings of the
# parent suite node.
leaf.prefix = ""
ignored_nodes: List[LN] = []
parent_sibling = parent.prev_sibling
while parent_sibling is not None and type_repr(parent_sibling.type) != "suite":
ignored_nodes.insert(0, parent_sibling)
parent_sibling = parent_sibling.prev_sibling
# Special case for `async_stmt` where the ASYNC token is on the
# grandparent node.
grandparent = parent.parent
if (
grandparent is not None
and grandparent.prev_sibling is not None
and grandparent.prev_sibling.type == token.ASYNC
):
ignored_nodes.insert(0, grandparent.prev_sibling)
yield from iter(ignored_nodes)
def is_fmt_on(container: LN, preview: bool) -> bool:
"""Determine whether formatting is switched on within a container.
Determined by whether the last `# fmt:` comment is `on` or `off`.
"""
fmt_on = False
for comment in list_comments(container.prefix, is_endmarker=False, preview=preview):
if comment.value in FMT_ON:
fmt_on = True
elif comment.value in FMT_OFF:
fmt_on = False
return fmt_on
def contains_fmt_on_at_column(container: LN, column: int, *, preview: bool) -> bool:
"""Determine if children at a given column have formatting switched on."""
for child in container.children:
if (
isinstance(child, Node)
and first_leaf_column(child) == column
or isinstance(child, Leaf)
and child.column == column
):
if is_fmt_on(child, preview=preview):
return True
return False
def contains_pragma_comment(comment_list: List[Leaf]) -> bool:
"""
Returns:
True iff one of the comments in @comment_list is a pragma used by one
of the more common static analysis tools for python (e.g. mypy, flake8,
pylint).
"""
for comment in comment_list:
if comment.value.startswith(("# type:", "# noqa", "# pylint:")):
return True
return False