usse/scrape/venv/lib/python3.10/site-packages/sphinx/util/cfamily.py
2023-12-22 15:26:01 +01:00

465 lines
15 KiB
Python

"""Utility functions common to the C and C++ domains."""
from __future__ import annotations
import re
from copy import deepcopy
from typing import TYPE_CHECKING, Any, Callable
from docutils import nodes
from sphinx import addnodes
from sphinx.util import logging
if TYPE_CHECKING:
from docutils.nodes import TextElement
from sphinx.config import Config
# Module-level logger, obtained through Sphinx's own ``logging`` utility module.
logger = logging.getLogger(__name__)
# Type of the callback handed to ``_stringify``: it receives a value (the AST
# classes below pass their child nodes) and returns that value's text.
StringifyTransform = Callable[[Any], str]
# A run of one or more whitespace characters; used by ``BaseParser.skip_ws``.
_whitespace_re = re.compile(r'\s+')
# Name of an anonymous entity: '@' followed by identifier characters, ending
# on a word boundary.
anon_identifier_re = re.compile(r'(@[a-zA-Z0-9_])[a-zA-Z0-9_]*\b')
# Ordinary identifiers (optionally preceded by '~') as well as the '@' names
# accepted by ``anon_identifier_re``.  Compiled with ``re.VERBOSE``, so the
# whitespace and '#' comments inside the pattern are ignored by the regex
# engine.  The first-character alternatives must be kept in sync with
# ``anon_identifier_re``.
identifier_re = re.compile(r'''
( # This 'extends' _anon_identifier_re with the ordinary identifiers,
# make sure they are in sync.
(~?\b[a-zA-Z_]) # ordinary identifiers
| (@[a-zA-Z0-9_]) # our extension for names of anonymous entities
)
[a-zA-Z0-9_]*\b
''', flags=re.VERBOSE)
# Integer literals.  Every pattern allows groups of digits separated by a
# single quote (the digit separator), e.g. 1'000'000.
# Decimal: starts with a non-zero digit.
integer_literal_re = re.compile(r'[1-9][0-9]*(\'[0-9]+)*')
# Octal: a leading '0' followed by octal digits (a lone '0' also matches).
octal_literal_re = re.compile(r'0[0-7]*(\'[0-7]+)*')
# Hexadecimal: '0x' / '0X' prefix followed by at least one hex digit.
hex_literal_re = re.compile(r'0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*')
# Binary: '0b' / '0B' prefix followed by at least one binary digit.
binary_literal_re = re.compile(r'0[bB][01]+(\'[01]+)*')
# Suffix of an integer literal: 'u'/'U' optionally followed by a long marker,
# or a long marker ('l', 'L', 'll', 'LL') optionally followed by 'u'/'U'.
# Compiled with ``re.VERBOSE``, so the spaces inside the pattern are ignored.
integers_literal_suffix_re = re.compile(r'''
# unsigned and/or (long) long, in any order, but at least one of them
(
([uU] ([lL] | (ll) | (LL))?)
|
(([lL] | (ll) | (LL)) [uU]?)
)\b
# the ending word boundary is important for distinguishing
# between suffixes and UDLs in C++
''', flags=re.VERBOSE)
# Floating-point literals with an optional leading sign.  The decimal
# alternatives are: digits with a mandatory exponent; optional digits, a dot
# and fraction digits with an optional exponent; digits followed by a dot with
# an optional exponent.  The hexadecimal alternatives mirror these with a
# '0x' / '0X' prefix and a 'p' / 'P' exponent marker.
float_literal_re = re.compile(r'''
[+-]?(
# decimal
([0-9]+(\'[0-9]+)*[eE][+-]?[0-9]+(\'[0-9]+)*)
| (([0-9]+(\'[0-9]+)*)?\.[0-9]+(\'[0-9]+)*([eE][+-]?[0-9]+(\'[0-9]+)*)?)
| ([0-9]+(\'[0-9]+)*\.([eE][+-]?[0-9]+(\'[0-9]+)*)?)
# hex
| (0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*[pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)
| (0[xX]([0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?\.
[0-9a-fA-F]+(\'[0-9a-fA-F]+)*([pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?)
| (0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*\.([pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?)
)
''', flags=re.VERBOSE)
# Suffix of a floating-point literal: a single 'f', 'F', 'l' or 'L'.
# The ending word boundary is important for distinguishing between such
# suffixes and user-defined literals (UDLs) in C++.
float_literal_suffix_re = re.compile(r'[fFlL]\b')
# Character literal: an optional encoding prefix ('u8', 'u', 'U' or 'L'),
# then a single-quoted body that is either one character other than a
# backslash or quote, or one escape sequence.  The accepted escapes are the
# simple ones (\' \" \? \\ \a \b \f \n \r \t \v), one to three octal digits,
# '\x' with exactly two hex digits, '\u' with four and '\U' with eight.
# Compiled with ``re.VERBOSE``, so whitespace in the pattern is ignored.
char_literal_re = re.compile(r'''
((?:u8)|u|U|L)?
'(
(?:[^\\'])
| (\\(
(?:['"?\\abfnrtv])
| (?:[0-7]{1,3})
| (?:x[0-9a-fA-F]{2})
| (?:u[0-9a-fA-F]{4})
| (?:U[0-9a-fA-F]{8})
))
)'
''', flags=re.VERBOSE)
def verify_description_mode(mode: str) -> None:
    """Check that *mode* is one of the recognised description modes.

    Returns ``None`` for a recognised mode.

    :raises Exception: if *mode* is not one of the recognised names.
    """
    recognised = ('lastIsName', 'noneIsName', 'markType', 'markName', 'param', 'udl')
    if mode in recognised:
        return
    raise Exception("Description mode '%s' is invalid." % mode)
class NoOldIdError(Exception):
    """Used to avoid implementing unneeded id generation for old id schemes."""
class ASTBaseBase:
    """Common base of the AST node classes in this module.

    Provides attribute-wise equality, deep-copy cloning and string rendering
    built on a ``_stringify`` method that subclasses must implement.
    """

    def __eq__(self, other: Any) -> bool:
        """Return True if *other* has exactly the same type as *self* and
        every instance attribute of *self* compares equal on *other*.
        """
        if type(self) is not type(other):
            return False
        try:
            mismatch = any(
                value != getattr(other, name)
                for name, value in self.__dict__.items()
            )
        except AttributeError:
            # an attribute lookup failed while comparing
            return False
        return not mismatch

    # Defining __hash__ = None is not strictly needed when __eq__ is defined.
    __hash__ = None  # type: ignore[assignment]

    def clone(self) -> Any:
        """Return a deep copy of this node."""
        return deepcopy(self)

    def _stringify(self, transform: StringifyTransform) -> str:
        """Render this node, using *transform* to render nested values.

        Must be overridden; the base implementation always raises.
        """
        raise NotImplementedError(repr(self))

    def __str__(self) -> str:
        # nested values are rendered with ``str``
        return self._stringify(str)

    def get_display_string(self) -> str:
        """Render this node, rendering nested values through their own
        ``get_display_string`` method.
        """
        def display(ast: Any) -> str:
            return ast.get_display_string()

        return self._stringify(display)

    def __repr__(self) -> str:
        return f'<{self.__class__.__name__}>'
################################################################################
# Attributes
################################################################################
class ASTAttribute(ASTBaseBase):
    """Base class of the attribute nodes that can describe themselves into a
    signature node.
    """

    def describe_signature(self, signode: TextElement) -> None:
        """Append the rendering of this attribute to *signode*.

        Must be overridden; the base implementation always raises.
        """
        raise NotImplementedError(repr(self))
class ASTCPPAttribute(ASTAttribute):
    """An attribute written in double square brackets: ``[[arg]]``."""

    def __init__(self, arg: str) -> None:
        # raw text found between '[[' and ']]'
        self.arg = arg

    def _stringify(self, transform: StringifyTransform) -> str:
        return ''.join(("[[", self.arg, "]]"))

    def describe_signature(self, signode: TextElement) -> None:
        """Append '[[', the argument text and ']]' to *signode*."""
        children = (
            addnodes.desc_sig_punctuation('[[', '[['),
            nodes.Text(self.arg),
            addnodes.desc_sig_punctuation(']]', ']]'),
        )
        for child in children:
            signode.append(child)
class ASTGnuAttribute(ASTBaseBase):
    """One entry of a GNU ``__attribute__((...))`` list: a name plus an
    optional parenthesised expression list.
    """

    def __init__(self, name: str, args: ASTBaseParenExprList | None) -> None:
        self.name = name
        self.args = args

    def _stringify(self, transform: StringifyTransform) -> str:
        # the arguments are only rendered when present (and truthy)
        rendered_args = transform(self.args) if self.args else ''
        return ''.join((self.name, rendered_args))
class ASTGnuAttributeList(ASTAttribute):
    """A complete GNU-style attribute: ``__attribute__((a, b, ...))``."""

    def __init__(self, attrs: list[ASTGnuAttribute]) -> None:
        self.attrs = attrs

    def _stringify(self, transform: StringifyTransform) -> str:
        # entries are rendered through *transform* and comma-separated
        inner = ', '.join(transform(attr) for attr in self.attrs)
        return '__attribute__((' + inner + '))'

    def describe_signature(self, signode: TextElement) -> None:
        """Append the plain-text form of the whole attribute to *signode*."""
        signode.append(nodes.Text(str(self)))
class ASTIdAttribute(ASTAttribute):
    """For simple attributes defined by the user."""

    def __init__(self, id: str) -> None:
        # the attribute text, rendered verbatim
        self.id = id

    def _stringify(self, transform: StringifyTransform) -> str:
        return self.id

    def describe_signature(self, signode: TextElement) -> None:
        """Append the attribute text to *signode* as a plain text node."""
        text_node = nodes.Text(self.id)
        signode.append(text_node)
class ASTParenAttribute(ASTAttribute):
    """For paren attributes defined by the user."""

    def __init__(self, id: str, arg: str) -> None:
        self.id = id
        # raw text found between the parentheses
        self.arg = arg

    def _stringify(self, transform: StringifyTransform) -> str:
        return ''.join((self.id, '(', self.arg, ')'))

    def describe_signature(self, signode: TextElement) -> None:
        """Append the plain-text form ``id(arg)`` to *signode*."""
        signode.append(nodes.Text(str(self)))
class ASTAttributeList(ASTBaseBase):
    """An ordered sequence of attributes, rendered separated by spaces."""

    def __init__(self, attrs: list[ASTAttribute]) -> None:
        self.attrs = attrs

    def __len__(self) -> int:
        return len(self.attrs)

    def __add__(self, other: ASTAttributeList) -> ASTAttributeList:
        """Return a new list holding the attributes of *self* then *other*."""
        combined = self.attrs + other.attrs
        return ASTAttributeList(combined)

    def _stringify(self, transform: StringifyTransform) -> str:
        return ' '.join(map(transform, self.attrs))

    def describe_signature(self, signode: TextElement) -> None:
        """Describe every attribute into *signode*, putting a space node
        between consecutive attributes.
        """
        for index, attr in enumerate(self.attrs):
            if index > 0:
                signode.append(addnodes.desc_sig_space())
            attr.describe_signature(signode)
################################################################################
class ASTBaseParenExprList(ASTBaseBase):
    """Base type of the parenthesised expression lists.

    It is the type of ``ASTGnuAttribute.args`` and the return type of
    ``BaseParser._parse_paren_expression_list``; it adds nothing of its own.
    """
################################################################################
class UnsupportedMultiCharacterCharLiteral(Exception):
    """Signals a multi-character char literal, which is not supported.

    NOTE(review): nothing in this module raises it; presumably the C and C++
    domain parsers do -- confirm against the callers.
    """
class DefinitionError(Exception):
    """Error describing a definition that could not be parsed.

    Built by ``BaseParser._make_multi_error`` and raised by
    ``BaseParser.fail``.
    """
class BaseParser:
    """Low-level parsing machinery shared by the C and C++ domains.

    Holds the stripped definition string together with a cursor (``pos``)
    and offers matching/skipping primitives, error reporting and attribute
    parsing.  Subclasses supply ``language``, ``id_attributes``,
    ``paren_attributes`` and ``_parse_paren_expression_list``.
    """

    def __init__(self, definition: str, *,
                 location: nodes.Node | tuple[str, int] | str,
                 config: Config) -> None:
        self.definition = definition.strip()
        self.location = location  # for warnings
        self.config = config

        # cursor state
        self.pos = 0
        self.end = len(self.definition)
        self.last_match: re.Match[str] | None = None
        # (pos, last_match) as they were just before the latest successful match()
        self._previous_state: tuple[int, re.Match[str] | None] = (0, None)
        # errors reported as "Potential other error" by the next fail()
        self.otherErrors: list[DefinitionError] = []

        # in our tests the following is set to False to capture bad parsing
        self.allowFallbackExpressionParsing = True

    def _make_multi_error(self, errors: list[Any], header: str) -> DefinitionError:
        """Fold *errors* into a single ``DefinitionError``.

        Each entry of *errors* is indexed as ``(error, description)``.  A
        lone error is returned with only the header (if any) prepended.
        Otherwise every described error is listed under its description with
        its non-empty lines indented, and undescribed errors are appended
        as-is.
        """
        if len(errors) == 1:
            sole = str(errors[0][0])
            if len(header) > 0:
                return DefinitionError(header + '\n' + sole)
            return DefinitionError(sole)
        pieces = [header, '\n']
        for entry in errors:
            err, description = entry[0], entry[1]
            if len(description) == 0:
                pieces.append(str(err))
                continue
            pieces.append(description)
            pieces.append(':\n')
            for line in str(err).split('\n'):
                if len(line) != 0:
                    pieces.extend((' ', line, '\n'))
        return DefinitionError(''.join(pieces))

    @property
    def language(self) -> str:
        """Language name used in error messages; provided by subclasses."""
        raise NotImplementedError

    def status(self, msg: str) -> None:
        """Log *msg*, the definition and a marker of the current position."""
        # for debugging
        indicator = '-' * self.pos + '^'
        logger.debug(f"{msg}\n{self.definition}\n{indicator}")  # NoQA: G004

    def fail(self, msg: str) -> None:
        """Raise a ``DefinitionError`` for the current position.

        The errors collected in ``otherErrors`` are reported alongside the
        main error, and the collection is reset.
        """
        indicator = '-' * self.pos + '^'
        main_error = DefinitionError(
            'Invalid %s declaration: %s [error at %d]\n %s\n %s' %
            (self.language, msg, self.pos, self.definition, indicator))
        errors = [(main_error, "Main error")]
        errors.extend((err, "Potential other error") for err in self.otherErrors)
        self.otherErrors = []
        raise self._make_multi_error(errors, '')

    def warn(self, msg: str) -> None:
        """Log *msg* as a warning attached to this parser's location."""
        logger.warning(msg, location=self.location)

    def match(self, regex: re.Pattern[str]) -> bool:
        """Try to match *regex* at the cursor.

        On success, remember the previous state, move the cursor past the
        match and return True.  Otherwise leave the state alone and return
        False.
        """
        found = regex.match(self.definition, self.pos)
        if found is None:
            return False
        self._previous_state = (self.pos, self.last_match)
        self.pos = found.end()
        self.last_match = found
        return True

    def skip_string(self, string: str) -> bool:
        """Consume *string* if the text at the cursor starts with it."""
        stop = self.pos + len(string)
        if self.definition[self.pos:stop] != string:
            return False
        self.pos = stop
        return True

    def skip_word(self, word: str) -> bool:
        """Consume *word* if it occurs at the cursor as a whole word."""
        pattern = re.compile(r'\b%s\b' % re.escape(word))
        return self.match(pattern)

    def skip_ws(self) -> bool:
        """Consume a run of whitespace; return whether any was consumed."""
        return self.match(_whitespace_re)

    def skip_word_and_ws(self, word: str) -> bool:
        """Like ``skip_word``, additionally consuming trailing whitespace."""
        if not self.skip_word(word):
            return False
        self.skip_ws()
        return True

    def skip_string_and_ws(self, string: str) -> bool:
        """Like ``skip_string``, additionally consuming trailing whitespace."""
        if not self.skip_string(string):
            return False
        self.skip_ws()
        return True

    @property
    def eof(self) -> bool:
        """Whether the cursor has reached the end of the definition."""
        return self.pos >= self.end

    @property
    def current_char(self) -> str:
        """The character at the cursor, or ``'EOF'`` if indexing fails."""
        try:
            return self.definition[self.pos]
        except IndexError:
            return 'EOF'

    @property
    def matched_text(self) -> str:
        """Text of the latest successful ``match``, or ``''`` if none."""
        return '' if self.last_match is None else self.last_match.group()

    def read_rest(self) -> str:
        """Return everything from the cursor onwards and move to the end."""
        remainder = self.definition[self.pos:]
        self.pos = self.end
        return remainder

    def assert_end(self, *, allowSemicolon: bool = False) -> None:
        """Fail unless only whitespace is left.

        With *allowSemicolon*, a remainder of exactly ``;`` is accepted too.
        """
        self.skip_ws()
        if self.eof:
            return
        if not allowSemicolon:
            self.fail('Expected end of definition.')
        elif self.definition[self.pos:] != ';':
            self.fail('Expected end of definition or ;.')

    ################################################################################

    @property
    def id_attributes(self):
        """User-defined simple attributes; provided by subclasses."""
        raise NotImplementedError

    @property
    def paren_attributes(self):
        """User-defined paren attributes; provided by subclasses."""
        raise NotImplementedError

    def _parse_balanced_token_seq(self, end: list[str]) -> str:
        """Consume text up to (not including) the first character in *end*
        that occurs outside any bracket pair, and return the consumed text.

        Fails on an unmatched closing bracket and when the definition ends
        before such a character is found.
        """
        # TODO: add handling of string literals and similar
        closing_for = {'(': ')', '[': ']', '{': '}'}
        startPos = self.pos
        pending: list[str] = []  # closing brackets still expected, innermost last
        while not self.eof:
            char = self.current_char
            if len(pending) == 0 and char in end:
                break
            if char in closing_for:
                pending.append(closing_for[char])
            elif len(pending) > 0 and char == pending[-1]:
                pending.pop()
            elif char in ")]}":
                self.fail("Unexpected '%s' in balanced-token-seq." % self.current_char)
            self.pos += 1
        if self.eof:
            self.fail("Could not find end of balanced-token-seq starting at %d."
                      % startPos)
        return self.definition[startPos:self.pos]

    def _parse_attribute(self) -> ASTAttribute | None:
        """Parse one attribute at the cursor, or return None if there is none.

        Tried in order: ``[[...]]``, ``__attribute__((...))``, the
        user-defined id attributes, then the user-defined paren attributes.
        """
        self.skip_ws()
        # try C++11 style
        rewind_to = self.pos
        if self.skip_string_and_ws('['):
            if self.skip_string('['):
                # TODO: actually implement the correct grammar
                arg = self._parse_balanced_token_seq(end=[']'])
                if not self.skip_string_and_ws(']'):
                    self.fail("Expected ']' in end of attribute.")
                if not self.skip_string_and_ws(']'):
                    self.fail("Expected ']' in end of attribute after [[...]")
                return ASTCPPAttribute(arg)
            # a single '[' is not an attribute: undo the skip
            self.pos = rewind_to

        # try GNU style
        if self.skip_word_and_ws('__attribute__'):
            if not self.skip_string_and_ws('('):
                self.fail("Expected '(' after '__attribute__'.")
            if not self.skip_string_and_ws('('):
                self.fail("Expected '(' after '__attribute__('.")
            gnu_attrs = []
            while True:
                if self.match(identifier_re):
                    name = self.matched_text
                    exprs = self._parse_paren_expression_list()
                    gnu_attrs.append(ASTGnuAttribute(name, exprs))
                if self.skip_string_and_ws(','):
                    continue
                if self.skip_string_and_ws(')'):
                    break
                self.fail("Expected identifier, ')', or ',' in __attribute__.")
            if not self.skip_string_and_ws(')'):
                self.fail("Expected ')' after '__attribute__((...)'")
            return ASTGnuAttributeList(gnu_attrs)

        # try the simple id attributes defined by the user
        for simple_id in self.id_attributes:
            if self.skip_word_and_ws(simple_id):
                return ASTIdAttribute(simple_id)

        # try the paren attributes defined by the user
        for paren_id in self.paren_attributes:
            if self.skip_string_and_ws(paren_id):
                if not self.skip_string('('):
                    self.fail("Expected '(' after user-defined paren-attribute.")
                arg = self._parse_balanced_token_seq(end=[')'])
                if not self.skip_string(')'):
                    self.fail("Expected ')' to end user-defined paren-attribute.")
                return ASTParenAttribute(paren_id, arg)

        return None

    def _parse_attribute_list(self) -> ASTAttributeList:
        """Parse consecutive attributes until none is found."""
        collected = []
        while (attr := self._parse_attribute()) is not None:
            collected.append(attr)
        return ASTAttributeList(collected)

    def _parse_paren_expression_list(self) -> ASTBaseParenExprList | None:
        """Parse a parenthesised expression list; provided by subclasses."""
        raise NotImplementedError