465 lines
15 KiB
Python
465 lines
15 KiB
Python
|
"""Utility functions common to the C and C++ domains."""
|
||
|
|
||
|
from __future__ import annotations
|
||
|
|
||
|
import re
|
||
|
from copy import deepcopy
|
||
|
from typing import TYPE_CHECKING, Any, Callable
|
||
|
|
||
|
from docutils import nodes
|
||
|
|
||
|
from sphinx import addnodes
|
||
|
from sphinx.util import logging
|
||
|
|
||
|
if TYPE_CHECKING:
|
||
|
from docutils.nodes import TextElement
|
||
|
|
||
|
from sphinx.config import Config
|
||
|
|
||
|
logger = logging.getLogger(__name__)
|
||
|
|
||
|
StringifyTransform = Callable[[Any], str]
|
||
|
|
||
|
|
||
|
# Low-level lexing regexes shared by the C and C++ parsers.
_whitespace_re = re.compile(r'\s+')
# Anonymous-entity names use a leading '@' (a Sphinx-internal extension).
anon_identifier_re = re.compile(r'(@[a-zA-Z0-9_])[a-zA-Z0-9_]*\b')
identifier_re = re.compile(r'''
    (   # This 'extends' _anon_identifier_re with the ordinary identifiers,
        # make sure they are in sync.
        (~?\b[a-zA-Z_])  # ordinary identifiers
    |   (@[a-zA-Z0-9_])  # our extension for names of anonymous entities
    )
    [a-zA-Z0-9_]*\b
''', flags=re.VERBOSE)
# Integer literals; the (\'...)* groups allow C++14 digit separators.
integer_literal_re = re.compile(r'[1-9][0-9]*(\'[0-9]+)*')
octal_literal_re = re.compile(r'0[0-7]*(\'[0-7]+)*')
hex_literal_re = re.compile(r'0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*')
binary_literal_re = re.compile(r'0[bB][01]+(\'[01]+)*')
integers_literal_suffix_re = re.compile(r'''
    # unsigned and/or (long) long, in any order, but at least one of them
    (
        ([uU]    ([lL]  |  (ll)  |  (LL))?)
        |
        (([lL]  |  (ll)  |  (LL))    [uU]?)
    )\b
    # the ending word boundary is important for distinguishing
    # between suffixes and UDLs in C++
''', flags=re.VERBOSE)
# Floating-point literals, both decimal and C++17 hexadecimal forms,
# with optional digit separators.
float_literal_re = re.compile(r'''
    [+-]?(
    # decimal
      ([0-9]+(\'[0-9]+)*[eE][+-]?[0-9]+(\'[0-9]+)*)
    | (([0-9]+(\'[0-9]+)*)?\.[0-9]+(\'[0-9]+)*([eE][+-]?[0-9]+(\'[0-9]+)*)?)
    | ([0-9]+(\'[0-9]+)*\.([eE][+-]?[0-9]+(\'[0-9]+)*)?)
    # hex
    | (0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*[pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)
    | (0[xX]([0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?\.
        [0-9a-fA-F]+(\'[0-9a-fA-F]+)*([pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?)
    | (0[xX][0-9a-fA-F]+(\'[0-9a-fA-F]+)*\.([pP][+-]?[0-9a-fA-F]+(\'[0-9a-fA-F]+)*)?)
    )
''', flags=re.VERBOSE)
float_literal_suffix_re = re.compile(r'[fFlL]\b')
# the ending word boundary is important for distinguishing between suffixes and UDLs in C++
# Character literals with optional encoding prefix and escape sequences.
char_literal_re = re.compile(r'''
    ((?:u8)|u|U|L)?
    '(
      (?:[^\\'])
    | (\\(
        (?:['"?\\abfnrtv])
      | (?:[0-7]{1,3})
      | (?:x[0-9a-fA-F]{2})
      | (?:u[0-9a-fA-F]{4})
      | (?:U[0-9a-fA-F]{8})
      ))
    )'
''', flags=re.VERBOSE)
|
||
|
|
||
|
|
||
|
def verify_description_mode(mode: str) -> None:
    """Validate *mode* against the known description modes; raise otherwise."""
    valid_modes = {'lastIsName', 'noneIsName', 'markType', 'markName', 'param', 'udl'}
    if mode not in valid_modes:
        raise Exception("Description mode '%s' is invalid." % mode)
|
||
|
|
||
|
|
||
|
class NoOldIdError(Exception):
    """Signal that no id is available under an old id scheme.

    Used to avoid implementing unneeded id generation for old id schemes.
    """
|
||
|
|
||
|
|
||
|
class ASTBaseBase:
    """Common base for all AST nodes of the C and C++ domains."""

    def __eq__(self, other: Any) -> bool:
        # Two nodes are equal only if they share the exact type and all
        # instance attributes compare equal.
        if type(other) is not type(self):
            return False
        try:
            for attr_name, attr_value in self.__dict__.items():
                if attr_value != getattr(other, attr_name):
                    return False
        except AttributeError:
            return False
        return True

    # Nodes are mutable, so explicitly mark the type unhashable.
    __hash__ = None  # type: ignore[assignment]

    def clone(self) -> Any:
        """Return a deep copy of this node."""
        return deepcopy(self)

    def _stringify(self, transform: StringifyTransform) -> str:
        # Subclasses must render themselves via *transform* on children.
        raise NotImplementedError(repr(self))

    def __str__(self) -> str:
        return self._stringify(str)

    def get_display_string(self) -> str:
        return self._stringify(lambda ast: ast.get_display_string())

    def __repr__(self) -> str:
        return f'<{self.__class__.__name__}>'
|
||
|
|
||
|
|
||
|
################################################################################
|
||
|
# Attributes
|
||
|
################################################################################
|
||
|
|
||
|
class ASTAttribute(ASTBaseBase):
    """Abstract base class for attribute AST nodes."""

    def describe_signature(self, signode: TextElement) -> None:
        # Subclasses must render themselves into *signode*.
        raise NotImplementedError(repr(self))
|
||
|
|
||
|
|
||
|
class ASTCPPAttribute(ASTAttribute):
    """A C++11-style ``[[...]]`` attribute; *arg* is the raw text inside."""

    def __init__(self, arg: str) -> None:
        self.arg = arg

    def _stringify(self, transform: StringifyTransform) -> str:
        return f'[[{self.arg}]]'

    def describe_signature(self, signode: TextElement) -> None:
        # Punctuation nodes for the brackets, plain text for the content.
        signode += addnodes.desc_sig_punctuation('[[', '[[')
        signode += nodes.Text(self.arg)
        signode += addnodes.desc_sig_punctuation(']]', ']]')
|
||
|
|
||
|
|
||
|
class ASTGnuAttribute(ASTBaseBase):
    """One attribute inside ``__attribute__((...))``: a name plus optional
    parenthesised arguments."""

    def __init__(self, name: str, args: ASTBaseParenExprList | None) -> None:
        self.name = name
        self.args = args

    def _stringify(self, transform: StringifyTransform) -> str:
        if self.args:
            return self.name + transform(self.args)
        return self.name
|
||
|
|
||
|
|
||
|
class ASTGnuAttributeList(ASTAttribute):
    """A GNU-style ``__attribute__((...))`` attribute specifier."""

    def __init__(self, attrs: list[ASTGnuAttribute]) -> None:
        self.attrs = attrs

    def _stringify(self, transform: StringifyTransform) -> str:
        # Comma-separate the individual attributes inside the double parens.
        inner = ', '.join(transform(attr) for attr in self.attrs)
        return '__attribute__((' + inner + '))'

    def describe_signature(self, signode: TextElement) -> None:
        # Rendered as a single text node; no fine-grained markup.
        signode.append(nodes.Text(str(self)))
|
||
|
|
||
|
|
||
|
class ASTIdAttribute(ASTAttribute):
    """For simple attributes defined by the user."""

    def __init__(self, id: str) -> None:
        self.id = id

    def _stringify(self, transform: StringifyTransform) -> str:
        return self.id

    def describe_signature(self, signode: TextElement) -> None:
        signode += nodes.Text(self.id)
|
||
|
|
||
|
|
||
|
class ASTParenAttribute(ASTAttribute):
    """For paren attributes defined by the user."""

    def __init__(self, id: str, arg: str) -> None:
        self.id = id
        self.arg = arg

    def _stringify(self, transform: StringifyTransform) -> str:
        return f'{self.id}({self.arg})'

    def describe_signature(self, signode: TextElement) -> None:
        # Rendered as a single text node; no fine-grained markup.
        signode.append(nodes.Text(str(self)))
|
||
|
|
||
|
|
||
|
class ASTAttributeList(ASTBaseBase):
    """An ordered collection of attributes, rendered space-separated."""

    def __init__(self, attrs: list[ASTAttribute]) -> None:
        self.attrs = attrs

    def __len__(self) -> int:
        return len(self.attrs)

    def __add__(self, other: ASTAttributeList) -> ASTAttributeList:
        # Concatenation produces a new list; neither operand is mutated.
        return ASTAttributeList(self.attrs + other.attrs)

    def _stringify(self, transform: StringifyTransform) -> str:
        return ' '.join(map(transform, self.attrs))

    def describe_signature(self, signode: TextElement) -> None:
        """Render every attribute into *signode*, separated by sig-spaces."""
        for index, attr in enumerate(self.attrs):
            if index > 0:
                signode.append(addnodes.desc_sig_space())
            attr.describe_signature(signode)
|
||
|
|
||
|
|
||
|
################################################################################
|
||
|
|
||
|
class ASTBaseParenExprList(ASTBaseBase):
    """Base class for parenthesised expression lists; domains subclass this."""
|
||
|
|
||
|
|
||
|
################################################################################
|
||
|
|
||
|
class UnsupportedMultiCharacterCharLiteral(Exception):
    """Raised for character literals containing more than one character."""
|
||
|
|
||
|
|
||
|
class DefinitionError(Exception):
    """Raised when parsing a C/C++ declaration fails."""
|
||
|
|
||
|
|
||
|
class BaseParser:
    """Shared low-level parsing machinery for the C and C++ domain parsers.

    Holds the stripped definition string together with a cursor (``pos``)
    and provides primitive match/skip operations, error accumulation and
    reporting, and common attribute parsing.  Domain-specific subclasses
    supply ``language``, the attribute configuration properties, and
    ``_parse_paren_expression_list``.
    """

    def __init__(self, definition: str, *,
                 location: nodes.Node | tuple[str, int] | str,
                 config: Config) -> None:
        self.definition = definition.strip()
        self.location = location  # for warnings
        self.config = config

        # Cursor state: current position and end of the definition string.
        self.pos = 0
        self.end = len(self.definition)
        # Last successful regex match, and the (pos, last_match) pair from
        # just before it, allowing one level of backtracking.
        self.last_match: re.Match[str] | None = None
        self._previous_state: tuple[int, re.Match[str] | None] = (0, None)
        # Errors collected from failed alternative parses; reported
        # alongside the main error in fail().
        self.otherErrors: list[DefinitionError] = []

        # in our tests the following is set to False to capture bad parsing
        self.allowFallbackExpressionParsing = True

    def _make_multi_error(self, errors: list[Any], header: str) -> DefinitionError:
        """Combine (error, description) pairs into one DefinitionError.

        A single error is reported directly; multiple errors are listed
        under *header*, each indented beneath its description.
        """
        if len(errors) == 1:
            if len(header) > 0:
                return DefinitionError(header + '\n' + str(errors[0][0]))
            else:
                return DefinitionError(str(errors[0][0]))
        result = [header, '\n']
        for e in errors:
            if len(e[1]) > 0:
                indent = '  '
                result.append(e[1])
                result.append(':\n')
                for line in str(e[0]).split('\n'):
                    # Skip empty lines so the indented report stays compact.
                    if len(line) == 0:
                        continue
                    result.append(indent)
                    result.append(line)
                    result.append('\n')
            else:
                result.append(str(e[0]))
        return DefinitionError(''.join(result))

    @property
    def language(self) -> str:
        """Human-readable language name used in error messages (subclass hook)."""
        raise NotImplementedError

    def status(self, msg: str) -> None:
        # for debugging
        indicator = '-' * self.pos + '^'
        logger.debug(f"{msg}\n{self.definition}\n{indicator}")  # NoQA: G004

    def fail(self, msg: str) -> None:
        """Abort parsing: raise a DefinitionError pointing at the current position.

        Any accumulated ``otherErrors`` are attached as potential causes
        and then cleared.
        """
        errors = []
        indicator = '-' * self.pos + '^'
        exMain = DefinitionError(
            'Invalid %s declaration: %s [error at %d]\n  %s\n  %s' %
            (self.language, msg, self.pos, self.definition, indicator))
        errors.append((exMain, "Main error"))
        for err in self.otherErrors:
            errors.append((err, "Potential other error"))
        self.otherErrors = []
        raise self._make_multi_error(errors, '')

    def warn(self, msg: str) -> None:
        """Emit a Sphinx warning attributed to this declaration's location."""
        logger.warning(msg, location=self.location)

    def match(self, regex: re.Pattern[str]) -> bool:
        """Try to match *regex* at the cursor; on success advance past it."""
        match = regex.match(self.definition, self.pos)
        if match is not None:
            # Remember the prior state so one step of backtracking is possible.
            self._previous_state = (self.pos, self.last_match)
            self.pos = match.end()
            self.last_match = match
            return True
        return False

    def skip_string(self, string: str) -> bool:
        """Consume *string* literally at the cursor if present."""
        strlen = len(string)
        if self.definition[self.pos:self.pos + strlen] == string:
            self.pos += strlen
            return True
        return False

    def skip_word(self, word: str) -> bool:
        """Consume *word* only when delimited by word boundaries."""
        return self.match(re.compile(r'\b%s\b' % re.escape(word)))

    def skip_ws(self) -> bool:
        """Consume any run of whitespace at the cursor."""
        return self.match(_whitespace_re)

    def skip_word_and_ws(self, word: str) -> bool:
        """Consume *word* (with boundaries) plus any trailing whitespace."""
        if self.skip_word(word):
            self.skip_ws()
            return True
        return False

    def skip_string_and_ws(self, string: str) -> bool:
        """Consume *string* literally plus any trailing whitespace."""
        if self.skip_string(string):
            self.skip_ws()
            return True
        return False

    @property
    def eof(self) -> bool:
        """True when the cursor has reached the end of the definition."""
        return self.pos >= self.end

    @property
    def current_char(self) -> str:
        """The character at the cursor, or the sentinel string 'EOF'."""
        try:
            return self.definition[self.pos]
        except IndexError:
            return 'EOF'

    @property
    def matched_text(self) -> str:
        """Text of the last successful match, or '' if none yet."""
        if self.last_match is not None:
            return self.last_match.group()
        return ''

    def read_rest(self) -> str:
        """Return everything from the cursor to the end and consume it."""
        rv = self.definition[self.pos:]
        self.pos = self.end
        return rv

    def assert_end(self, *, allowSemicolon: bool = False) -> None:
        """Fail unless only whitespace (and optionally a single ';') remains."""
        self.skip_ws()
        if allowSemicolon:
            if not self.eof and self.definition[self.pos:] != ';':
                self.fail('Expected end of definition or ;.')
        else:
            if not self.eof:
                self.fail('Expected end of definition.')

    ################################################################################

    @property
    def id_attributes(self):
        """User-configured simple attribute names (subclass hook)."""
        raise NotImplementedError

    @property
    def paren_attributes(self):
        """User-configured paren attribute names (subclass hook)."""
        raise NotImplementedError

    def _parse_balanced_token_seq(self, end: list[str]) -> str:
        """Consume tokens with balanced (), [], {} until a char in *end*.

        Returns the raw consumed text; fails on unbalanced closers or EOF.
        """
        # TODO: add handling of string literals and similar
        brackets = {'(': ')', '[': ']', '{': '}'}
        startPos = self.pos
        # Stack of the closing brackets we still expect, innermost last.
        symbols: list[str] = []
        while not self.eof:
            if len(symbols) == 0 and self.current_char in end:
                break
            if self.current_char in brackets:
                symbols.append(brackets[self.current_char])
            elif len(symbols) > 0 and self.current_char == symbols[-1]:
                symbols.pop()
            elif self.current_char in ")]}":
                # A closer that matches nothing we opened.
                self.fail("Unexpected '%s' in balanced-token-seq." % self.current_char)
            self.pos += 1
        if self.eof:
            self.fail("Could not find end of balanced-token-seq starting at %d."
                      % startPos)
        return self.definition[startPos:self.pos]

    def _parse_attribute(self) -> ASTAttribute | None:
        """Parse one attribute at the cursor, or return None if there is none.

        Tries, in order: C++11 ``[[...]]``, GNU ``__attribute__((...))``,
        then the user-configured id and paren attributes.
        """
        self.skip_ws()
        # try C++11 style
        startPos = self.pos
        if self.skip_string_and_ws('['):
            if not self.skip_string('['):
                # Only a single '[' — not an attribute; rewind.
                self.pos = startPos
            else:
                # TODO: actually implement the correct grammar
                arg = self._parse_balanced_token_seq(end=[']'])
                if not self.skip_string_and_ws(']'):
                    self.fail("Expected ']' in end of attribute.")
                if not self.skip_string_and_ws(']'):
                    self.fail("Expected ']' in end of attribute after [[...]")
                return ASTCPPAttribute(arg)

        # try GNU style
        if self.skip_word_and_ws('__attribute__'):
            if not self.skip_string_and_ws('('):
                self.fail("Expected '(' after '__attribute__'.")
            if not self.skip_string_and_ws('('):
                self.fail("Expected '(' after '__attribute__('.")
            attrs = []
            while 1:
                if self.match(identifier_re):
                    name = self.matched_text
                    # Optional argument list; None when the attribute is bare.
                    exprs = self._parse_paren_expression_list()
                    attrs.append(ASTGnuAttribute(name, exprs))
                if self.skip_string_and_ws(','):
                    continue
                if self.skip_string_and_ws(')'):
                    break
                self.fail("Expected identifier, ')', or ',' in __attribute__.")
            if not self.skip_string_and_ws(')'):
                self.fail("Expected ')' after '__attribute__((...)'")
            return ASTGnuAttributeList(attrs)

        # try the simple id attributes defined by the user
        for id in self.id_attributes:
            if self.skip_word_and_ws(id):
                return ASTIdAttribute(id)

        # try the paren attributes defined by the user
        for id in self.paren_attributes:
            if not self.skip_string_and_ws(id):
                continue
            if not self.skip_string('('):
                self.fail("Expected '(' after user-defined paren-attribute.")
            arg = self._parse_balanced_token_seq(end=[')'])
            if not self.skip_string(')'):
                self.fail("Expected ')' to end user-defined paren-attribute.")
            return ASTParenAttribute(id, arg)

        return None

    def _parse_attribute_list(self) -> ASTAttributeList:
        """Parse zero or more consecutive attributes into an ASTAttributeList."""
        res = []
        while True:
            attr = self._parse_attribute()
            if attr is None:
                break
            res.append(attr)
        return ASTAttributeList(res)

    def _parse_paren_expression_list(self) -> ASTBaseParenExprList | None:
        """Parse a parenthesised expression list (subclass hook)."""
        raise NotImplementedError
|