268 lines
7.8 KiB
Python
268 lines
7.8 KiB
Python
"""
|
|
pygments.lexers.grammar_notation
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
Lexers for grammar notations like BNF.
|
|
|
|
:copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
|
|
:license: BSD, see LICENSE for details.
|
|
"""
|
|
|
|
import re
|
|
|
|
from pygments.lexer import RegexLexer, bygroups, include, this, using, words
|
|
from pygments.token import Comment, Keyword, Literal, Name, Number, \
|
|
Operator, Punctuation, String, Text, Whitespace
|
|
|
|
# Names exported by ``from <this module> import *``: the four lexer classes
# defined below.
__all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer']
|
|
|
|
|
|
class BnfLexer(RegexLexer):
    """
    Lexer for grammar notations that resemble the original BNF.

    To stay usable across as many BNF dialects as possible, the lexer is
    deliberately minimal:

    * Terminal symbols are not distinguished; they come out as plain text.

    * Nonterminal symbols are recognised only when they are enclosed in
      angle brackets.

    * A nonterminal name may be built from any printable ASCII character
      (0x20-0x7E) other than the angle brackets themselves.  This
      permissive set is meant to accommodate
      `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.

    * The notation is assumed to have no comment syntax.

    * `::=` is the only operator that is highlighted; every other operator
      or punctuation character is emitted as plain text.

    The resulting highlighting is intentionally sparse.

    .. versionadded:: 2.1
    """

    name = 'BNF'
    aliases = ['bnf']
    filenames = ['*.bnf']
    mimetypes = ['text/x-bnf']

    tokens = {
        'root': [
            # <nonterminal>: the brackets are punctuation, the enclosed
            # name is reported as a class name.
            (
                r'(<)([ -;=?-~]+)(>)',
                bygroups(Punctuation, Name.Class, Punctuation),
            ),

            # The definition sign, the sole operator this lexer knows.
            (r'::=', Operator),

            # Everything else is plain text.  The first rule swallows whole
            # runs that cannot start one of the rules above (cheaper than
            # going character by character); the second rule picks up any
            # single character that is left over.
            (r'[^<>:]+', Text),
            (r'.', Text),
        ],
    }
|
|
|
|
|
|
class AbnfLexer(RegexLexer):
    """
    Lexer for IETF 7405 ABNF.

    (Updates `5234 <http://www.ietf.org/rfc/rfc5234.txt>`_) grammars.

    .. versionadded:: 2.1
    """

    name = 'ABNF'
    url = 'http://www.ietf.org/rfc/rfc7405.txt'
    aliases = ['abnf']
    filenames = ['*.abnf']
    mimetypes = ['text/x-abnf']

    # Rule names that are highlighted as keywords (the ABNF "core rules").
    _core_rules = (
        'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
        'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
        'SP', 'VCHAR', 'WSP')

    tokens = {
        'root': [
            # comment: from ';' to the end of the line
            (r';.*$', Comment.Single),

            # quoted string, optionally prefixed with %s / %i.
            # A double quote cannot appear inside the quotes; it has to be
            # written as '%x22'.
            (r'(%[si])?"[^"]*"', Literal),

            # binary numeric values
            (r'%b[01]+\-[01]+\b', Literal),  # range
            (r'%b[01]+(\.[01]+)*\b', Literal),  # concat

            # decimal numeric values
            (r'%d[0-9]+\-[0-9]+\b', Literal),  # range
            (r'%d[0-9]+(\.[0-9]+)*\b', Literal),  # concat

            # hexadecimal numeric values
            (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal),  # range
            (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal),  # concat

            # repetition (<a>*<b>element) including nRule; the most
            # specific form has to be tried first
            (r'\b[0-9]+\*[0-9]+', Operator),
            (r'\b[0-9]+\*', Operator),
            (r'\b[0-9]+', Operator),
            (r'\*', Operator),

            # Strictly speaking, these are not keywords but are called
            # `Core Rule'.
            # The '(?!-)' guard is needed because '-' is a legal rule-name
            # character but not a word character: with a bare '\b' a
            # user-defined name such as 'DIGIT-list' would be cut into the
            # keyword 'DIGIT' followed by stray text.  With the guard such
            # names fall through to the nonterminal rule below.
            (words(_core_rules, suffix=r'\b(?!-)'), Keyword),

            # nonterminals (ALPHA *(ALPHA / DIGIT / "-"))
            (r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),

            # operators
            (r'(=/|=|/)', Operator),

            # punctuation
            (r'[\[\]()]', Punctuation),

            # fallback
            (r'\s+', Whitespace),
            (r'.', Text),
        ],
    }
|
|
|
|
|
|
class JsgfLexer(RegexLexer):
    """
    Lexer for grammars written in the JSpeech Grammar Format (JSGF).

    .. versionadded:: 2.2
    """

    name = 'JSGF'
    url = 'https://www.w3.org/TR/jsgf/'
    aliases = ['jsgf']
    filenames = ['*.jsgf']
    mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']

    tokens = {
        # Entry state: comments are tried first, then everything else.
        'root': [
            include('comments'),
            include('non-comments'),
        ],

        'comments': [
            # '/**' (but not the empty comment '/**/') opens a
            # documentation comment, which gets a state of its own.
            (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
            # ordinary block comment, matched in one go
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            # line comment
            (r'//.*$', Comment.Single),
        ],

        # Shared by 'root' and 'example'.
        'non-comments': [
            # '#JSGF ...' header; only matches at the very start of the text
            (r'\A#JSGF[^;]*', Comment.Preproc),
            (r'\s+', Whitespace),
            (r';', Punctuation),
            (r'[=|()\[\]*+]', Operator),
            # a slash-delimited span is tagged as a float
            (r'/[^/]+/', Number.Float),
            # openers for the quoted-string and {tag} sub-states
            (r'"', String.Double, 'string'),
            (r'\{', String.Other, 'tag'),
            (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
            # 'grammar' switches to a state that reads the dotted name
            (r'grammar\b', Keyword.Reserved, 'grammar name'),
            # <NULL> and <VOID> are reported as builtins
            (
                r'(<)(NULL|VOID)(>)',
                bygroups(Punctuation, Name.Builtin, Punctuation),
            ),
            # any other '<' starts a rule reference
            (r'<', Punctuation, 'rulename'),
            # remaining words / symbol runs are plain text
            (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
        ],

        # Inside "...": runs until the closing quote, honouring backslash
        # escapes.
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\"]+', String.Double),
        ],

        # Inside {...}: same shape as 'string', closed by '}'.
        'tag': [
            (r'\}', String.Other, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\}]+', String.Other),
        ],

        # After the 'grammar' keyword: dotted name up to the ';'.
        'grammar name': [
            (r';', Punctuation, '#pop'),
            (r'\s+', Whitespace),
            (r'\.', Punctuation),
            (r'[^;\s.]+', Name.Namespace),
        ],

        # After '<': optionally qualified rule name up to the '>'.
        'rulename': [
            (r'>', Punctuation, '#pop'),
            (r'\*', Punctuation),
            (r'\s+', Whitespace),
            # a part followed by '.' is a namespace component ...
            (
                r'([^.>]+)(\s*)(\.)',
                bygroups(Name.Namespace, Text, Punctuation),
            ),
            # ... the part without a trailing '.' is the rule itself
            (r'[^.>]+', Name.Constant),
        ],

        # Inside '/** ... */'.
        'documentation comment': [
            (r'\*/', Comment.Multiline, '#pop'),
            # '@example' / '@see' at the start of a line: the text up to
            # the next '@' tag line or the end of the comment is lexed
            # again by this lexer, starting in the 'example' state.
            (
                r'^(\s*)(\*?)(\s*)(@(?:example|see))(\s+)'
                r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
                bygroups(
                    Whitespace,
                    Comment.Multiline,
                    Whitespace,
                    Comment.Special,
                    Whitespace,
                    using(this, state='example'),
                ),
            ),
            # any other '@tag' at the start of a line
            (
                r'(^\s*\*?\s*)(@\S*)',
                bygroups(Comment.Multiline, Comment.Special),
            ),
            # ordinary comment text
            (r'[^*\n@]+|\w|\W', Comment.Multiline),
        ],

        # Grammar text embedded in a documentation comment.
        'example': [
            # the '*' that leads a continuation line is still comment
            (r'(\n\s*)(\*)', bygroups(Whitespace, Comment.Multiline)),
            include('non-comments'),
            (r'.', Comment.Multiline),
        ],
    }
|
|
|
|
|
|
class PegLexer(RegexLexer):
    """
    This lexer is for Parsing Expression Grammars (PEG).

    Various implementations of PEG have made different decisions
    regarding the syntax, so let's try to be accommodating:

    * `<-`, `←`, `:`, and `=` are all accepted as rule operators.

    * Both `|` and `/` are choice operators.

    * `^`, `↑`, and `~` are cut operators.

    * A single `a-z` character immediately before a string, or
      multiple `a-z` characters following a string, are part of the
      string (e.g., `r"..."` or `"..."ilmsuxa`).

    .. versionadded:: 2.6
    """

    name = 'PEG'
    url = 'https://bford.info/pub/lang/peg.pdf'
    aliases = ['peg']
    filenames = ['*.peg']
    mimetypes = ['text/x-peg']

    tokens = {
        'root': [
            # Comments: '#' to the end of the line
            (r'#.*$', Comment.Single),

            # All operators
            (r'<-|[←:=/|&!?*+^↑~]', Operator),

            # Other punctuation
            (r'[()]', Punctuation),

            # Keywords: '.' (any character) is the only one
            (r'\.', Keyword),

            # Character classes.
            # Plain runs must exclude the backslash as well as ']', exactly
            # like the string rules below exclude it: otherwise the run
            # swallows the backslash of an escaped ']' and the class is
            # closed by that escaped bracket instead of the real one
            # (e.g. in '[a\]b]').
            (r'(\[)([^\]\\]*(?:\\.[^\]\\]*)*)(\])',
             bygroups(Punctuation, String, Punctuation)),

            # Single and double quoted strings (with optional modifiers);
            # a backslash escapes the following character
            (r'[a-z]?"[^"\\]*(?:\\.[^"\\]*)*"[a-z]*', String.Double),
            (r"[a-z]?'[^'\\]*(?:\\.[^'\\]*)*'[a-z]*", String.Single),

            # Nonterminals are not whitespace, operators, or punctuation
            (r'[^\s<←:=/|&!?*+\^↑~()\[\]"\'#]+', Name.Class),

            # Fallback
            (r'.', Text),
        ],
    }
|