usse/funda-scraper/venv/lib/python3.10/site-packages/pygments/lexers/grammar_notation.py

"""
    pygments.lexers.grammar_notation
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for grammar notations like BNF.

    :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, bygroups, include, this, using, words
from pygments.token import Comment, Keyword, Literal, Name, Number, \
    Operator, Punctuation, String, Text, Whitespace

__all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer']


class BnfLexer(RegexLexer):
    """
    Lexer for grammar notations written in the style of the original BNF.

    To stay usable for as many BNF dialects as possible, the lexer is
    deliberately minimal:

    * `Terminal Symbol` receives no highlighting of its own.

    * `NonTerminal Symbol` is only recognized when it is wrapped in
      angle brackets.

    * Between the brackets a `NonTerminal Symbol` may consist of any
      printable ASCII character other than the angle brackets
      themselves, as well as the space character (ASCII 0x20).
      This accommodates `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.

    * The target notation is assumed to have no comment syntax.

    * `::=` is the only operator that is singled out; every other
      operator or punctuation mark is emitted as plain text.

    The resulting highlighting is sparse, which is the accepted price
    for remaining dialect-neutral.

    .. versionadded:: 2.1
    """

    name = 'BNF'
    aliases = ['bnf']
    filenames = ['*.bnf']
    mimetypes = ['text/x-bnf']

    # Each rule is named on its own; the order in which they are listed
    # inside ``tokens`` is the order in which the lexer tries them.
    _nonterminal_rule = (
        r'(<)([ -;=?-~]+)(>)',
        bygroups(Punctuation, Name.Class, Punctuation),
    )
    _definition_rule = (r'::=', Operator)
    # Bulk match of everything that cannot start another rule (speed only).
    _text_run_rule = (r'[^<>:]+', Text)
    # A stray '<', '>' or ':' that none of the rules above consumed.
    _text_char_rule = (r'.', Text)

    tokens = {
        'root': [
            _nonterminal_rule,
            _definition_rule,
            _text_run_rule,
            _text_char_rule,
        ],
    }


class AbnfLexer(RegexLexer):
    """
    Lexer for ABNF grammars as described by IETF RFC 7405.

    (RFC 7405 updates `5234 <http://www.ietf.org/rfc/rfc5234.txt>`_.)

    .. versionadded:: 2.1
    """

    name = 'ABNF'
    url = 'http://www.ietf.org/rfc/rfc7405.txt'
    aliases = ['abnf']
    filenames = ['*.abnf']
    mimetypes = ['text/x-abnf']

    # Rule names that are highlighted as keywords ("Core Rules").
    _core_rules = (
        'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
        'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
        'SP', 'VCHAR', 'WSP')

    # Terminal values.  For every numeric base the range form ("a-b") is
    # listed before the concatenation form ("a.b.c") so it is tried first.
    _terminal_rules = [
        # Quoted string with optional %s / %i prefix.  A double quote
        # cannot occur inside; it has to be written as '%x22'.
        (r'(%[si])?"[^"]*"', Literal),
        # %b... binary
        (r'%b[01]+\-[01]+\b', Literal),
        (r'%b[01]+(\.[01]+)*\b', Literal),
        # %d... decimal
        (r'%d[0-9]+\-[0-9]+\b', Literal),
        (r'%d[0-9]+(\.[0-9]+)*\b', Literal),
        # %x... hexadecimal
        (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal),
        (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal),
    ]

    # Repetition prefixes (<a>*<b>element), longest alternative first;
    # the bare-number form also covers nRule.
    _repetition_rules = [
        (r'\b[0-9]+\*[0-9]+', Operator),
        (r'\b[0-9]+\*', Operator),
        (r'\b[0-9]+', Operator),
        (r'\*', Operator),
    ]

    # Rule names.  Core rules are not keywords in the strict sense, but
    # they are tried before the generic nonterminal pattern
    # (ALPHA *(ALPHA / DIGIT / "-")) so that they stand out.
    _rule_name_rules = [
        (words(_core_rules, suffix=r'\b'), Keyword),
        (r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),
    ]

    tokens = {
        'root': [
            # A comment runs from ';' to the end of the line.
            (r';.*$', Comment.Single),
            *_terminal_rules,
            *_repetition_rules,
            *_rule_name_rules,
            # Definition / incremental alternative / alternation.
            (r'(=/|=|/)', Operator),
            # Grouping and optional brackets.
            (r'[\[\]()]', Punctuation),
            # Whatever is left: whitespace, then any single character.
            (r'\s+', Whitespace),
            (r'.', Text),
        ],
    }


class JsgfLexer(RegexLexer):
    """
    For JSpeech Grammar Format grammars.

    The lexer is a small state machine: ``root`` combines the comment
    rules with the ordinary grammar rules, and dedicated states handle
    quoted strings, ``{...}`` tags, the name following ``grammar``,
    ``<...>`` rule names and ``/** ... */`` documentation comments.

    .. versionadded:: 2.2
    """
    name = 'JSGF'
    url = 'https://www.w3.org/TR/jsgf/'
    aliases = ['jsgf']
    filenames = ['*.jsgf']
    mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']

    tokens = {
        # Top level: comments are tried before everything else.
        'root': [
            include('comments'),
            include('non-comments'),
        ],
        'comments': [
            # '/**' starts a documentation comment, unless it is directly
            # followed by '/': '/**/' is left to the plain block-comment
            # rule below.
            (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
            # Ordinary block comment, matched non-greedily up to the first '*/'.
            (r'/\*[\w\W]*?\*/', Comment.Multiline),
            # Line comment.
            (r'//.*$', Comment.Single),
        ],
        # Everything that is not a comment.  Kept as a separate state so the
        # 'example' state can reuse it inside documentation comments.
        'non-comments': [
            # '#JSGF ...' header: only at the very start of the input (\A),
            # consumed up to, but not including, the first ';'.
            (r'\A#JSGF[^;]*', Comment.Preproc),
            (r'\s+', Whitespace),
            (r';', Punctuation),
            (r'[=|()\[\]*+]', Operator),
            # Slash-delimited text, emitted as a float literal
            # (presumably JSGF weights such as /0.5/ -- confirm against the spec).
            (r'/[^/]+/', Number.Float),
            # Opening double quote: the rest is handled by the 'string' state.
            (r'"', String.Double, 'string'),
            # Opening brace: the rest is handled by the 'tag' state.
            (r'\{', String.Other, 'tag'),
            (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
            # 'grammar' switches to a state that lexes the dotted name after it.
            (r'grammar\b', Keyword.Reserved, 'grammar name'),
            # <NULL> and <VOID> are highlighted as builtins; this rule must
            # precede the generic '<' rule below.
            (r'(<)(NULL|VOID)(>)',
             bygroups(Punctuation, Name.Builtin, Punctuation)),
            # Any other '<' opens a rule name.
            (r'<', Punctuation, 'rulename'),
            # Plain text: a word, or a run of characters that cannot start
            # any of the rules above.
            (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),
        ],
        # Inside "...": closing quote pops, backslash escapes any one character.
        'string': [
            (r'"', String.Double, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\"]+', String.Double),
        ],
        # Inside {...}: closing brace pops, backslash escapes any one character.
        'tag': [
            (r'\}', String.Other, '#pop'),
            (r'\\.', String.Escape),
            (r'[^\\}]+', String.Other),
        ],
        # After the 'grammar' keyword: dotted name terminated by ';'.
        'grammar name': [
            (r';', Punctuation, '#pop'),
            (r'\s+', Whitespace),
            (r'\.', Punctuation),
            (r'[^;\s.]+', Name.Namespace),
        ],
        # Inside <...>: closing '>' pops.
        'rulename': [
            (r'>', Punctuation, '#pop'),
            (r'\*', Punctuation),
            (r'\s+', Whitespace),
            # A segment that is followed by '.' is a namespace qualifier ...
            (r'([^.>]+)(\s*)(\.)', bygroups(Name.Namespace, Text, Punctuation)),
            # ... and a segment with no '.' after it is the rule name itself.
            (r'[^.>]+', Name.Constant),
        ],
        # Inside /** ... */.
        'documentation comment': [
            (r'\*/', Comment.Multiline, '#pop'),
            # '@example' / '@see' tag at the start of a line (optionally after
            # a leading '*').  The tag's body -- everything up to the next
            # line that starts a tag, or up to the closing '*/' -- is lexed
            # again by this same lexer, starting in the 'example' state.
            (r'^(\s*)(\*?)(\s*)(@(?:example|see))(\s+)'
             r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
             bygroups(Whitespace,Comment.Multiline, Whitespace, Comment.Special,
                      Whitespace, using(this, state='example'))),
            # Any other '@tag' at the start of a line is only marked as special.
            (r'(^\s*\*?\s*)(@\S*)',
             bygroups(Comment.Multiline, Comment.Special)),
            # Remaining comment text: a run without '*', newline or '@',
            # otherwise one character at a time.
            (r'[^*\n@]+|\w|\W', Comment.Multiline),
        ],
        # Body of an '@example' / '@see' tag: highlighted like grammar source.
        'example': [
            # The leading '*' of a continued comment line stays comment text.
            (r'(\n\s*)(\*)', bygroups(Whitespace, Comment.Multiline)),
            include('non-comments'),
            # Anything the grammar rules do not match is comment text.
            (r'.', Comment.Multiline),
        ],
    }


class PegLexer(RegexLexer):
    """
    This lexer is for Parsing Expression Grammars (PEG).

    Various implementations of PEG have made different decisions
    regarding the syntax, so let's try to be accommodating:

    * `<-`, `←`, `:`, and `=` are all accepted as rule operators.

    * Both `|` and `/` are choice operators.

    * `^`, `↑`, and `~` are cut operators.

    * A single `a-z` character immediately before a string, or
      multiple `a-z` characters following a string, are part of the
      string (e.g., `r"..."` or `"..."ilmsuxa`).

    * Inside a character class a backslash escapes the following
      character, so `[\\]]` is a class containing a closing bracket.

    .. versionadded:: 2.6
    """

    name = 'PEG'
    url = 'https://bford.info/pub/lang/peg.pdf'
    aliases = ['peg']
    filenames = ['*.peg']
    mimetypes = ['text/x-peg']

    tokens = {
        'root': [
            # Comments
            (r'#.*$', Comment.Single),

            # All operators
            (r'<-|[←:=/|&!?*+^↑~]', Operator),

            # Other punctuation
            (r'[()]', Punctuation),

            # Keywords
            (r'\.', Keyword),

            # Character classes.  The body alternates runs of ordinary
            # characters with backslash escapes.  The leading run must not
            # swallow a backslash, otherwise the escape alternative never
            # gets a chance and an escaped ']' would end the class early
            # (same shape as the quoted-string patterns below).
            (r'(\[)([^\]\\]*(?:\\.[^\]\\]*)*)(\])',
             bygroups(Punctuation, String, Punctuation)),

            # Single and double quoted strings (with optional modifiers)
            (r'[a-z]?"[^"\\]*(?:\\.[^"\\]*)*"[a-z]*', String.Double),
            (r"[a-z]?'[^'\\]*(?:\\.[^'\\]*)*'[a-z]*", String.Single),

            # Nonterminals are not whitespace, operators, or punctuation
            (r'[^\s<←:=/|&!?*+\^↑~()\[\]"\'#]+', Name.Class),

            # Fallback
            (r'.', Text),
        ],
    }
Initial commit 2023-02-20 22:38:24 +00:00			`"""`
			`pygments.lexers.grammar_notation`
			`~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`

			`Lexers for grammar notations like BNF.`

			`:copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.`
			`:license: BSD, see LICENSE for details.`
			`"""`

			`import re`

			`from pygments.lexer import RegexLexer, bygroups, include, this, using, words`
			`from pygments.token import Comment, Keyword, Literal, Name, Number, \`
			`Operator, Punctuation, String, Text, Whitespace`

			`__all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer']`


			`class BnfLexer(RegexLexer):`
			`"""`
			`This lexer is for grammar notations which are similar to`
			`original BNF.`

			`In order to maximize a number of targets of this lexer,`
			`let's decide some designs:`

			* We don't distinguish `Terminal Symbol`.

			* We do assume that `NonTerminal Symbol` are always enclosed
			`with arrow brackets.`

			* We do assume that `NonTerminal Symbol` may include
			`any printable characters except arrow brackets and ASCII 0x20.`
			This assumption is for `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.

			`* We do assume that target notation doesn't support comment.`

			`* We don't distinguish any operators and punctuation except`
			`::=`.

			`Though these decision making might cause too minimal highlighting`
			`and you might be disappointed, but it is reasonable for us.`

			`.. versionadded:: 2.1`
			`"""`

			`name = 'BNF'`
			`aliases = ['bnf']`
			`filenames = ['*.bnf']`
			`mimetypes = ['text/x-bnf']`

			`tokens = {`
			`'root': [`
			`(r'(<)([ -;=?-~]+)(>)',`
			`bygroups(Punctuation, Name.Class, Punctuation)),`

			`# an only operator`
			`(r'::=', Operator),`

			`# fallback`
			`(r'[^<>:]+', Text), # for performance`
			`(r'.', Text),`
			`],`
			`}`


			`class AbnfLexer(RegexLexer):`
			`"""`
			`Lexer for IETF 7405 ABNF.`

			(Updates `5234 <http://www.ietf.org/rfc/rfc5234.txt>`_) grammars.

			`.. versionadded:: 2.1`
			`"""`

			`name = 'ABNF'`
			`url = 'http://www.ietf.org/rfc/rfc7405.txt'`
			`aliases = ['abnf']`
			`filenames = ['*.abnf']`
			`mimetypes = ['text/x-abnf']`

			`_core_rules = (`
			`'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',`
			`'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',`
			`'SP', 'VCHAR', 'WSP')`

			`tokens = {`
			`'root': [`
			`# comment`
			`(r';.*$', Comment.Single),`

			`# quoted`
			`# double quote itself in this state, it is as '%x22'.`
			`(r'(%[si])?"[^"]*"', Literal),`

			`# binary (but i have never seen...)`
			`(r'%b[01]+\-[01]+\b', Literal), # range`
			`(r'%b[01]+(\.[01]+)*\b', Literal), # concat`

			`# decimal`
			`(r'%d[0-9]+\-[0-9]+\b', Literal), # range`
			`(r'%d[0-9]+(\.[0-9]+)*\b', Literal), # concat`

			`# hexadecimal`
			`(r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal), # range`
			`(r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal), # concat`

			`# repetition (<a>*<b>element) including nRule`
			`(r'\b[0-9]+\*[0-9]+', Operator),`
			`(r'\b[0-9]+\*', Operator),`
			`(r'\b[0-9]+', Operator),`
			`(r'\*', Operator),`

			`# Strictly speaking, these are not keyword but`
			# are called `Core Rule'.
			`(words(_core_rules, suffix=r'\b'), Keyword),`

			`# nonterminals (ALPHA *(ALPHA / DIGIT / "-"))`
			`(r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),`

			`# operators`
			`(r'(=/\|=\|/)', Operator),`

			`# punctuation`
			`(r'[\[\]()]', Punctuation),`

			`# fallback`
			`(r'\s+', Whitespace),`
			`(r'.', Text),`
			`],`
			`}`


			`class JsgfLexer(RegexLexer):`
			`"""`
			`For JSpeech Grammar Format grammars.`

			`.. versionadded:: 2.2`
			`"""`
			`name = 'JSGF'`
			`url = 'https://www.w3.org/TR/jsgf/'`
			`aliases = ['jsgf']`
			`filenames = ['*.jsgf']`
			`mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']`

			`tokens = {`
			`'root': [`
			`include('comments'),`
			`include('non-comments'),`
			`],`
			`'comments': [`
			`(r'/\\(?!/)', Comment.Multiline, 'documentation comment'),`
			`(r'/\[\w\W]?\*/', Comment.Multiline),`
			`(r'//.*$', Comment.Single),`
			`],`
			`'non-comments': [`
			`(r'\A#JSGF[^;]*', Comment.Preproc),`
			`(r'\s+', Whitespace),`
			`(r';', Punctuation),`
			`(r'[=\|()\[\]*+]', Operator),`
			`(r'/[^/]+/', Number.Float),`
			`(r'"', String.Double, 'string'),`
			`(r'\{', String.Other, 'tag'),`
			`(words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),`
			`(r'grammar\b', Keyword.Reserved, 'grammar name'),`
			`(r'(<)(NULL\|VOID)(>)',`
			`bygroups(Punctuation, Name.Builtin, Punctuation)),`
			`(r'<', Punctuation, 'rulename'),`
			`(r'\w+\|[^\s;=\|()\[\]*+/"{<\w]+', Text),`
			`],`
			`'string': [`
			`(r'"', String.Double, '#pop'),`
			`(r'\\.', String.Escape),`
			`(r'[^\\"]+', String.Double),`
			`],`
			`'tag': [`
			`(r'\}', String.Other, '#pop'),`
			`(r'\\.', String.Escape),`
			`(r'[^\\}]+', String.Other),`
			`],`
			`'grammar name': [`
			`(r';', Punctuation, '#pop'),`
			`(r'\s+', Whitespace),`
			`(r'\.', Punctuation),`
			`(r'[^;\s.]+', Name.Namespace),`
			`],`
			`'rulename': [`
			`(r'>', Punctuation, '#pop'),`
			`(r'\*', Punctuation),`
			`(r'\s+', Whitespace),`
			`(r'([^.>]+)(\s*)(\.)', bygroups(Name.Namespace, Text, Punctuation)),`
			`(r'[^.>]+', Name.Constant),`
			`],`
			`'documentation comment': [`
			`(r'\*/', Comment.Multiline, '#pop'),`
			`(r'^(\s)(\?)(\s*)(@(?:example\|see))(\s+)'`
			`r'([\w\W]?(?=(?:^\s\?\s@\|\*/)))',`
			`bygroups(Whitespace,Comment.Multiline, Whitespace, Comment.Special,`
			`Whitespace, using(this, state='example'))),`
			`(r'(^\s\?\s)(@\S)',`
			`bygroups(Comment.Multiline, Comment.Special)),`
			`(r'[^*\n@]+\|\w\|\W', Comment.Multiline),`
			`],`
			`'example': [`
			`(r'(\n\s)(\)', bygroups(Whitespace, Comment.Multiline)),`
			`include('non-comments'),`
			`(r'.', Comment.Multiline),`
			`],`
			`}`


			`class PegLexer(RegexLexer):`
			`"""`
			`This lexer is for Parsing Expression Grammars (PEG).`

			`Various implementations of PEG have made different decisions`
			`regarding the syntax, so let's try to be accommodating:`

			* `<-`, `←`, `:`, and `=` are all accepted as rule operators.

			* Both `\|` and `/` are choice operators.

			* `^`, `↑`, and `~` are cut operators.

			* A single `a-z` character immediately before a string, or
			multiple `a-z` characters following a string, are part of the
			string (e.g., `r"..."` or `"..."ilmsuxa`).

			`.. versionadded:: 2.6`
			`"""`

			`name = 'PEG'`
			`url = 'https://bford.info/pub/lang/peg.pdf'`
			`aliases = ['peg']`
			`filenames = ['*.peg']`
			`mimetypes = ['text/x-peg']`

			`tokens = {`
			`'root': [`
			`# Comments`
			`(r'#.*$', Comment.Single),`

			`# All operators`
			`(r'<-\|[←:=/\|&!?*+^↑~]', Operator),`

			`# Other punctuation`
			`(r'[()]', Punctuation),`

			`# Keywords`
			`(r'\.', Keyword),`

			`# Character classes`
			`(r'(\[)([^\]](?:\\.[^\]\\])*)(\])',`
			`bygroups(Punctuation, String, Punctuation)),`

			`# Single and double quoted strings (with optional modifiers)`
			`(r'[a-z]?"[^"\\](?:\\.[^"\\])"[a-z]', String.Double),`
			`(r"[a-z]?'[^'\\](?:\\.[^'\\])'[a-z]", String.Single),`

			`# Nonterminals are not whitespace, operators, or punctuation`
			`(r'[^\s<←:=/\|&!?*+\^↑~()\[\]"\'#]+', Name.Class),`

			`# Fallback`
			`(r'.', Text),`
			`],`
			`}`