usse/funda-scraper/venv/lib/python3.10/site-packages/pygments/lexers/esoteric.py

"""
    pygments.lexers.esoteric
    ~~~~~~~~~~~~~~~~~~~~~~~~

    Lexers for esoteric languages.

    :copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from pygments.lexer import RegexLexer, include, words, bygroups
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
    Number, Punctuation, Error, Whitespace

__all__ = ['BrainfuckLexer', 'BefungeLexer', 'RedcodeLexer', 'CAmkESLexer',
           'CapDLLexer', 'AheuiLexer']


class BrainfuckLexer(RegexLexer):
    """
    Lexer for the esoteric BrainFuck language.

    Bracket nesting is tracked with a dedicated ``loop`` state, so a ``]``
    seen while still in ``root`` (i.e. without an open ``[``) is emitted as
    an error token.
    """

    name = 'Brainfuck'
    url = 'http://www.muppetlabs.com/~breadbox/bf/'
    aliases = ['brainfuck', 'bf']
    filenames = ['*.bf', '*.b']
    mimetypes = ['application/x-brainfuck']

    tokens = {
        # Rules shared by 'root' and 'loop'; every instruction family gets
        # its own token type so it can be coloured differently.
        'common': [
            (r'[.,]+', Name.Tag),
            (r'[+-]+', Name.Builtin),
            (r'[<>]+', Name.Variable),
            # any run of non-instruction characters is treated as a comment
            (r'[^.,+\-<>\[\]]+', Comment),
        ],
        'root': [
            (r'\[', Keyword, 'loop'),
            # no loop is open at this level, so a closing bracket is an error
            (r'\]', Error),
            include('common'),
        ],
        'loop': [
            (r'\[', Keyword, '#push'),
            (r'\]', Keyword, '#pop'),
            include('common'),
        ]
    }

    def analyse_text(text):
        """Guess how likely *text* is to be Brainfuck.

        Returns 1.0 when more than a quarter of the inspected window is made
        of ``+``/``-`` characters, or more than a quarter is made of
        ``<``/``>`` characters.  Otherwise returns 0.5 if the idiom ``[-]``
        occurs anywhere in *text*, and 0 if it does not.
        """
        # The window size is never below 256, which means short inputs need
        # more than 64 matching characters to reach the 1.0 score.
        window = max(256, len(text))
        threshold = 0.25 * window
        sample = text[:window]

        arithmetic_ops = sample.count('+') + sample.count('-')
        if arithmetic_ops > threshold:
            return 1.0

        pointer_ops = sample.count('<') + sample.count('>')
        if pointer_ops > threshold:
            return 1.0

        return 0.5 if '[-]' in text else 0


class BefungeLexer(RegexLexer):
    """
    Lexer for the esoteric Befunge language.

    Every rule except the double-quoted string rule and the whitespace rule
    matches exactly one instruction character (the single-quote rule matches
    the quote plus the one character that follows it). All rules live in the
    single ``root`` state and are tried in the order listed.

    .. versionadded:: 0.7
    """
    name = 'Befunge'
    url = 'http://en.wikipedia.org/wiki/Befunge'
    aliases = ['befunge']
    filenames = ['*.befunge']
    mimetypes = ['application/x-befunge']

    tokens = {
        'root': [
            (r'[0-9a-f]', Number),                # Single digit, decimal or lowercase hex
            (r'[+*/%!`-]', Operator),             # Traditional math
            (r'[<>^v?\[\]rxjk]', Name.Variable),  # Move, imperatives
            (r'[:\\$.,n]', Name.Builtin),         # Stack ops, imperatives
            (r'[|_mw]', Keyword),                 # presumably the conditional/branch instructions -- TODO confirm
            (r'[{}]', Name.Tag),                  # Befunge-98 stack ops
            (r'".*?"', String.Double),            # Strings don't appear to allow escapes
            (r'\'.', String.Single),              # Single character
            (r'[#;]', Comment),                   # Trampoline... depends on direction hit
            (r'[pg&~=@iotsy]', Keyword),          # Misc
            (r'[()A-Z]', Comment),                # Fingerprints
            (r'\s+', Whitespace),                 # Whitespace doesn't matter
        ],
    }


class CAmkESLexer(RegexLexer):
    """
    Basic lexer for the input language for the CAmkES component platform.

    All rules live in the single ``root`` state.  A ``RegexLexer`` uses the
    first rule that matches at the current position, so the order of the
    rules below is significant (e.g. hex before float before integer).

    .. versionadded:: 2.1
    """
    name = 'CAmkES'
    url = 'https://sel4.systems/CAmkES/'
    aliases = ['camkes', 'idl4']
    filenames = ['*.camkes', '*.idl4']

    tokens = {
        'root': [
            # C pre-processor directive: optional indentation, the directive
            # up to the end of the line, then the newline itself
            (r'^(\s*)(#.*)(\n)', bygroups(Whitespace, Comment.Preproc,
                Whitespace)),

            # Whitespace, comments
            (r'\s+', Whitespace),
            (r'/\*(.|\n)*?\*/', Comment),
            (r'//.*$', Comment),

            (r'[\[(){},.;\]]', Punctuation),
            (r'[~!%^&*+=|?:<>/-]', Operator),

            # Language keywords; only a trailing word boundary is required
            (words(('assembly', 'attribute', 'component', 'composition',
                    'configuration', 'connection', 'connector', 'consumes',
                    'control', 'dataport', 'Dataport', 'Dataports', 'emits',
                    'event', 'Event', 'Events', 'export', 'from', 'group',
                    'hardware', 'has', 'interface', 'Interface', 'maybe',
                    'procedure', 'Procedure', 'Procedures', 'provides',
                    'template', 'thread', 'threads', 'to', 'uses', 'with'),
                   suffix=r'\b'), Keyword),

            # Type names and parameter directions.  'int16_t' was previously
            # misspelled 'int16_6', so the real type was never highlighted.
            (words(('bool', 'boolean', 'Buf', 'char', 'character', 'double',
                    'float', 'in', 'inout', 'int', 'int16_t', 'int32_t',
                    'int64_t', 'int8_t', 'integer', 'mutex', 'out', 'real',
                    'refin', 'semaphore', 'signed', 'string', 'struct',
                    'uint16_t', 'uint32_t', 'uint64_t', 'uint8_t', 'uintptr_t',
                    'unsigned', 'void'),
                   suffix=r'\b'), Keyword.Type),

            # Recognised attributes
            (r'[a-zA-Z_]\w*_(priority|domain|buffer)', Keyword.Reserved),
            (words(('dma_pool', 'from_access', 'to_access'), suffix=r'\b'),
                Keyword.Reserved),

            # CAmkES-level include
            (r'(import)(\s+)((?:<[^>]*>|"[^"]*");)',
                bygroups(Comment.Preproc, Whitespace, Comment.Preproc)),

            # C-level include
            (r'(include)(\s+)((?:<[^>]*>|"[^"]*");)',
                bygroups(Comment.Preproc, Whitespace, Comment.Preproc)),

            # Literals
            (r'0[xX][\da-fA-F]+', Number.Hex),
            # The float rule has to precede the integer rule: the integer
            # pattern also matches the digits in front of the decimal point,
            # so listing it first made the float rule unreachable.
            (r'-?[\d]+\.[\d]+', Number.Float),
            (r'-?[\d]+', Number),
            (r'"[^"]*"', String),
            (r'[Tt]rue|[Ff]alse', Name.Builtin),

            # Identifiers
            (r'[a-zA-Z_]\w*', Name),
        ],
    }


class CapDLLexer(RegexLexer):
    """
    Basic lexer for CapDL.

    The source of the primary tool that reads such specifications is available
    at https://github.com/seL4/capdl/tree/master/capDL-tool. Note that this
    lexer only supports a subset of the grammar. For example, identifiers can
    shadow type names, but these instances are currently incorrectly
    highlighted as types. Supporting this would need a stateful lexer that is
    considered unnecessarily complex for now.

    All rules live in the single ``root`` state and are tried in the order
    listed, so the keyword lists take precedence over the generic identifier
    rule at the end.

    .. versionadded:: 2.2
    """
    name = 'CapDL'
    url = 'https://ssrg.nicta.com.au/publications/nictaabstracts/Kuz_KLW_10.abstract.pml'
    aliases = ['capdl']
    filenames = ['*.cdl']

    tokens = {
        'root': [
            # C pre-processor directive: optional indentation, the directive
            # up to the end of the line, then the newline itself
            (r'^(\s*)(#.*)(\n)',
                bygroups(Whitespace, Comment.Preproc, Whitespace)),

            # Whitespace, comments (block comments, plus '//' and '--' line
            # comments)
            (r'\s+', Whitespace),
            (r'/\*(.|\n)*?\*/', Comment),
            (r'(//|--).*$', Comment),

            (r'[<>\[(){},:;=\]]', Punctuation),
            (r'\.\.', Punctuation),

            # Keywords; only a trailing word boundary is required
            (words(('arch', 'arm11', 'caps', 'child_of', 'ia32', 'irq', 'maps',
                    'objects'), suffix=r'\b'), Keyword),

            # Names highlighted as types
            (words(('aep', 'asid_pool', 'cnode', 'ep', 'frame', 'io_device',
                    'io_ports', 'io_pt', 'notification', 'pd', 'pt', 'tcb',
                    'ut', 'vcpu'), suffix=r'\b'), Keyword.Type),

            # Properties
            (words(('asid', 'addr', 'badge', 'cached', 'dom', 'domainID', 'elf',
                    'fault_ep', 'G', 'guard', 'guard_size', 'init', 'ip',
                    'prio', 'sp', 'R', 'RG', 'RX', 'RW', 'RWG', 'RWX', 'W',
                    'WG', 'WX', 'level', 'masked', 'master_reply', 'paddr',
                    'ports', 'reply', 'uncached'), suffix=r'\b'),
             Keyword.Reserved),

            # Literals
            # Hex is tried before the decimal rule, which would otherwise
            # match just the leading '0'.
            (r'0[xX][\da-fA-F]+', Number.Hex),
            # Decimal with an optional fractional part and optional k/M suffix
            (r'\d+(\.\d+)?(k|M)?', Number),
            # The following words are emitted with the Number token type too
            (words(('bits',), suffix=r'\b'), Number),
            (words(('cspace', 'vspace', 'reply_slot', 'caller_slot',
                    'ipc_buffer_slot'), suffix=r'\b'), Number),

            # Identifiers (may contain '-', '@' and '.' after the first
            # character)
            (r'[a-zA-Z_][-@\.\w]*', Name),
        ],
    }


class RedcodeLexer(RegexLexer):
    """
    A simple Redcode lexer based on ICWS'94.
    Contributed by Adam Blinkinsop <blinks@acm.org>.

    The ``opcodes`` and ``modifiers`` tuples are joined into alternations
    when the class body is evaluated, so each of them is matched as a whole
    word (``\\b`` on both sides).  Matching is case-sensitive.

    .. versionadded:: 0.8
    """
    name = 'Redcode'
    aliases = ['redcode']
    filenames = ['*.cw']

    # Instruction mnemonics and pseudo-ops highlighted as Name.Function
    opcodes = ('DAT', 'MOV', 'ADD', 'SUB', 'MUL', 'DIV', 'MOD',
               'JMP', 'JMZ', 'JMN', 'DJN', 'CMP', 'SLT', 'SPL',
               'ORG', 'EQU', 'END')
    # Instruction modifiers highlighted as Name.Decorator
    modifiers = ('A', 'B', 'AB', 'BA', 'F', 'X', 'I')

    tokens = {
        'root': [
            # Whitespace:
            (r'\s+', Whitespace),
            (r';.*$', Comment.Single),
            # Lexemes:
            #  Identifiers
            (r'\b(%s)\b' % '|'.join(opcodes), Name.Function),
            (r'\b(%s)\b' % '|'.join(modifiers), Name.Decorator),
            # '\w*' rather than '\w+': a one-character label such as 'x'
            # must also match here, otherwise no rule applies to it and it
            # is emitted as an error token.
            (r'[A-Za-z_]\w*', Name),
            #  Operators
            (r'[-+*/%]', Operator),
            (r'[#$@<>]', Operator),  # mode
            (r'[.,]', Punctuation),  # mode
            #  Numbers
            (r'[-+]?\d+', Number.Integer),
        ],
    }


class AheuiLexer(RegexLexer):
    """
    Lexer for Aheui, an esoteric language based on the Korean alphabet.

    A single character that falls inside one of the Hangul ranges listed
    below is emitted as ``Operator``; any other single character matched by
    ``.`` is emitted as ``Comment``.
    """

    name = 'Aheui'
    url = 'http://aheui.github.io/'
    aliases = ['aheui']
    filenames = ['*.aheui']

    tokens = {
        'root': [
            # One character class assembled from adjacent string literals;
            # each literal contributes several 'first-last' ranges.
            ('['
             '나-낳냐-냫너-넣녀-녛노-놓뇨-눟뉴-닇'
             '다-닿댜-댷더-덯뎌-뎧도-돟됴-둫듀-딓'
             '따-땋땨-떃떠-떻뗘-뗳또-똫뚀-뚷뜌-띟'
             '라-랗랴-럏러-렇려-렿로-롷료-뤃류-릫'
             '마-맣먀-먛머-멓며-몋모-뫃묘-뭏뮤-믷'
             '바-밯뱌-뱧버-벟벼-볗보-봏뵤-붛뷰-빃'
             '빠-빻뺘-뺳뻐-뻫뼈-뼣뽀-뽛뾰-뿧쀼-삏'
             '사-샇샤-샿서-섷셔-셯소-솧쇼-숳슈-싛'
             '싸-쌓쌰-썋써-쎃쎠-쎻쏘-쏳쑈-쑿쓔-씧'
             '자-잫쟈-쟣저-젛져-졓조-좋죠-줗쥬-즿'
             '차-챃챠-챻처-첳쳐-쳫초-촣쵸-춯츄-칗'
             '카-캏캬-컇커-컿켜-켷코-콯쿄-쿻큐-킣'
             '타-탛탸-턓터-텋텨-톃토-톻툐-퉇튜-틯'
             '파-팧퍄-퍟퍼-펗펴-폏포-퐇표-풓퓨-픻'
             '하-핳햐-햫허-헣혀-혛호-홓효-훟휴-힇'
             ']', Operator),
            # Fallback: everything else is a comment, one character at a time
            ('.', Comment),
        ],
    }
Initial commit 2023-02-20 22:38:24 +00:00			`"""`
			`pygments.lexers.esoteric`
			`~~~~~~~~~~~~~~~~~~~~~~~~`

			`Lexers for esoteric languages.`

			`:copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.`
			`:license: BSD, see LICENSE for details.`
			`"""`

			`from pygments.lexer import RegexLexer, include, words, bygroups`
			`from pygments.token import Text, Comment, Operator, Keyword, Name, String, \`
			`Number, Punctuation, Error, Whitespace`

			`__all__ = ['BrainfuckLexer', 'BefungeLexer', 'RedcodeLexer', 'CAmkESLexer',`
			`'CapDLLexer', 'AheuiLexer']`


			`class BrainfuckLexer(RegexLexer):`
			`"""`
			`Lexer for the esoteric BrainFuck language.`
			`"""`

			`name = 'Brainfuck'`
			`url = 'http://www.muppetlabs.com/~breadbox/bf/'`
			`aliases = ['brainfuck', 'bf']`
			`filenames = ['.bf', '.b']`
			`mimetypes = ['application/x-brainfuck']`

			`tokens = {`
			`'common': [`
			`# use different colors for different instruction types`
			`(r'[.,]+', Name.Tag),`
			`(r'[+-]+', Name.Builtin),`
			`(r'[<>]+', Name.Variable),`
			`(r'[^.,+\-<>\[\]]+', Comment),`
			`],`
			`'root': [`
			`(r'\[', Keyword, 'loop'),`
			`(r'\]', Error),`
			`include('common'),`
			`],`
			`'loop': [`
			`(r'\[', Keyword, '#push'),`
			`(r'\]', Keyword, '#pop'),`
			`include('common'),`
			`]`
			`}`

			`def analyse_text(text):`
			`"""It's safe to assume that a program which mostly consists of + -`
			`and < > is brainfuck."""`
			`plus_minus_count = 0`
			`greater_less_count = 0`

			`range_to_check = max(256, len(text))`

			`for c in text[:range_to_check]:`
			`if c == '+' or c == '-':`
			`plus_minus_count += 1`
			`if c == '<' or c == '>':`
			`greater_less_count += 1`

			`if plus_minus_count > (0.25 * range_to_check):`
			`return 1.0`
			`if greater_less_count > (0.25 * range_to_check):`
			`return 1.0`

			`result = 0`
			`if '[-]' in text:`
			`result += 0.5`

			`return result`


			`class BefungeLexer(RegexLexer):`
			`"""`
			`Lexer for the esoteric Befunge language.`

			`.. versionadded:: 0.7`
			`"""`
			`name = 'Befunge'`
			`url = 'http://en.wikipedia.org/wiki/Befunge'`
			`aliases = ['befunge']`
			`filenames = ['*.befunge']`
			`mimetypes = ['application/x-befunge']`

			`tokens = {`
			`'root': [`
			`(r'[0-9a-f]', Number),`
			(r'[+*/%!`-]', Operator), # Traditional math
			`(r'[<>^v?\[\]rxjk]', Name.Variable), # Move, imperatives`
			`(r'[:\\$.,n]', Name.Builtin), # Stack ops, imperatives`
			`(r'[\|_mw]', Keyword),`
			`(r'[{}]', Name.Tag), # Befunge-98 stack ops`
			`(r'".*?"', String.Double), # Strings don't appear to allow escapes`
			`(r'\'.', String.Single), # Single character`
			`(r'[#;]', Comment), # Trampoline... depends on direction hit`
			`(r'[pg&~=@iotsy]', Keyword), # Misc`
			`(r'[()A-Z]', Comment), # Fingerprints`
			`(r'\s+', Whitespace), # Whitespace doesn't matter`
			`],`
			`}`


			`class CAmkESLexer(RegexLexer):`
			`"""`
			`Basic lexer for the input language for the CAmkES component platform.`

			`.. versionadded:: 2.1`
			`"""`
			`name = 'CAmkES'`
			`url = 'https://sel4.systems/CAmkES/'`
			`aliases = ['camkes', 'idl4']`
			`filenames = ['.camkes', '.idl4']`

			`tokens = {`
			`'root': [`
			`# C pre-processor directive`
			`(r'^(\s)(#.)(\n)', bygroups(Whitespace, Comment.Preproc,`
			`Whitespace)),`

			`# Whitespace, comments`
			`(r'\s+', Whitespace),`
			`(r'/\(.\|\n)?\*/', Comment),`
			`(r'//.*$', Comment),`

			`(r'[\[(){},.;\]]', Punctuation),`
			`(r'[~!%^&*+=\|?:<>/-]', Operator),`

			`(words(('assembly', 'attribute', 'component', 'composition',`
			`'configuration', 'connection', 'connector', 'consumes',`
			`'control', 'dataport', 'Dataport', 'Dataports', 'emits',`
			`'event', 'Event', 'Events', 'export', 'from', 'group',`
			`'hardware', 'has', 'interface', 'Interface', 'maybe',`
			`'procedure', 'Procedure', 'Procedures', 'provides',`
			`'template', 'thread', 'threads', 'to', 'uses', 'with'),`
			`suffix=r'\b'), Keyword),`

			`(words(('bool', 'boolean', 'Buf', 'char', 'character', 'double',`
			`'float', 'in', 'inout', 'int', 'int16_6', 'int32_t',`
			`'int64_t', 'int8_t', 'integer', 'mutex', 'out', 'real',`
			`'refin', 'semaphore', 'signed', 'string', 'struct',`
			`'uint16_t', 'uint32_t', 'uint64_t', 'uint8_t', 'uintptr_t',`
			`'unsigned', 'void'),`
			`suffix=r'\b'), Keyword.Type),`

			`# Recognised attributes`
			`(r'[a-zA-Z_]\w*_(priority\|domain\|buffer)', Keyword.Reserved),`
			`(words(('dma_pool', 'from_access', 'to_access'), suffix=r'\b'),`
			`Keyword.Reserved),`

			`# CAmkES-level include`
			`(r'(import)(\s+)((?:<[^>]>\|"[^"]");)',`
			`bygroups(Comment.Preproc, Whitespace, Comment.Preproc)),`

			`# C-level include`
			`(r'(include)(\s+)((?:<[^>]>\|"[^"]");)',`
			`bygroups(Comment.Preproc, Whitespace, Comment.Preproc)),`

			`# Literals`
			`(r'0[xX][\da-fA-F]+', Number.Hex),`
			`(r'-?[\d]+', Number),`
			`(r'-?[\d]+\.[\d]+', Number.Float),`
			`(r'"[^"]*"', String),`
			`(r'[Tt]rue\|[Ff]alse', Name.Builtin),`

			`# Identifiers`
			`(r'[a-zA-Z_]\w*', Name),`
			`],`
			`}`


			`class CapDLLexer(RegexLexer):`
			`"""`
			`Basic lexer for CapDL.`

			`The source of the primary tool that reads such specifications is available`
			`at https://github.com/seL4/capdl/tree/master/capDL-tool. Note that this`
			`lexer only supports a subset of the grammar. For example, identifiers can`
			`shadow type names, but these instances are currently incorrectly`
			`highlighted as types. Supporting this would need a stateful lexer that is`
			`considered unnecessarily complex for now.`

			`.. versionadded:: 2.2`
			`"""`
			`name = 'CapDL'`
			`url = 'https://ssrg.nicta.com.au/publications/nictaabstracts/Kuz_KLW_10.abstract.pml'`
			`aliases = ['capdl']`
			`filenames = ['*.cdl']`

			`tokens = {`
			`'root': [`
			`# C pre-processor directive`
			`(r'^(\s)(#.)(\n)',`
			`bygroups(Whitespace, Comment.Preproc, Whitespace)),`

			`# Whitespace, comments`
			`(r'\s+', Whitespace),`
			`(r'/\(.\|\n)?\*/', Comment),`
			`(r'(//\|--).*$', Comment),`

			`(r'[<>\[(){},:;=\]]', Punctuation),`
			`(r'\.\.', Punctuation),`

			`(words(('arch', 'arm11', 'caps', 'child_of', 'ia32', 'irq', 'maps',`
			`'objects'), suffix=r'\b'), Keyword),`

			`(words(('aep', 'asid_pool', 'cnode', 'ep', 'frame', 'io_device',`
			`'io_ports', 'io_pt', 'notification', 'pd', 'pt', 'tcb',`
			`'ut', 'vcpu'), suffix=r'\b'), Keyword.Type),`

			`# Properties`
			`(words(('asid', 'addr', 'badge', 'cached', 'dom', 'domainID', 'elf',`
			`'fault_ep', 'G', 'guard', 'guard_size', 'init', 'ip',`
			`'prio', 'sp', 'R', 'RG', 'RX', 'RW', 'RWG', 'RWX', 'W',`
			`'WG', 'WX', 'level', 'masked', 'master_reply', 'paddr',`
			`'ports', 'reply', 'uncached'), suffix=r'\b'),`
			`Keyword.Reserved),`

			`# Literals`
			`(r'0[xX][\da-fA-F]+', Number.Hex),`
			`(r'\d+(\.\d+)?(k\|M)?', Number),`
			`(words(('bits',), suffix=r'\b'), Number),`
			`(words(('cspace', 'vspace', 'reply_slot', 'caller_slot',`
			`'ipc_buffer_slot'), suffix=r'\b'), Number),`

			`# Identifiers`
			`(r'[a-zA-Z_][-@\.\w]*', Name),`
			`],`
			`}`


			`class RedcodeLexer(RegexLexer):`
			`"""`
			`A simple Redcode lexer based on ICWS'94.`
			`Contributed by Adam Blinkinsop <blinks@acm.org>.`

			`.. versionadded:: 0.8`
			`"""`
			`name = 'Redcode'`
			`aliases = ['redcode']`
			`filenames = ['*.cw']`

			`opcodes = ('DAT', 'MOV', 'ADD', 'SUB', 'MUL', 'DIV', 'MOD',`
			`'JMP', 'JMZ', 'JMN', 'DJN', 'CMP', 'SLT', 'SPL',`
			`'ORG', 'EQU', 'END')`
			`modifiers = ('A', 'B', 'AB', 'BA', 'F', 'X', 'I')`

			`tokens = {`
			`'root': [`
			`# Whitespace:`
			`(r'\s+', Whitespace),`
			`(r';.*$', Comment.Single),`
			`# Lexemes:`
			`# Identifiers`
			`(r'\b(%s)\b' % '\|'.join(opcodes), Name.Function),`
			`(r'\b(%s)\b' % '\|'.join(modifiers), Name.Decorator),`
			`(r'[A-Za-z_]\w+', Name),`
			`# Operators`
			`(r'[-+*/%]', Operator),`
			`(r'[#$@<>]', Operator), # mode`
			`(r'[.,]', Punctuation), # mode`
			`# Numbers`
			`(r'[-+]?\d+', Number.Integer),`
			`],`
			`}`


			`class AheuiLexer(RegexLexer):`
			`"""`
			`Aheui is esoteric language based on Korean alphabets.`
			`"""`

			`name = 'Aheui'`
			`url = 'http://aheui.github.io/'`
			`aliases = ['aheui']`
			`filenames = ['*.aheui']`

			`tokens = {`
			`'root': [`
			`('['`
			`'나-낳냐-냫너-넣녀-녛노-놓뇨-눟뉴-닇'`
			`'다-닿댜-댷더-덯뎌-뎧도-돟됴-둫듀-딓'`
			`'따-땋땨-떃떠-떻뗘-뗳또-똫뚀-뚷뜌-띟'`
			`'라-랗랴-럏러-렇려-렿로-롷료-뤃류-릫'`
			`'마-맣먀-먛머-멓며-몋모-뫃묘-뭏뮤-믷'`
			`'바-밯뱌-뱧버-벟벼-볗보-봏뵤-붛뷰-빃'`
			`'빠-빻뺘-뺳뻐-뻫뼈-뼣뽀-뽛뾰-뿧쀼-삏'`
			`'사-샇샤-샿서-섷셔-셯소-솧쇼-숳슈-싛'`
			`'싸-쌓쌰-썋써-쎃쎠-쎻쏘-쏳쑈-쑿쓔-씧'`
			`'자-잫쟈-쟣저-젛져-졓조-좋죠-줗쥬-즿'`
			`'차-챃챠-챻처-첳쳐-쳫초-촣쵸-춯츄-칗'`
			`'카-캏캬-컇커-컿켜-켷코-콯쿄-쿻큐-킣'`
			`'타-탛탸-턓터-텋텨-톃토-톻툐-퉇튜-틯'`
			`'파-팧퍄-퍟퍼-펗펴-폏포-퐇표-풓퓨-픻'`
			`'하-핳햐-햫허-헣혀-혛호-홓효-훟휴-힇'`
			`']', Operator),`
			`('.', Comment),`
			`],`
			`}`