Shofel2_T124_python/venv/lib/python3.10/site-packages/jsonpath_ng/lexer.py

from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes
import sys
import logging

import ply.lex

from jsonpath_ng.exceptions import JsonPathLexerError

logger = logging.getLogger(__name__)


class JsonPathLexer(object):
    '''
    A lexical analyzer for JsonPath, built on PLY (``ply.lex``).

    The instance itself is handed to ``ply.lex.lex(module=self)`` by
    ``tokenize``, so the ``tokens``/``literals``/``states`` attributes and
    every ``t_*`` member below form the lexer specification. For the ``t_*``
    functions the docstring IS the token regex: those docstrings must stay
    exactly as written and must not be stripped (no ``python -OO``).

    Single- and double-quoted text is lexed in exclusive states and emitted
    as ``ID`` tokens; back-quoted text is emitted as ``NAMED_OPERATOR``.
    '''

    def __init__(self, debug=False):
        # `debug` is forwarded unchanged to ply.lex.lex() by tokenize().
        self.debug = debug
        # The class docstring doubles as a canary: if it is gone, the regex
        # docstrings of the t_* rules are gone too and PLY cannot build a lexer.
        if self.__doc__ is None:
            raise JsonPathLexerError('Docstrings have been removed! By design of PLY, jsonpath-rw requires docstrings. You must not use PYTHONOPTIMIZE=2 or python -OO.')

    def tokenize(self, string):
        '''
        Maps a string to an iterator over tokens. In other words: [char] -> [token]

        This is a generator. A fresh PLY lexer is built on every call, so
        per-input state (``latest_newline``, ``string_value``) never leaks
        between calls. Each yielded token gets an extra ``col`` attribute.

        Raises JsonPathLexerError on an unexpected character, or - once the
        input is exhausted - if a quoted section was opened but never closed.
        '''

        new_lexer = ply.lex.lex(module=self, debug=self.debug, errorlog=logger)
        new_lexer.latest_newline = 0      # offset of the most recent '\n' seen
        new_lexer.string_value = None     # text of the quoted section being lexed, if any
        new_lexer.input(string)

        while True:
            t = new_lexer.token()
            if t is None:
                break
            # Column relative to the last newline offset. Because
            # latest_newline starts at 0 and later holds the offset of the
            # '\n' itself, columns are 0-based on the first line and 1-based
            # afterwards; kept as-is for compatibility.
            t.col = t.lexpos - new_lexer.latest_newline
            yield t

        # A quote rule sets string_value to '' and only the matching *_end
        # rule resets it to None, so a non-None value here means EOF arrived
        # inside a quoted section.
        if new_lexer.string_value is not None:
            raise JsonPathLexerError('Unexpected EOF in string literal or identifier')

    @staticmethod
    def _append_quoted_content(t):
        # Shared body of the three *_content rules: append the matched text
        # to the pending quoted value and keep line/column bookkeeping in
        # sync. The content regexes also match '\n', and the exclusive quote
        # states do not see t_newline, so without this every line/col
        # reported after a multi-line quoted section would be wrong.
        t.lexer.string_value += t.value
        newline_count = t.value.count('\n')
        if newline_count:
            t.lexer.lineno += newline_count
            # Same convention as t_newline: store the offset of the '\n'.
            t.lexer.latest_newline = t.lexpos + t.value.rindex('\n')

    # ============== PLY Lexer specification ==================
    #
    # This probably should be private but:
    #   - the parser requires access to `tokens` (perhaps they should be defined in a third, shared dependency)
    #   - things like `literals` might be a legitimate part of the public interface.
    #
    # Anyhow, it is pythonic to give some rope to hang oneself with :-)

    literals = ['*', '.', '[', ']', '(', ')', '$', ',', ':', '|', '&', '~']

    # Identifiers that are promoted from ID to a dedicated token type.
    reserved_words = { 'where': 'WHERE' }

    tokens = ['DOUBLEDOT', 'NUMBER', 'ID', 'NAMED_OPERATOR'] + list(reserved_words.values())

    # One exclusive state per quoting style; while inside one, only the
    # t_<state>_* rules below apply.
    states = [ ('singlequote', 'exclusive'),
               ('doublequote', 'exclusive'),
               ('backquote', 'exclusive') ]

    # Normal lexing, rather easy
    t_DOUBLEDOT = r'\.\.'
    t_ignore = ' \t'

    # Bare identifier; reserved words get their own token type.
    def t_ID(self, t):
        r'[a-zA-Z_@][a-zA-Z0-9_@\-]*'
        t.type = self.reserved_words.get(t.value, 'ID')
        return t

    # Optionally negative integer; the token value becomes an int.
    def t_NUMBER(self, t):
        r'-?\d+'
        t.value = int(t.value)
        return t


    # Single-quoted strings
    t_singlequote_ignore = ''
    # Opening quote: start collecting text and switch state (no token emitted).
    def t_singlequote(self, t):
        r"'"
        # NOTE: string_start is recorded but not read anywhere in this class.
        t.lexer.string_start = t.lexer.lexpos
        t.lexer.string_value = ''
        t.lexer.push_state('singlequote')

    def t_singlequote_content(self, t):
        r"[^'\\]+"
        self._append_quoted_content(t)

    # Backslash escape: keep only the escaped character itself.
    def t_singlequote_escape(self, t):
        r'\\.'
        t.lexer.string_value += t.value[1]

    # Closing quote: emit the collected text as an ID token.
    def t_singlequote_end(self, t):
        r"'"
        t.value = t.lexer.string_value
        t.type = 'ID'
        t.lexer.string_value = None
        t.lexer.pop_state()
        return t

    def t_singlequote_error(self, t):
        raise JsonPathLexerError('Error on line %s, col %s while lexing singlequoted field: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))


    # Double-quoted strings
    t_doublequote_ignore = ''
    # Opening quote: start collecting text and switch state (no token emitted).
    def t_doublequote(self, t):
        r'"'
        # NOTE: string_start is recorded but not read anywhere in this class.
        t.lexer.string_start = t.lexer.lexpos
        t.lexer.string_value = ''
        t.lexer.push_state('doublequote')

    def t_doublequote_content(self, t):
        r'[^"\\]+'
        self._append_quoted_content(t)

    # Backslash escape: keep only the escaped character itself.
    def t_doublequote_escape(self, t):
        r'\\.'
        t.lexer.string_value += t.value[1]

    # Closing quote: emit the collected text as an ID token.
    def t_doublequote_end(self, t):
        r'"'
        t.value = t.lexer.string_value
        t.type = 'ID'
        t.lexer.string_value = None
        t.lexer.pop_state()
        return t

    def t_doublequote_error(self, t):
        raise JsonPathLexerError('Error on line %s, col %s while lexing doublequoted field: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))


    # Back-quoted "magic" operators
    t_backquote_ignore = ''
    # Opening back-quote: start collecting text and switch state (no token emitted).
    def t_backquote(self, t):
        r'`'
        # NOTE: string_start is recorded but not read anywhere in this class.
        t.lexer.string_start = t.lexer.lexpos
        t.lexer.string_value = ''
        t.lexer.push_state('backquote')

    # Backslash escape: keep only the escaped character itself.
    def t_backquote_escape(self, t):
        r'\\.'
        t.lexer.string_value += t.value[1]

    def t_backquote_content(self, t):
        r"[^`\\]+"
        self._append_quoted_content(t)

    # Closing back-quote: emit the collected text as a NAMED_OPERATOR token.
    def t_backquote_end(self, t):
        r'`'
        t.value = t.lexer.string_value
        t.type = 'NAMED_OPERATOR'
        t.lexer.string_value = None
        t.lexer.pop_state()
        return t

    def t_backquote_error(self, t):
        raise JsonPathLexerError('Error on line %s, col %s while lexing backquoted operator: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))


    # Counting lines, handling errors
    def t_newline(self, t):
        r'\n'
        t.lexer.lineno += 1
        # Remember where this newline sits so columns can be derived later.
        t.lexer.latest_newline = t.lexpos

    def t_error(self, t):
        raise JsonPathLexerError('Error on line %s, col %s: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))

if __name__ == '__main__':
    # Debug entry point: lex everything arriving on stdin and print one
    # "<value><type>" row per token, with PLY debug output enabled.
    logging.basicConfig()
    stdin_lexer = JsonPathLexer(debug=True)
    source_text = sys.stdin.read()
    for tok in stdin_lexer.tokenize(source_text):
        row = '%-20s%s' % (tok.value, tok.type)
        print(row)
fully working method 2024-05-25 16:45:07 +00:00			`from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes`
			`import sys`
			`import logging`

			`import ply.lex`

			`from jsonpath_ng.exceptions import JsonPathLexerError`

			`logger = logging.getLogger(__name__)`


			`class JsonPathLexer(object):`
			`'''`
			`A Lexical analyzer for JsonPath.`
			`'''`

			`def __init__(self, debug=False):`
			`self.debug = debug`
			`if self.__doc__ is None:`
			`raise JsonPathLexerError('Docstrings have been removed! By design of PLY, jsonpath-rw requires docstrings. You must not use PYTHONOPTIMIZE=2 or python -OO.')`

			`def tokenize(self, string):`
			`'''`
			`Maps a string to an iterator over tokens. In other words: [char] -> [token]`
			`'''`

			`new_lexer = ply.lex.lex(module=self, debug=self.debug, errorlog=logger)`
			`new_lexer.latest_newline = 0`
			`new_lexer.string_value = None`
			`new_lexer.input(string)`

			`while True:`
			`t = new_lexer.token()`
			`if t is None:`
			`break`
			`t.col = t.lexpos - new_lexer.latest_newline`
			`yield t`

			`if new_lexer.string_value is not None:`
			`raise JsonPathLexerError('Unexpected EOF in string literal or identifier')`

			`# ============== PLY Lexer specification ==================`
			`#`
			`# This probably should be private but:`
			# - the parser requires access to `tokens` (perhaps they should be defined in a third, shared dependency)
			# - things like `literals` might be a legitimate part of the public interface.
			`#`
			`# Anyhow, it is pythonic to give some rope to hang oneself with :-)`

			`literals = ['*', '.', '[', ']', '(', ')', '$', ',', ':', '\|', '&', '~']`

			`reserved_words = { 'where': 'WHERE' }`

			`tokens = ['DOUBLEDOT', 'NUMBER', 'ID', 'NAMED_OPERATOR'] + list(reserved_words.values())`

			`states = [ ('singlequote', 'exclusive'),`
			`('doublequote', 'exclusive'),`
			`('backquote', 'exclusive') ]`

			`# Normal lexing, rather easy`
			`t_DOUBLEDOT = r'\.\.'`
			`t_ignore = ' \t'`

			`def t_ID(self, t):`
			`r'[a-zA-Z_@][a-zA-Z0-9_@\-]*'`
			`t.type = self.reserved_words.get(t.value, 'ID')`
			`return t`

			`def t_NUMBER(self, t):`
			`r'-?\d+'`
			`t.value = int(t.value)`
			`return t`


			`# Single-quoted strings`
			`t_singlequote_ignore = ''`
			`def t_singlequote(self, t):`
			`r"'"`
			`t.lexer.string_start = t.lexer.lexpos`
			`t.lexer.string_value = ''`
			`t.lexer.push_state('singlequote')`

			`def t_singlequote_content(self, t):`
			`r"[^'\\]+"`
			`t.lexer.string_value += t.value`

			`def t_singlequote_escape(self, t):`
			`r'\\.'`
			`t.lexer.string_value += t.value[1]`

			`def t_singlequote_end(self, t):`
			`r"'"`
			`t.value = t.lexer.string_value`
			`t.type = 'ID'`
			`t.lexer.string_value = None`
			`t.lexer.pop_state()`
			`return t`

			`def t_singlequote_error(self, t):`
			`raise JsonPathLexerError('Error on line %s, col %s while lexing singlequoted field: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))`


			`# Double-quoted strings`
			`t_doublequote_ignore = ''`
			`def t_doublequote(self, t):`
			`r'"'`
			`t.lexer.string_start = t.lexer.lexpos`
			`t.lexer.string_value = ''`
			`t.lexer.push_state('doublequote')`

			`def t_doublequote_content(self, t):`
			`r'[^"\\]+'`
			`t.lexer.string_value += t.value`

			`def t_doublequote_escape(self, t):`
			`r'\\.'`
			`t.lexer.string_value += t.value[1]`

			`def t_doublequote_end(self, t):`
			`r'"'`
			`t.value = t.lexer.string_value`
			`t.type = 'ID'`
			`t.lexer.string_value = None`
			`t.lexer.pop_state()`
			`return t`

			`def t_doublequote_error(self, t):`
			`raise JsonPathLexerError('Error on line %s, col %s while lexing doublequoted field: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))`


			`# Back-quoted "magic" operators`
			`t_backquote_ignore = ''`
			`def t_backquote(self, t):`
			r'`'
			`t.lexer.string_start = t.lexer.lexpos`
			`t.lexer.string_value = ''`
			`t.lexer.push_state('backquote')`

			`def t_backquote_escape(self, t):`
			`r'\\.'`
			`t.lexer.string_value += t.value[1]`

			`def t_backquote_content(self, t):`
			r"[^`\\]+"
			`t.lexer.string_value += t.value`

			`def t_backquote_end(self, t):`
			r'`'
			`t.value = t.lexer.string_value`
			`t.type = 'NAMED_OPERATOR'`
			`t.lexer.string_value = None`
			`t.lexer.pop_state()`
			`return t`

			`def t_backquote_error(self, t):`
			`raise JsonPathLexerError('Error on line %s, col %s while lexing backquoted operator: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))`


			`# Counting lines, handling errors`
			`def t_newline(self, t):`
			`r'\n'`
			`t.lexer.lineno += 1`
			`t.lexer.latest_newline = t.lexpos`

			`def t_error(self, t):`
			`raise JsonPathLexerError('Error on line %s, col %s: Unexpected character: %s ' % (t.lexer.lineno, t.lexpos - t.lexer.latest_newline, t.value[0]))`

			`if __name__ == '__main__':`
			`logging.basicConfig()`
			`lexer = JsonPathLexer(debug=True)`
			`for token in lexer.tokenize(sys.stdin.read()):`
			`print('%-20s%s' % (token.value, token.type))`