usse/funda-scraper/venv/lib/python3.10/site-packages/pygments/lexers/tnt.py

272 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
pygments.lexers.tnt
~~~~~~~~~~~~~~~~~~~
Lexer for Typographic Number Theory.
:copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re
from pygments.lexer import Lexer
from pygments.token import Text, Comment, Operator, Keyword, Name, Number, \
Punctuation, Error
__all__ = ['TNTLexer']
class TNTLexer(Lexer):
"""
Lexer for Typographic Number Theory, as described in the book
Gödel, Escher, Bach, by Douglas R. Hofstadter
.. versionadded:: 2.7
"""
name = 'Typographic Number Theory'
url = 'https://github.com/Kenny2github/language-tnt'
aliases = ['tnt']
filenames = ['*.tnt']
cur = []
LOGIC = set('⊃→]&∧^|Vv')
OPERATORS = set('+.⋅*')
VARIABLES = set('abcde')
PRIMES = set("'")
NEGATORS = set('~!')
QUANTIFIERS = set('AE∀∃')
NUMBERS = set('0123456789')
WHITESPACE = set('\t \v\n')
RULES = re.compile('''(?xi)
joining | separation | double-tilde | fantasy\\ rule
| carry[- ]over(?:\\ of)?(?:\\ line)?\\ ([0-9]+) | detachment
| contrapositive | De\\ Morgan | switcheroo
| specification | generalization | interchange
| existence | symmetry | transitivity
| add\\ S | drop\\ S | induction
| axiom\\ ([1-5]) | premise | push | pop
''')
LINENOS = re.compile(r'(?:[0-9]+)(?:(?:, ?|,? and )(?:[0-9]+))*')
COMMENT = re.compile(r'\[[^\n\]]+\]')
def __init__(self, *args, **kwargs):
Lexer.__init__(self, *args, **kwargs)
self.cur = []
def whitespace(self, start, text, required=False):
"""Tokenize whitespace."""
end = start
try:
while text[end] in self.WHITESPACE:
end += 1
except IndexError:
end = len(text)
if required and end == start:
raise AssertionError
if end != start:
self.cur.append((start, Text, text[start:end]))
return end
def variable(self, start, text):
"""Tokenize a variable."""
if text[start] not in self.VARIABLES:
raise AssertionError
end = start+1
while text[end] in self.PRIMES:
end += 1
self.cur.append((start, Name.Variable, text[start:end]))
return end
def term(self, start, text):
"""Tokenize a term."""
if text[start] == 'S': # S...S(...) or S...0
end = start+1
while text[end] == 'S':
end += 1
self.cur.append((start, Number.Integer, text[start:end]))
return self.term(end, text)
if text[start] == '0': # the singleton 0
self.cur.append((start, Number.Integer, text[start]))
return start+1
if text[start] in self.VARIABLES: # a''...
return self.variable(start, text)
if text[start] == '(': # (...+...)
self.cur.append((start, Punctuation, text[start]))
start = self.term(start+1, text)
if text[start] not in self.OPERATORS:
raise AssertionError
self.cur.append((start, Operator, text[start]))
start = self.term(start+1, text)
if text[start] != ')':
raise AssertionError
self.cur.append((start, Punctuation, text[start]))
return start+1
raise AssertionError # no matches
def formula(self, start, text):
"""Tokenize a formula."""
if text[start] in self.NEGATORS: # ~<...>
end = start+1
while text[end] in self.NEGATORS:
end += 1
self.cur.append((start, Operator, text[start:end]))
return self.formula(end, text)
if text[start] in self.QUANTIFIERS: # Aa:<...>
self.cur.append((start, Keyword.Declaration, text[start]))
start = self.variable(start+1, text)
if text[start] != ':':
raise AssertionError
self.cur.append((start, Punctuation, text[start]))
return self.formula(start+1, text)
if text[start] == '<': # <...&...>
self.cur.append((start, Punctuation, text[start]))
start = self.formula(start+1, text)
if text[start] not in self.LOGIC:
raise AssertionError
self.cur.append((start, Operator, text[start]))
start = self.formula(start+1, text)
if text[start] != '>':
raise AssertionError
self.cur.append((start, Punctuation, text[start]))
return start+1
# ...=...
start = self.term(start, text)
if text[start] != '=':
raise AssertionError
self.cur.append((start, Operator, text[start]))
start = self.term(start+1, text)
return start
def rule(self, start, text):
"""Tokenize a rule."""
match = self.RULES.match(text, start)
if match is None:
raise AssertionError
groups = sorted(match.regs[1:]) # exclude whole match
for group in groups:
if group[0] >= 0: # this group matched
self.cur.append((start, Keyword, text[start:group[0]]))
self.cur.append((group[0], Number.Integer,
text[group[0]:group[1]]))
if group[1] != match.end():
self.cur.append((group[1], Keyword,
text[group[1]:match.end()]))
break
else:
self.cur.append((start, Keyword, text[start:match.end()]))
return match.end()
def lineno(self, start, text):
"""Tokenize a line referral."""
end = start
while text[end] not in self.NUMBERS:
end += 1
self.cur.append((start, Punctuation, text[start]))
self.cur.append((start+1, Text, text[start+1:end]))
start = end
match = self.LINENOS.match(text, start)
if match is None:
raise AssertionError
if text[match.end()] != ')':
raise AssertionError
self.cur.append((match.start(), Number.Integer, match.group(0)))
self.cur.append((match.end(), Punctuation, text[match.end()]))
return match.end() + 1
def error_till_line_end(self, start, text):
"""Mark everything from ``start`` to the end of the line as Error."""
end = start
try:
while text[end] != '\n': # there's whitespace in rules
end += 1
except IndexError:
end = len(text)
if end != start:
self.cur.append((start, Error, text[start:end]))
end = self.whitespace(end, text)
return end
def get_tokens_unprocessed(self, text):
"""Returns a list of TNT tokens."""
self.cur = []
start = end = self.whitespace(0, text)
while start <= end < len(text):
try:
# try line number
while text[end] in self.NUMBERS:
end += 1
if end != start: # actual number present
self.cur.append((start, Number.Integer, text[start:end]))
# whitespace is required after a line number
orig = len(self.cur)
try:
start = end = self.whitespace(end, text, True)
except AssertionError:
del self.cur[orig:]
start = end = self.error_till_line_end(end, text)
continue
# at this point it could be a comment
match = self.COMMENT.match(text, start)
if match is not None:
self.cur.append((start, Comment, text[start:match.end()]))
start = end = match.end()
# anything after the closing bracket is invalid
start = end = self.error_till_line_end(start, text)
# do not attempt to process the rest
continue
del match
if text[start] in '[]': # fantasy push or pop
self.cur.append((start, Keyword, text[start]))
start += 1
end += 1
else:
# one formula, possibly containing subformulae
orig = len(self.cur)
try:
start = end = self.formula(start, text)
except (AssertionError, RecursionError): # not well-formed
del self.cur[orig:]
while text[end] not in self.WHITESPACE:
end += 1
self.cur.append((start, Error, text[start:end]))
start = end
# skip whitespace after formula
orig = len(self.cur)
try:
start = end = self.whitespace(end, text, True)
except AssertionError:
del self.cur[orig:]
start = end = self.error_till_line_end(start, text)
continue
# rule proving this formula a theorem
orig = len(self.cur)
try:
start = end = self.rule(start, text)
except AssertionError:
del self.cur[orig:]
start = end = self.error_till_line_end(start, text)
continue
# skip whitespace after rule
start = end = self.whitespace(end, text)
# line marker
if text[start] == '(':
orig = len(self.cur)
try:
start = end = self.lineno(start, text)
except AssertionError:
del self.cur[orig:]
start = end = self.error_till_line_end(start, text)
continue
start = end = self.whitespace(start, text)
except IndexError:
try:
del self.cur[orig:]
except NameError:
pass # if orig was never defined, fine
self.error_till_line_end(start, text)
return self.cur