272 lines
10 KiB
Python
272 lines
10 KiB
Python
|
"""
|
|||
|
pygments.lexers.tnt
|
|||
|
~~~~~~~~~~~~~~~~~~~
|
|||
|
|
|||
|
Lexer for Typographic Number Theory.
|
|||
|
|
|||
|
:copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
|
|||
|
:license: BSD, see LICENSE for details.
|
|||
|
"""
|
|||
|
|
|||
|
import re
|
|||
|
|
|||
|
from pygments.lexer import Lexer
|
|||
|
from pygments.token import Text, Comment, Operator, Keyword, Name, Number, \
|
|||
|
Punctuation, Error
|
|||
|
|
|||
|
__all__ = ['TNTLexer']
|
|||
|
|
|||
|
|
|||
|
class TNTLexer(Lexer):
|
|||
|
"""
|
|||
|
Lexer for Typographic Number Theory, as described in the book
|
|||
|
Gödel, Escher, Bach, by Douglas R. Hofstadter
|
|||
|
|
|||
|
.. versionadded:: 2.7
|
|||
|
"""
|
|||
|
|
|||
|
name = 'Typographic Number Theory'
|
|||
|
url = 'https://github.com/Kenny2github/language-tnt'
|
|||
|
aliases = ['tnt']
|
|||
|
filenames = ['*.tnt']
|
|||
|
|
|||
|
cur = []
|
|||
|
|
|||
|
LOGIC = set('⊃→]&∧^|∨Vv')
|
|||
|
OPERATORS = set('+.⋅*')
|
|||
|
VARIABLES = set('abcde')
|
|||
|
PRIMES = set("'′")
|
|||
|
NEGATORS = set('~!')
|
|||
|
QUANTIFIERS = set('AE∀∃')
|
|||
|
NUMBERS = set('0123456789')
|
|||
|
WHITESPACE = set('\t \v\n')
|
|||
|
|
|||
|
RULES = re.compile('''(?xi)
|
|||
|
joining | separation | double-tilde | fantasy\\ rule
|
|||
|
| carry[- ]over(?:\\ of)?(?:\\ line)?\\ ([0-9]+) | detachment
|
|||
|
| contrapositive | De\\ Morgan | switcheroo
|
|||
|
| specification | generalization | interchange
|
|||
|
| existence | symmetry | transitivity
|
|||
|
| add\\ S | drop\\ S | induction
|
|||
|
| axiom\\ ([1-5]) | premise | push | pop
|
|||
|
''')
|
|||
|
LINENOS = re.compile(r'(?:[0-9]+)(?:(?:, ?|,? and )(?:[0-9]+))*')
|
|||
|
COMMENT = re.compile(r'\[[^\n\]]+\]')
|
|||
|
|
|||
|
def __init__(self, *args, **kwargs):
|
|||
|
Lexer.__init__(self, *args, **kwargs)
|
|||
|
self.cur = []
|
|||
|
|
|||
|
def whitespace(self, start, text, required=False):
|
|||
|
"""Tokenize whitespace."""
|
|||
|
end = start
|
|||
|
try:
|
|||
|
while text[end] in self.WHITESPACE:
|
|||
|
end += 1
|
|||
|
except IndexError:
|
|||
|
end = len(text)
|
|||
|
if required and end == start:
|
|||
|
raise AssertionError
|
|||
|
if end != start:
|
|||
|
self.cur.append((start, Text, text[start:end]))
|
|||
|
return end
|
|||
|
|
|||
|
def variable(self, start, text):
|
|||
|
"""Tokenize a variable."""
|
|||
|
if text[start] not in self.VARIABLES:
|
|||
|
raise AssertionError
|
|||
|
end = start+1
|
|||
|
while text[end] in self.PRIMES:
|
|||
|
end += 1
|
|||
|
self.cur.append((start, Name.Variable, text[start:end]))
|
|||
|
return end
|
|||
|
|
|||
|
def term(self, start, text):
|
|||
|
"""Tokenize a term."""
|
|||
|
if text[start] == 'S': # S...S(...) or S...0
|
|||
|
end = start+1
|
|||
|
while text[end] == 'S':
|
|||
|
end += 1
|
|||
|
self.cur.append((start, Number.Integer, text[start:end]))
|
|||
|
return self.term(end, text)
|
|||
|
if text[start] == '0': # the singleton 0
|
|||
|
self.cur.append((start, Number.Integer, text[start]))
|
|||
|
return start+1
|
|||
|
if text[start] in self.VARIABLES: # a''...
|
|||
|
return self.variable(start, text)
|
|||
|
if text[start] == '(': # (...+...)
|
|||
|
self.cur.append((start, Punctuation, text[start]))
|
|||
|
start = self.term(start+1, text)
|
|||
|
if text[start] not in self.OPERATORS:
|
|||
|
raise AssertionError
|
|||
|
self.cur.append((start, Operator, text[start]))
|
|||
|
start = self.term(start+1, text)
|
|||
|
if text[start] != ')':
|
|||
|
raise AssertionError
|
|||
|
self.cur.append((start, Punctuation, text[start]))
|
|||
|
return start+1
|
|||
|
raise AssertionError # no matches
|
|||
|
|
|||
|
def formula(self, start, text):
|
|||
|
"""Tokenize a formula."""
|
|||
|
if text[start] in self.NEGATORS: # ~<...>
|
|||
|
end = start+1
|
|||
|
while text[end] in self.NEGATORS:
|
|||
|
end += 1
|
|||
|
self.cur.append((start, Operator, text[start:end]))
|
|||
|
return self.formula(end, text)
|
|||
|
if text[start] in self.QUANTIFIERS: # Aa:<...>
|
|||
|
self.cur.append((start, Keyword.Declaration, text[start]))
|
|||
|
start = self.variable(start+1, text)
|
|||
|
if text[start] != ':':
|
|||
|
raise AssertionError
|
|||
|
self.cur.append((start, Punctuation, text[start]))
|
|||
|
return self.formula(start+1, text)
|
|||
|
if text[start] == '<': # <...&...>
|
|||
|
self.cur.append((start, Punctuation, text[start]))
|
|||
|
start = self.formula(start+1, text)
|
|||
|
if text[start] not in self.LOGIC:
|
|||
|
raise AssertionError
|
|||
|
self.cur.append((start, Operator, text[start]))
|
|||
|
start = self.formula(start+1, text)
|
|||
|
if text[start] != '>':
|
|||
|
raise AssertionError
|
|||
|
self.cur.append((start, Punctuation, text[start]))
|
|||
|
return start+1
|
|||
|
# ...=...
|
|||
|
start = self.term(start, text)
|
|||
|
if text[start] != '=':
|
|||
|
raise AssertionError
|
|||
|
self.cur.append((start, Operator, text[start]))
|
|||
|
start = self.term(start+1, text)
|
|||
|
return start
|
|||
|
|
|||
|
def rule(self, start, text):
|
|||
|
"""Tokenize a rule."""
|
|||
|
match = self.RULES.match(text, start)
|
|||
|
if match is None:
|
|||
|
raise AssertionError
|
|||
|
groups = sorted(match.regs[1:]) # exclude whole match
|
|||
|
for group in groups:
|
|||
|
if group[0] >= 0: # this group matched
|
|||
|
self.cur.append((start, Keyword, text[start:group[0]]))
|
|||
|
self.cur.append((group[0], Number.Integer,
|
|||
|
text[group[0]:group[1]]))
|
|||
|
if group[1] != match.end():
|
|||
|
self.cur.append((group[1], Keyword,
|
|||
|
text[group[1]:match.end()]))
|
|||
|
break
|
|||
|
else:
|
|||
|
self.cur.append((start, Keyword, text[start:match.end()]))
|
|||
|
return match.end()
|
|||
|
|
|||
|
def lineno(self, start, text):
|
|||
|
"""Tokenize a line referral."""
|
|||
|
end = start
|
|||
|
while text[end] not in self.NUMBERS:
|
|||
|
end += 1
|
|||
|
self.cur.append((start, Punctuation, text[start]))
|
|||
|
self.cur.append((start+1, Text, text[start+1:end]))
|
|||
|
start = end
|
|||
|
match = self.LINENOS.match(text, start)
|
|||
|
if match is None:
|
|||
|
raise AssertionError
|
|||
|
if text[match.end()] != ')':
|
|||
|
raise AssertionError
|
|||
|
self.cur.append((match.start(), Number.Integer, match.group(0)))
|
|||
|
self.cur.append((match.end(), Punctuation, text[match.end()]))
|
|||
|
return match.end() + 1
|
|||
|
|
|||
|
def error_till_line_end(self, start, text):
|
|||
|
"""Mark everything from ``start`` to the end of the line as Error."""
|
|||
|
end = start
|
|||
|
try:
|
|||
|
while text[end] != '\n': # there's whitespace in rules
|
|||
|
end += 1
|
|||
|
except IndexError:
|
|||
|
end = len(text)
|
|||
|
if end != start:
|
|||
|
self.cur.append((start, Error, text[start:end]))
|
|||
|
end = self.whitespace(end, text)
|
|||
|
return end
|
|||
|
|
|||
|
def get_tokens_unprocessed(self, text):
|
|||
|
"""Returns a list of TNT tokens."""
|
|||
|
self.cur = []
|
|||
|
start = end = self.whitespace(0, text)
|
|||
|
while start <= end < len(text):
|
|||
|
try:
|
|||
|
# try line number
|
|||
|
while text[end] in self.NUMBERS:
|
|||
|
end += 1
|
|||
|
if end != start: # actual number present
|
|||
|
self.cur.append((start, Number.Integer, text[start:end]))
|
|||
|
# whitespace is required after a line number
|
|||
|
orig = len(self.cur)
|
|||
|
try:
|
|||
|
start = end = self.whitespace(end, text, True)
|
|||
|
except AssertionError:
|
|||
|
del self.cur[orig:]
|
|||
|
start = end = self.error_till_line_end(end, text)
|
|||
|
continue
|
|||
|
# at this point it could be a comment
|
|||
|
match = self.COMMENT.match(text, start)
|
|||
|
if match is not None:
|
|||
|
self.cur.append((start, Comment, text[start:match.end()]))
|
|||
|
start = end = match.end()
|
|||
|
# anything after the closing bracket is invalid
|
|||
|
start = end = self.error_till_line_end(start, text)
|
|||
|
# do not attempt to process the rest
|
|||
|
continue
|
|||
|
del match
|
|||
|
if text[start] in '[]': # fantasy push or pop
|
|||
|
self.cur.append((start, Keyword, text[start]))
|
|||
|
start += 1
|
|||
|
end += 1
|
|||
|
else:
|
|||
|
# one formula, possibly containing subformulae
|
|||
|
orig = len(self.cur)
|
|||
|
try:
|
|||
|
start = end = self.formula(start, text)
|
|||
|
except (AssertionError, RecursionError): # not well-formed
|
|||
|
del self.cur[orig:]
|
|||
|
while text[end] not in self.WHITESPACE:
|
|||
|
end += 1
|
|||
|
self.cur.append((start, Error, text[start:end]))
|
|||
|
start = end
|
|||
|
# skip whitespace after formula
|
|||
|
orig = len(self.cur)
|
|||
|
try:
|
|||
|
start = end = self.whitespace(end, text, True)
|
|||
|
except AssertionError:
|
|||
|
del self.cur[orig:]
|
|||
|
start = end = self.error_till_line_end(start, text)
|
|||
|
continue
|
|||
|
# rule proving this formula a theorem
|
|||
|
orig = len(self.cur)
|
|||
|
try:
|
|||
|
start = end = self.rule(start, text)
|
|||
|
except AssertionError:
|
|||
|
del self.cur[orig:]
|
|||
|
start = end = self.error_till_line_end(start, text)
|
|||
|
continue
|
|||
|
# skip whitespace after rule
|
|||
|
start = end = self.whitespace(end, text)
|
|||
|
# line marker
|
|||
|
if text[start] == '(':
|
|||
|
orig = len(self.cur)
|
|||
|
try:
|
|||
|
start = end = self.lineno(start, text)
|
|||
|
except AssertionError:
|
|||
|
del self.cur[orig:]
|
|||
|
start = end = self.error_till_line_end(start, text)
|
|||
|
continue
|
|||
|
start = end = self.whitespace(start, text)
|
|||
|
except IndexError:
|
|||
|
try:
|
|||
|
del self.cur[orig:]
|
|||
|
except NameError:
|
|||
|
pass # if orig was never defined, fine
|
|||
|
self.error_till_line_end(start, text)
|
|||
|
return self.cur
|