154 lines
5.3 KiB
Python
154 lines
5.3 KiB
Python
|
"""TeX escaping helper."""
|
|||
|
|
|||
|
from __future__ import annotations
|
|||
|
|
|||
|
import re
|
|||
|
|
|||
|
# Character -> LaTeX replacement pairs applied for every LaTeX engine.
# Every key must be exactly one character: init() feeds each key to ord().
tex_replacements = [
    # map TeX special chars
    ('$', r'\$'),
    ('%', r'\%'),
    ('&', r'\&'),
    ('#', r'\#'),
    ('_', r'\_'),
    ('{', r'\{'),
    ('}', r'\}'),
    ('\\', r'\textbackslash{}'),
    ('~', r'\textasciitilde{}'),
    ('^', r'\textasciicircum{}'),
    # map chars to avoid mis-interpretation in LaTeX
    ('[', r'{[}'),
    (']', r'{]}'),
    # map special Unicode characters to TeX commands
    ('✓', r'\(\checkmark\)'),
    ('✔', r'\(\pmb{\checkmark}\)'),
    ('✕', r'\(\times\)'),
    ('✖', r'\(\pmb{\times}\)'),
    # used to separate -- in options
    # The key is an invisible character, so it is spelled as an escape: a
    # literal is easily stripped by editors/tooling, which would leave an
    # empty string and make ord() fail in init().
    # NOTE(review): assumed to be U+FEFF (ZERO WIDTH NO-BREAK SPACE) -- confirm
    # against the code that inserts the separator.
    ('\ufeff', r'{}'),
    # map some special Unicode characters to similar ASCII ones
    # (even for Unicode LaTeX as may not be supported by OpenType font)
    ('⎽', r'\_'),
    ('ℯ', r'e'),
    ('ⅈ', r'i'),
    # Greek alphabet not escaped: pdflatex handles it via textalpha and inputenc
    # OHM SIGN U+2126 is handled by LaTeX textcomp package
]
|
|||
|
|
|||
|
# Replacements that keep TeX from applying ligatures or character
# substitutions to plain ASCII punctuation in PDF output.
# xelatex/lualatex/uplatex are handled differently (#5790, #6888)
#
# Deliberately NOT escaped:
#   "   renders curly in OT1 encoding but straight in T1, T2A, LY1...;
#       escaping it to \textquotedbl would break documents using OT1.
#       Sphinx does \shorthandoff{"} to avoid problems with some languages.
#   ,   ,, is a TeX ligature in T1 encoding, but escaping the comma adds
#       complications (whether by {}, or a macro) and is not done.
ascii_tex_replacements = [
    # -- and --- are TeX ligatures; there is no \text... LaTeX escape for
    # the hyphen character, hence the dedicated macro
    ('-', r'\sphinxhyphen{}'),
    # the next two require textcomp package
    # otherwise ' renders curly, and '' is a ligature
    ("'", r'\textquotesingle{}'),
    # otherwise ` and `` render curly
    ('`', r'\textasciigrave{}'),
    # < is inverted exclamation mark in OT1, << is a T1 ligature
    ('<', r'\textless{}'),
    # > is inverted question mark in OT1, >> is a T1 ligature
    ('>', r'\textgreater{}'),
]
|
|||
|
|
|||
|
# Maps Unicode characters to a LaTeX representation
# (for LaTeX engines which don't support unicode)
unicode_tex_replacements = [
    # some of the more common Unicode characters, mapped to TeX commands
    ('¶', r'\P{}'),
    ('§', r'\S{}'),
    ('€', r'\texteuro{}'),
    ('∞', r'\(\infty\)'),
    ('±', r'\(\pm\)'),
    ('→', r'\(\rightarrow\)'),
    ('‣', r'\(\rightarrow\)'),
    ('–', r'\textendash{}'),
    # superscript digits 0-9, in order
    *(
        (char, r'\(\sp{\text{%d}}\)' % digit)
        for digit, char in enumerate('⁰¹²³⁴⁵⁶⁷⁸⁹')
    ),
    # subscript digits 0-9, in order
    *(
        (char, r'\(\sb{\text{%d}}\)' % digit)
        for digit, char in enumerate('₀₁₂₃₄₅₆₇₈₉')
    ),
]
|
|||
|
|
|||
|
# Translation tables (code point -> replacement string) for str.translate().
# All of them start out empty and are filled in by init().

# TODO: rename to tex_idescape_map, since its only use is in
# sphinx.writers.latex.LaTeXTranslator.idescape().
# Only %, {, }, \, #, and ~ actually have to be replaced by the _ character.
# Defining the table entirely here instead of in init() would be simpler.
# The Unicode entries are superfluous, as idescape() uses backslashreplace.
tex_replace_map: dict[int, str] = {}

# Tables used by escape(): the full one, and the one for Unicode engines.
_tex_escape_map: dict[int, str] = {}
_tex_escape_map_without_unicode: dict[int, str] = {}

# Tables used by hlescape(): the full one, and the one for Unicode engines.
_tex_hlescape_map: dict[int, str] = {}
_tex_hlescape_map_without_unicode: dict[int, str] = {}
|
|||
|
|
|||
|
|
|||
|
def escape(s: str, latex_engine: str | None = None) -> str:
    """Escape text for LaTeX output.

    :param s: the text to escape
    :param latex_engine: name of the LaTeX engine; ``'lualatex'`` and
        ``'xelatex'`` are treated as Unicode-based engines and use the
        table without the Unicode replacements, any other value
        (including ``None``) uses the full table
    :return: *s* translated through the selected table

    The tables are empty until :func:`init` has been called.
    """
    unicode_engine = latex_engine in ('lualatex', 'xelatex')
    table = _tex_escape_map_without_unicode if unicode_engine else _tex_escape_map
    return s.translate(table)
|
|||
|
|
|||
|
|
|||
|
def hlescape(s: str, latex_engine: str | None = None) -> str:
    """Escape text for LaTeX highlighter.

    :param s: the text to escape
    :param latex_engine: name of the LaTeX engine; ``'lualatex'`` and
        ``'xelatex'`` are treated as Unicode-based engines and use the
        highlighter table without the Unicode replacements, any other
        value (including ``None``) uses the full highlighter table
    :return: *s* translated through the selected table

    The tables are empty until :func:`init` has been called.
    """
    unicode_engine = latex_engine in ('lualatex', 'xelatex')
    table = _tex_hlescape_map_without_unicode if unicode_engine else _tex_hlescape_map
    return s.translate(table)
|
|||
|
|
|||
|
|
|||
|
def escape_abbr(text: str) -> str:
    """Adjust spacing after abbreviations. Works with @ letter or other.

    Every ``.`` that is directly followed by whitespace, or that sits at
    the end of the text, gets ``\\@{}`` appended to it.

    :param text: the abbreviation text
    :return: *text* with ``.\\@{}`` in place of each matching period
    """
    # a period immediately before whitespace or the end of the string
    trailing_period = r'\.(?=\s|$)'
    marked_period = r'.\@{}'
    # a callable replacement inserts the text literally (no template parsing)
    return re.sub(trailing_period, lambda _match: marked_period, text)
|
|||
|
|
|||
|
|
|||
|
def init() -> None:
    """Fill the module-level translation tables from the replacement lists.

    Reads ``tex_replacements``, ``ascii_tex_replacements`` and
    ``unicode_tex_replacements`` and updates, in place, the tables used by
    ``escape()`` and ``hlescape()`` as well as ``tex_replace_map``.
    """
    base = {ord(char): tex for char, tex in tex_replacements}
    ascii_extra = {ord(char): tex for char, tex in ascii_tex_replacements}
    unicode_extra = {ord(char): tex for char, tex in unicode_tex_replacements}
    # the highlighter tables leave brackets, braces and backslash untouched
    hl_base = {
        ord(char): tex
        for char, tex in tex_replacements
        if char not in '[]{}\\'
    }

    # full table for escape(): base, then ASCII, then Unicode replacements
    _tex_escape_map.update(base)
    _tex_escape_map.update(ascii_extra)
    _tex_escape_map.update(unicode_extra)

    # no reason to add the ASCII replacements for Unicode engines ...
    _tex_escape_map_without_unicode.update(base)
    # ... but the hyphen has a specific PDF bookmark problem
    # https://github.com/latex3/hyperref/issues/112
    _tex_escape_map_without_unicode[ord('-')] = r'\sphinxhyphen{}'

    # every replaced character becomes '_' here
    tex_replace_map.update(dict.fromkeys(base, '_'))
    # This is actually unneeded:
    tex_replace_map.update(dict.fromkeys(unicode_extra, '_'))

    _tex_hlescape_map.update(hl_base)
    _tex_hlescape_map.update(unicode_extra)
    _tex_hlescape_map_without_unicode.update(hl_base)
|