usse/scrape/venv/lib/python3.10/site-packages/sphinx/util/texescape.py
2023-12-22 15:26:01 +01:00

154 lines
5.3 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""TeX escaping helper."""
from __future__ import annotations
import re
# Replacements applied for every LaTeX engine.  Each entry is a
# ``(single character, LaTeX replacement)`` pair; init() calls ord() on the
# first element, so every key MUST be exactly one character long.
#
# Non-ASCII keys are spelled as \uXXXX escapes on purpose: several of them are
# invisible (U+FEFF) or easily confused with other glyphs, and literal
# characters are silently lost when the file passes through tools that strip
# them, which leaves '' keys and makes init() raise TypeError.
# NOTE(review): the non-ASCII code points below were restored from the LaTeX
# targets and the accompanying comments -- confirm against the upstream list.
tex_replacements = [
    # map TeX special chars
    ('$', r'\$'),
    ('%', r'\%'),
    ('&', r'\&'),
    ('#', r'\#'),
    ('_', r'\_'),
    ('{', r'\{'),
    ('}', r'\}'),
    ('\\', r'\textbackslash{}'),
    ('~', r'\textasciitilde{}'),
    ('^', r'\textasciicircum{}'),
    # map chars to avoid mis-interpretation in LaTeX
    ('[', r'{[}'),
    (']', r'{]}'),
    # map special Unicode characters to TeX commands
    ('\u2713', r'\(\checkmark\)'),        # U+2713 check mark
    ('\u2714', r'\(\pmb{\checkmark}\)'),  # U+2714 heavy check mark
    ('\u2715', r'\(\times\)'),            # U+2715 multiplication x
    ('\u2716', r'\(\pmb{\times}\)'),      # U+2716 heavy multiplication x
    # used to separate -- in options
    ('\ufeff', r'{}'),                    # U+FEFF zero width no-break space
    # map some special Unicode characters to similar ASCII ones
    # (even for Unicode LaTeX as may not be supported by OpenType font)
    ('\u23bd', r'\_'),                    # U+23BD horizontal scan line-9
    ('\u212f', r'e'),                     # U+212F script small e
    ('\u2148', r'i'),                     # U+2148 double-struck italic small i
    # Greek alphabet not escaped: pdflatex handles it via textalpha and inputenc
    # OHM SIGN U+2126 is handled by LaTeX textcomp package
]
# Replacements for plain ASCII characters that would otherwise trigger TeX
# ligatures or character substitutions in PDF output.
# xelatex/lualatex/uplatex are handled differently (#5790, #6888)
#
# Characters deliberately left alone:
# - the " renders curly in OT1 encoding but straight in T1, T2A, LY1...
#   escaping it to \textquotedbl would break documents using OT1;
#   Sphinx does \shorthandoff{"} to avoid problems with some languages
# - ,, is a TeX ligature in T1 encoding, but escaping the comma adds
#   complications (whether by {}, or a macro) and is not done
ascii_tex_replacements = [
    # -- and --- are TeX ligatures, and there is no \text... LaTeX escape
    # for the hyphen character
    ('-', r'\sphinxhyphen{}'),

    # the next two require the textcomp package
    # otherwise ' renders curly, and '' is a ligature
    ("'", r'\textquotesingle{}'),
    # otherwise \` and \`\` render curly
    ('`', r'\textasciigrave{}'),

    # < is the inverted exclamation mark in OT1, << is a T1 ligature
    ('<', r'\textless{}'),
    # > is the inverted question mark in OT1, >> is a T1 ligature
    ('>', r'\textgreater{}'),
]
# A map of Unicode characters to their LaTeX representation
# (for LaTeX engines which don't support Unicode).
#
# Each entry is a ``(single character, LaTeX replacement)`` pair; init() calls
# ord() on the first element, so every key MUST be exactly one character long.
# Keys are written as \uXXXX escapes so that they survive tools which drop or
# mangle non-ASCII characters (an empty '' key makes init() raise TypeError).
# NOTE(review): code points were restored from the LaTeX targets and the
# section comments -- confirm against the upstream list.
unicode_tex_replacements = [
    # map some more common Unicode characters to TeX commands
    ('\u00b6', r'\P{}'),              # U+00B6 pilcrow sign
    ('\u00a7', r'\S{}'),              # U+00A7 section sign
    ('\u20ac', r'\texteuro{}'),       # U+20AC euro sign
    ('\u221e', r'\(\infty\)'),        # U+221E infinity
    ('\u00b1', r'\(\pm\)'),           # U+00B1 plus-minus sign
    ('\u2192', r'\(\rightarrow\)'),   # U+2192 rightwards arrow
    ('\u2023', r'\(\rightarrow\)'),   # U+2023 triangular bullet
    ('\u2013', r'\textendash{}'),     # U+2013 en dash
    # superscript (digits 1-3 live in Latin-1, the rest in U+2070..U+2079)
    ('\u2070', r'\(\sp{\text{0}}\)'),
    ('\u00b9', r'\(\sp{\text{1}}\)'),
    ('\u00b2', r'\(\sp{\text{2}}\)'),
    ('\u00b3', r'\(\sp{\text{3}}\)'),
    ('\u2074', r'\(\sp{\text{4}}\)'),
    ('\u2075', r'\(\sp{\text{5}}\)'),
    ('\u2076', r'\(\sp{\text{6}}\)'),
    ('\u2077', r'\(\sp{\text{7}}\)'),
    ('\u2078', r'\(\sp{\text{8}}\)'),
    ('\u2079', r'\(\sp{\text{9}}\)'),
    # subscript (U+2080..U+2089)
    ('\u2080', r'\(\sb{\text{0}}\)'),
    ('\u2081', r'\(\sb{\text{1}}\)'),
    ('\u2082', r'\(\sb{\text{2}}\)'),
    ('\u2083', r'\(\sb{\text{3}}\)'),
    ('\u2084', r'\(\sb{\text{4}}\)'),
    ('\u2085', r'\(\sb{\text{5}}\)'),
    ('\u2086', r'\(\sb{\text{6}}\)'),
    ('\u2087', r'\(\sb{\text{7}}\)'),
    ('\u2088', r'\(\sb{\text{8}}\)'),
    ('\u2089', r'\(\sb{\text{9}}\)'),
]
# Translation tables for str.translate(), keyed by code point.  All of them
# start out empty and are filled in by init().
#
# TODO: this should be called tex_idescape_map because its only use is in
# sphinx.writers.latex.LaTeXTranslator.idescape()
# %, {, }, \, #, and ~ are the only ones which must be replaced by _ character
# It would be simpler to define it entirely here rather than in init().
# Unicode replacements are superfluous, as idescape() uses backslashreplace
tex_replace_map: dict[int, str] = {}
# Table used by escape() for engines other than lualatex/xelatex.
_tex_escape_map: dict[int, str] = {}
# Table used by escape() for lualatex/xelatex.
_tex_escape_map_without_unicode: dict[int, str] = {}
# Table used by hlescape() for engines other than lualatex/xelatex.
_tex_hlescape_map: dict[int, str] = {}
# Table used by hlescape() for lualatex/xelatex.
_tex_hlescape_map_without_unicode: dict[int, str] = {}
def escape(s: str, latex_engine: str | None = None) -> str:
    """Escape text for LaTeX output.

    :param s: the text to escape
    :param latex_engine: name of the LaTeX engine; ``'lualatex'`` and
        ``'xelatex'`` select the table without the Unicode replacements,
        any other value (including ``None``) selects the full table
    :return: *s* with every mapped character replaced

    The tables are filled by :func:`init`; until it has run they are empty
    and *s* is returned unchanged.
    """
    is_unicode_engine = latex_engine == 'lualatex' or latex_engine == 'xelatex'
    table = _tex_escape_map_without_unicode if is_unicode_engine else _tex_escape_map
    return s.translate(table)
def hlescape(s: str, latex_engine: str | None = None) -> str:
    """Escape text for LaTeX highlighter.

    :param s: the text to escape
    :param latex_engine: name of the LaTeX engine; ``'lualatex'`` and
        ``'xelatex'`` select the highlighter table without the Unicode
        replacements, any other value (including ``None``) selects the
        full highlighter table
    :return: *s* with every mapped character replaced

    The tables are filled by :func:`init`; until it has run they are empty
    and *s* is returned unchanged.
    """
    is_unicode_engine = latex_engine == 'lualatex' or latex_engine == 'xelatex'
    table = _tex_hlescape_map_without_unicode if is_unicode_engine else _tex_hlescape_map
    return s.translate(table)
def escape_abbr(text: str) -> str:
    """Adjust spacing after abbreviations. Works with @ letter or other.

    Every ``.`` that is followed by whitespace or stands at the end of the
    text is replaced by ``.\\@{}``; dots followed by any other character are
    left untouched.
    """
    trailing_dot = re.compile(r'\.(?=\s|$)')
    # A callable replacement is inserted verbatim, so no template escaping
    # rules apply to the backslash.
    return trailing_dot.sub(lambda _match: '.\\@{}', text)
def init() -> None:
    """Fill the module-level translation tables from the replacement lists.

    Populates ``_tex_escape_map``, ``_tex_escape_map_without_unicode``,
    ``_tex_hlescape_map``, ``_tex_hlescape_map_without_unicode`` and
    ``tex_replace_map`` in place; returns nothing.
    """
    # Core replacements go into both text tables; the id-escaping table maps
    # the same characters to an underscore.
    for char, latex in tex_replacements:
        codepoint = ord(char)
        _tex_escape_map[codepoint] = latex
        _tex_escape_map_without_unicode[codepoint] = latex
        tex_replace_map[codepoint] = '_'

    # no reason to do this for _tex_escape_map_without_unicode
    _tex_escape_map.update(
        (ord(char), latex) for char, latex in ascii_tex_replacements
    )

    # but the hyphen has a specific PDF bookmark problem
    # https://github.com/latex3/hyperref/issues/112
    _tex_escape_map_without_unicode[ord('-')] = r'\sphinxhyphen{}'

    for char, latex in unicode_tex_replacements:
        codepoint = ord(char)
        _tex_escape_map[codepoint] = latex
        # This is actually unneeded:
        tex_replace_map[codepoint] = '_'

    # The highlighter tables take the core replacements except for brackets,
    # braces and the backslash.
    for char, latex in tex_replacements:
        if char not in '[]{}\\':
            codepoint = ord(char)
            _tex_hlescape_map[codepoint] = latex
            _tex_hlescape_map_without_unicode[codepoint] = latex

    _tex_hlescape_map.update(
        (ord(char), latex) for char, latex in unicode_tex_replacements
    )