usse/scrape/venv/lib/python3.10/site-packages/sphinx/util/matching.py

170 lines
5.2 KiB
Python
Raw Normal View History

2023-12-22 14:26:01 +00:00
"""Pattern-matching utility functions for Sphinx."""
from __future__ import annotations
import os.path
import re
from typing import TYPE_CHECKING, Callable
from sphinx.util.osutil import canon_path, path_stabilize
if TYPE_CHECKING:
from collections.abc import Iterable, Iterator
def _translate_pattern(pat: str) -> str:
    """Convert a shell-style glob into the source text of a regular expression.

    Derived from the :mod:`fnmatch` translation, but path-aware:

    * ``**`` matches any characters, ``/`` included;
    * ``*`` matches any run of characters other than ``/``;
    * ``?`` matches exactly one character other than ``/``;
    * ``[seq]`` is a character class; ``[!seq]`` is a negated class that
      additionally never matches ``/``; a leading ``^`` is taken literally;
    * a ``[`` without a closing ``]`` is a literal bracket;
    * any other character matches itself.

    The returned expression always ends with ``$``.
    """
    pieces = []
    pos, length = 0, len(pat)
    while pos < length:
        char = pat[pos]
        pos += 1

        if char == '*':
            if pat[pos:pos + 1] == '*':
                # a second star lets the wildcard cross '/' boundaries
                pos += 1
                pieces.append('.*')
            else:
                # a lone star stays inside one path component
                pieces.append('[^/]*')
            continue

        if char == '?':
            # one character, but never a path separator
            pieces.append('[^/]')
            continue

        if char != '[':
            pieces.append(re.escape(char))
            continue

        # Locate the ']' that closes the class.  A '!' right after the
        # opener, and a ']' in first position (after the optional '!'),
        # belong to the class body and cannot close it.
        scan = pos
        if pat[scan:scan + 1] == '!':
            scan += 1
        if pat[scan:scan + 1] == ']':
            scan += 1
        close = pat.find(']', scan)

        if close == -1:
            # unterminated class: the bracket is an ordinary character and
            # scanning resumes right after it
            pieces.append('\\[')
            continue

        body = pat[pos:close].replace('\\', '\\\\')
        pos = close + 1
        if body.startswith('!'):
            # negated classes must not match slashes either
            body = '^/' + body[1:]
        elif body.startswith('^'):
            # keep a leading caret literal instead of negating the class
            body = '\\' + body
        pieces.append(f'[{body}]')

    return ''.join(pieces) + '$'
def compile_matchers(
    patterns: Iterable[str],
) -> list[Callable[[str], re.Match[str] | None]]:
    """Turn glob patterns into a list of matching callables.

    Every pattern is translated with ``_translate_pattern`` and compiled; the
    bound ``match`` method of each compiled regex is collected, in the same
    order as *patterns*.  Each callable returns a match object on success and
    ``None`` otherwise.
    """
    matchers = []
    for pattern in patterns:
        compiled = re.compile(_translate_pattern(pattern))
        matchers.append(compiled.match)
    return matchers
class Matcher:
    """A pattern matcher for multiple shell-style glob patterns.

    Note: this modifies the patterns to work with copy_asset().
    For example, "**/index.rst" matches with "index.rst"
    """

    def __init__(self, exclude_patterns: Iterable[str]) -> None:
        """Compile *exclude_patterns*, plus a ``**/``-less variant of each
        pattern that starts with ``**/``.
        """
        # Materialise first: the argument is only promised to be iterable, so
        # it may be a generator that can be walked a single time.  Iterating
        # it twice would silently lose the original patterns.
        patterns = list(exclude_patterns)
        # "**/name" should also match a bare "name", so register the pattern
        # again with the leading "**/" removed.
        expanded = [pat[3:] for pat in patterns if pat.startswith('**/')]
        self.patterns = compile_matchers(patterns + expanded)

    def __call__(self, string: str) -> bool:
        """Shorthand for :meth:`match`."""
        return self.match(string)

    def match(self, string: str) -> bool:
        """Return True if *string* matches at least one compiled pattern.

        *string* is passed through ``canon_path`` before matching.
        """
        string = canon_path(string)
        return any(pat(string) for pat in self.patterns)
# Ready-made matcher for dot-files: '**/.*' targets a final path component
# that starts with a dot below any directory, and Matcher additionally
# registers the pattern without its leading '**/' (i.e. '.*'), so a bare
# dot-prefixed name with no directory part is covered as well.
DOTFILES = Matcher(['**/.*'])
# Compiled regular expressions keyed by the glob pattern they were translated
# from; filled lazily by patmatch() and patfilter().
_pat_cache: dict[str, re.Pattern[str]] = {}
def patmatch(name: str, pat: str) -> re.Match[str] | None:
    """Match *name* against the shell-style glob pattern *pat*.

    Returns the match object on success and ``None`` otherwise.  The regex
    translated from *pat* is compiled once and remembered in ``_pat_cache``.
    Adapted from the fnmatch module.
    """
    compiled = _pat_cache.get(pat)
    if compiled is None:
        compiled = re.compile(_translate_pattern(pat))
        _pat_cache[pat] = compiled
    return compiled.match(name)
def patfilter(names: Iterable[str], pat: str) -> list[str]:
    """Return the entries of *names* that match the glob pattern *pat*.

    The order of *names* is kept.  The regex translated from *pat* is
    compiled once and remembered in ``_pat_cache``.
    Adapted from the fnmatch module.
    """
    compiled = _pat_cache.get(pat)
    if compiled is None:
        compiled = re.compile(_translate_pattern(pat))
        _pat_cache[pat] = compiled
    return [name for name in names if compiled.match(name)]
def get_matching_files(
    dirname: str | os.PathLike[str],
    include_patterns: Iterable[str] = ("**",),
    exclude_patterns: Iterable[str] = (),
) -> Iterator[str]:
    """Recursively yield the files below *dirname* selected by glob patterns.

    Each candidate path is taken relative to *dirname* and passed through
    ``path_stabilize`` before being tested and yielded.  A file is yielded
    when it matches at least one of *include_patterns* and none of
    *exclude_patterns*; exclusions always win over inclusions.  Directories
    matching an exclude pattern are not descended into.  Symbolic links to
    directories are followed.

    By default every file is included (``"**"``) and nothing is excluded
    (empty tuple).  Within one directory, files are yielded in sorted order.
    """
    # Walk from a normalised absolute path so the relative paths computed
    # below are stable.
    dirname = os.path.normpath(os.path.abspath(dirname))

    exclude_matchers = compile_matchers(exclude_patterns)
    include_matchers = compile_matchers(include_patterns)

    def is_excluded(path: str) -> bool:
        # True as soon as any exclude pattern matches
        return any(matcher(path) for matcher in exclude_matchers)

    for root, dirs, files in os.walk(dirname, followlinks=True):
        relative_root = os.path.relpath(root, dirname)
        if relative_root == ".":
            # files directly inside dirname get no directory prefix
            relative_root = ""

        # Select the files of this directory
        candidates = [
            path_stabilize(os.path.join(relative_root, file_name))
            for file_name in sorted(files)
        ]
        included_files = [
            path
            for path in candidates
            if any(matcher(path) for matcher in include_matchers)
            and not is_excluded(path)
        ]

        # Prune excluded sub-directories in place, before yielding, so that
        # os.walk never descends into them.
        dirs[:] = [
            dir_name
            for dir_name in sorted(dirs)
            if not is_excluded(
                path_stabilize(os.path.join(relative_root, dir_name))
            )
        ]

        yield from included_files