170 lines
5.2 KiB
Python
170 lines
5.2 KiB
Python
|
"""Pattern-matching utility functions for Sphinx."""
|
||
|
|
||
|
from __future__ import annotations
|
||
|
|
||
|
import os.path
|
||
|
import re
|
||
|
from typing import TYPE_CHECKING, Callable
|
||
|
|
||
|
from sphinx.util.osutil import canon_path, path_stabilize
|
||
|
|
||
|
if TYPE_CHECKING:
|
||
|
from collections.abc import Iterable, Iterator
|
||
|
|
||
|
|
||
|
def _translate_pattern(pat: str) -> str:
    """Translate a shell-style glob pattern to a regular expression.

    Adapted from the fnmatch module, but enhanced so that single stars don't
    match slashes.

    Translation rules:

    * ``**`` matches any sequence of characters, slashes included.
    * ``*`` matches any sequence of characters except slashes.
    * ``?`` matches a single character other than a slash.
    * ``[seq]`` is passed through as a regex character class;
      ``[!seq]`` becomes a negated class that additionally never matches
      a slash.  A ``[`` without a closing ``]`` is treated literally.
    * Every other character matches itself.

    :param pat: The glob pattern.
    :return: Source text of a regular expression, intended for use with
        ``re.match``.  Like ``fnmatch.translate``, the expression is wrapped
        in ``(?s:...)`` and terminated by ``\\Z`` so that newlines inside a
        name are treated as ordinary characters and a trailing newline is
        never silently ignored.
    """
    i, n = 0, len(pat)
    # Collect fragments and join once instead of repeatedly growing a string.
    parts: list[str] = []
    while i < n:
        c = pat[i]
        i += 1
        if c == '*':
            if i < n and pat[i] == '*':
                # double star matches slashes too
                i += 1
                parts.append('.*')
            else:
                # single star doesn't match slashes
                parts.append('[^/]*')
        elif c == '?':
            # question mark doesn't match slashes either
            parts.append('[^/]')
        elif c == '[':
            # Look for the closing bracket.  A leading '!' and a ']' placed
            # directly after the opening (or after '!') belong to the set.
            j = i
            if j < n and pat[j] == '!':
                j += 1
            if j < n and pat[j] == ']':
                j += 1
            while j < n and pat[j] != ']':
                j += 1
            if j >= n:
                # no closing bracket: the '[' is a literal character
                parts.append('\\[')
            else:
                stuff = pat[i:j].replace('\\', '\\\\')
                i = j + 1
                if stuff[0] == '!':
                    # negative pattern mustn't match slashes either
                    stuff = '^/' + stuff[1:]
                elif stuff[0] == '^':
                    # a literal '^' must not be read as regex negation
                    stuff = '\\' + stuff
                parts.append(f'[{stuff}]')
        else:
            parts.append(re.escape(c))
    # '$' would also match just before a trailing newline, and '.' would not
    # match a newline at all; use DOTALL scoped to the pattern and a strict
    # end-of-string anchor instead.
    return '(?s:' + ''.join(parts) + r')\Z'
|
||
|
|
||
|
|
||
|
def compile_matchers(
    patterns: Iterable[str],
) -> list[Callable[[str], re.Match[str] | None]]:
    """Turn glob patterns into matcher callables.

    Each pattern is translated with ``_translate_pattern``, compiled, and
    represented in the result by the compiled regex's bound ``match`` method.
    The result preserves the order of *patterns*.
    """
    matchers = []
    for glob in patterns:
        regex = re.compile(_translate_pattern(glob))
        matchers.append(regex.match)
    return matchers
|
||
|
|
||
|
|
||
|
class Matcher:
    """A pattern matcher for multiple shell-style glob patterns.

    Note: this modifies the patterns to work with copy_asset().
    Every pattern starting with ``**/`` is also registered without that
    prefix, so that, for example, "**/index.rst" matches "index.rst".
    """

    def __init__(self, exclude_patterns: Iterable[str]) -> None:
        """Compile *exclude_patterns* plus their prefix-stripped variants.

        :param exclude_patterns: Glob patterns; any iterable is accepted,
            including one-shot iterators such as generators.
        """
        # Materialise first: the patterns are traversed twice below, and a
        # one-shot iterator would be exhausted after the first traversal.
        patterns = list(exclude_patterns)
        # "**/name" should also match a bare "name" at the top level.
        expanded = [pat[3:] for pat in patterns if pat.startswith('**/')]
        self.patterns = compile_matchers(patterns + expanded)

    def __call__(self, string: str) -> bool:
        """Alias for :meth:`match`, so an instance can be used as a predicate."""
        return self.match(string)

    def match(self, string: str) -> bool:
        """Return True if *string* matches any of the compiled patterns.

        *string* is passed through ``canon_path`` before matching.
        """
        string = canon_path(string)
        return any(pat(string) for pat in self.patterns)
|
||
|
|
||
|
|
||
|
# Matcher for dotfiles: the glob '**/.*', together with the '.*' variant that
# Matcher derives by stripping the leading '**/', matches any path whose final
# component starts with a dot.
DOTFILES = Matcher(['**/.*'])
|
||
|
|
||
|
|
||
|
# Cache of compiled regular expressions, keyed by the glob pattern string they
# were translated from.  Filled lazily by patmatch() and patfilter().
_pat_cache: dict[str, re.Pattern[str]] = {}
|
||
|
|
||
|
|
||
|
def patmatch(name: str, pat: str) -> re.Match[str] | None:
    """Match *name* against the glob pattern ``pat``.

    The pattern is translated to a regular expression and compiled on first
    use; the compiled form is kept in ``_pat_cache`` for later calls.
    Adapted from the fnmatch module.

    :return: The match object if *name* matches, otherwise ``None``.
    """
    compiled = _pat_cache.get(pat)
    if compiled is None:
        compiled = _pat_cache[pat] = re.compile(_translate_pattern(pat))
    return compiled.match(name)
|
||
|
|
||
|
|
||
|
def patfilter(names: Iterable[str], pat: str) -> list[str]:
    """Return the entries of ``names`` that match the glob pattern ``pat``.

    The pattern is translated to a regular expression and compiled on first
    use; the compiled form is kept in ``_pat_cache`` for later calls.
    The order of ``names`` is preserved in the result.

    Adapted from the fnmatch module.
    """
    compiled = _pat_cache.get(pat)
    if compiled is None:
        compiled = _pat_cache[pat] = re.compile(_translate_pattern(pat))
    return [candidate for candidate in names if compiled.match(candidate)]
|
||
|
|
||
|
|
||
|
def get_matching_files(
    dirname: str | os.PathLike[str],
    include_patterns: Iterable[str] = ("**",),
    exclude_patterns: Iterable[str] = (),
) -> Iterator[str]:
    """Recursively yield the file names found below a directory.

    File names are filtered by the glob-style *include_patterns* and
    *exclude_patterns*.  By default every file is included (``"**"``) and
    nothing is excluded (empty tuple).

    A file is yielded only if it matches at least one include pattern and no
    exclude pattern; exclusions always win over inclusions.  A directory that
    matches an exclude pattern is not descended into.

    Yielded names are relative to *dirname* and have been passed through
    ``path_stabilize``.  Within each directory, files are yielded in sorted
    order and subdirectories are visited in sorted order.  Symbolic links to
    directories are followed.
    """
    # Work from a normalized absolute path.
    top = os.path.normpath(os.path.abspath(dirname))

    excluders = compile_matchers(exclude_patterns)
    includers = compile_matchers(include_patterns)

    def is_excluded(path: str) -> bool:
        # True as soon as any exclude pattern matches.
        return any(matches(path) for matches in excluders)

    def is_included(path: str) -> bool:
        # True as soon as any include pattern matches.
        return any(matches(path) for matches in includers)

    for current, subdirs, filenames in os.walk(top, followlinks=True):
        prefix = os.path.relpath(current, top)
        if prefix == ".":
            prefix = ""  # files directly in the target dir get no prefix

        # Select the files of this directory.
        selected = []
        for filename in sorted(filenames):
            candidate = path_stabilize(os.path.join(prefix, filename))
            if is_included(candidate) and not is_excluded(candidate):
                selected.append(candidate)

        # Prune excluded directories in place so os.walk skips them, and
        # fix the traversal order of the remaining ones.
        subdirs[:] = [
            subdir
            for subdir in sorted(subdirs)
            if not is_excluded(path_stabilize(os.path.join(prefix, subdir)))
        ]

        yield from selected
|