291 lines
8.1 KiB
Python
Raw Normal View History

2023-12-22 15:26:01 +01:00
import os.path
import pathlib
import re
from typing import Callable
from typing import Optional
from typing import Union
from urllib import parse
import attrs
from pygls import IS_WIN
from pygls.uris import urlparse
# Matches a valid URI scheme: a letter followed by any number of letters,
# digits, "+", "." or "-". Used by ``Uri.__attrs_post_init__`` for validation.
SCHEME = re.compile(r"^[a-zA-Z][a-zA-Z\d+.-]*$")
# Matches a drive-letter prefix at the start of a path, e.g. "/c:" or "C:".
# Group 1 is the (optional) leading slash, group 2 is the drive letter + colon.
RE_DRIVE_LETTER_PATH = re.compile(r"^(\/?)([a-zA-Z]:)")
# TODO: Look into upstreaming this into pygls
# - if it works out
# - when pygls drops 3.7 (Uri uses the := operator)
@attrs.define(frozen=True)
class Uri:
    """Helper class for working with URIs.

    Instances are frozen; methods such as :meth:`where`, :meth:`join` and
    :meth:`resolve` return new ``Uri`` objects rather than mutating this one.
    """

    # Must match ``SCHEME``; validated in ``__attrs_post_init__``.
    scheme: str
    # May carry ``user:cred@`` and ``:port`` parts; see ``as_string``.
    authority: str
    path: str
    query: str
    fragment: str

    def __attrs_post_init__(self):
        """Basic validation.

        Raises
        ------
        ValueError
           If the scheme is missing or invalid, or if the combination of
           authority and path is not allowed.
        """
        if self.scheme is None:
            raise ValueError("URIs must have a scheme")

        if not SCHEME.match(self.scheme):
            raise ValueError("Invalid scheme")

        if self.authority and self.path and (not self.path.startswith("/")):
            raise ValueError("Paths with an authority must start with a slash '/'")

        if self.path and self.path.startswith("//") and (not self.authority):
            raise ValueError(
                "Paths without an authority cannot start with two slashes '//'"
            )

    def __fspath__(self):
        """Return the file system representation of this uri.

        This makes Uri instances compatible with any function that expects an
        ``os.PathLike`` object!
        """
        # TODO: Should we raise an exception if scheme != "file"?
        return self.as_fs_path(preserve_case=True)

    def __str__(self):
        return self.as_string()

    def __truediv__(self, other):
        # Allows ``uri / "segment"`` as shorthand for ``uri.join("segment")``.
        return self.join(other)

    @classmethod
    def create(
        cls,
        *,
        scheme: str = "",
        authority: str = "",
        path: str = "",
        query: str = "",
        fragment: str = "",
    ) -> "Uri":
        """Create a uri with the given attributes.

        For ``http``, ``https`` and ``file`` uris the path is made absolute by
        prepending a ``/`` when it does not already start with one.
        """
        if scheme in {"http", "https", "file"}:
            if not path.startswith("/"):
                path = f"/{path}"

        return cls(
            scheme=scheme,
            authority=authority,
            path=path,
            query=query,
            fragment=fragment,
        )

    @classmethod
    def parse(cls, uri: str) -> "Uri":
        """Parse the given uri from its string representation."""
        # The 4th element returned by ``urlparse`` (params) is not used.
        scheme, authority, path, _, query, fragment = urlparse(uri)
        return cls.create(
            scheme=scheme,
            authority=authority,
            path=path,
            query=query,
            fragment=fragment,
        )

    def resolve(self) -> "Uri":
        """Return the fully resolved version of this Uri."""
        # This operation only makes sense for file uris
        if self.scheme != "file":
            return Uri.parse(str(self))

        # ``pathlib.Path`` accepts this object thanks to ``__fspath__``.
        return Uri.for_file(pathlib.Path(self).resolve())

    @classmethod
    def for_file(cls, filepath: Union[str, "os.PathLike[str]"]) -> "Uri":
        """Create a uri based on the given filepath."""
        fpath = os.fspath(filepath)
        if IS_WIN:
            fpath = fpath.replace("\\", "/")

        # A leading ``//`` means the first segment is treated as the authority,
        # with the remaining segments forming the path.
        if fpath.startswith("//"):
            authority, *path = fpath[2:].split("/")
            fpath = "/".join(path)
        else:
            authority = ""

        return cls.create(scheme="file", authority=authority, path=fpath)

    @property
    def fs_path(self) -> Optional[str]:
        """Return the equivalent fs path."""
        return self.as_fs_path()

    def where(self, **kwargs) -> "Uri":
        """Return a transformed version of this uri where certain components of the
        uri have been replaced with the given arguments.

        Recognised keyword arguments are ``scheme``, ``authority``, ``path``,
        ``query`` and ``fragment``; any other keyword is ignored.

        Passing a value of ``None`` will remove the given component entirely (it is
        stored as the empty string, the same value used for an absent component
        when parsing). A ``scheme`` of ``None`` is passed through unchanged so that
        validation rejects it with a ``ValueError``.
        """
        keys = ("scheme", "authority", "path", "query", "fragment")
        components = {k: getattr(self, k) for k in keys}

        for key in components.keys() & kwargs.keys():
            value = kwargs[key]
            # ``create`` calls ``str`` methods on the path, so ``None`` must be
            # normalised before it gets there.
            if value is None and key != "scheme":
                value = ""
            components[key] = value

        return Uri.create(**components)

    def join(self, path: str) -> "Uri":
        """Join this Uri's path component with the given path and return the resulting
        uri.

        Parameters
        ----------
        path
           The path segment to join

        Returns
        -------
        Uri
           The resulting uri

        Raises
        ------
        ValueError
           If this uri has no path to join onto.
        """
        if not self.path:
            raise ValueError("This uri has no path")

        if IS_WIN:
            # Go via the file system representation so that ``os.path`` sees a
            # native Windows path, then convert the result back to a uri path.
            fs_path = self.fs_path
            if fs_path is None:
                raise ValueError("Unable to join paths, fs_path is None")

            joined = os.path.normpath(os.path.join(fs_path, path))
            new_path = self.for_file(joined).path
        else:
            new_path = os.path.normpath(os.path.join(self.path, path))

        return self.where(path=new_path)

    def as_fs_path(self, preserve_case: bool = False) -> Optional[str]:
        """Return the file system path corresponding with this uri.

        Parameters
        ----------
        preserve_case
           If ``True``, keep the case of a drive letter as-is, otherwise it is
           lowercased.

        Returns
        -------
        Optional[str]
           The file system path, or ``None`` if this uri has no path.
        """
        if not self.path:
            return None

        path = _normalize_path(self.path, preserve_case)

        if self.authority and len(path) > 1:
            path = f"//{self.authority}{path}"

        # Remove the leading `/` from windows paths. The slash in the pattern is
        # optional, so only strip when it was actually matched - otherwise a path
        # such as ``c:/foo`` would lose its drive letter.
        elif (drive := RE_DRIVE_LETTER_PATH.match(path)) and drive.group(1):
            path = path[1:]

        if IS_WIN:
            path = path.replace("/", "\\")

        return path

    def as_string(self, encode=True) -> str:
        """Return a string representation of this Uri.

        Parameters
        ----------
        encode
           If ``True`` (the default), encode any special characters.

        Returns
        -------
        str
           The string representation of the Uri
        """
        # See: https://github.com/python/mypy/issues/10740
        encoder: Callable[[str], str] = parse.quote if encode else _replace_chars  # type: ignore[assignment]

        if authority := self.authority:
            # Split off any ``user[:cred]@`` prefix and encode its parts separately
            # so that the ``:`` and ``@`` delimiters are preserved.
            usercred, *auth = authority.split("@")
            if auth:
                *user, cred = usercred.split(":")
                if user:
                    usercred = encoder(":".join(user)) + f":{encoder(cred)}"
                else:
                    usercred = encoder(usercred)

                authority = "@".join(auth)
            else:
                usercred = ""

            authority = authority.lower()

            # Anything after the last ``:`` is kept verbatim (the port).
            *auth, port = authority.split(":")
            if auth:
                authority = encoder(":".join(auth)) + f":{port}"
            else:
                authority = encoder(authority)

            if usercred:
                authority = f"{usercred}@{authority}"

        # ``file`` uris always get the ``//`` separator, even without an authority.
        scheme_separator = ""
        if authority or self.scheme == "file":
            scheme_separator = "//"

        if path := self.path:
            path = encoder(_normalize_path(path))

        if query := self.query:
            query = encoder(query)

        if fragment := self.fragment:
            fragment = encoder(fragment)

        parts = [
            f"{self.scheme}:",
            scheme_separator,
            authority if authority else "",
            path if path else "",
            f"?{query}" if query else "",
            f"#{fragment}" if fragment else "",
        ]
        return "".join(parts)
def _replace_chars(segment: str) -> str:
"""Replace a certain subset of characters in a uri segment"""
return segment.replace("#", "%23").replace("?", "%3F")
def _normalize_path(path: str, preserve_case: bool = False) -> str:
    """Normalise the path segment of a Uri.

    Parameters
    ----------
    path
       The path to normalise.

    preserve_case
       If ``True``, preserve the case of the drive label on Windows.
       If ``False``, the drive label will be lowercased.

    Returns
    -------
    str
       The normalised path.
    """
    # Backslashes are only path separators on Windows; elsewhere they are
    # legal filename characters (eg /f\oo/ba\r.txt) and must be left alone.
    normalised = path.replace("\\", "/") if IS_WIN else path

    if preserve_case:
        return normalised

    drive = RE_DRIVE_LETTER_PATH.match(normalised)
    if not drive:
        return normalised

    # Rebuild the path with a lowercased drive label.
    slash, label = drive.group(1), drive.group(2)
    remainder = normalised[drive.end() :]
    return f"{slash}{label.lower()}{remainder}"