usse/funda-scraper/venv/lib/python3.10/site-packages/rfc3986/iri.py

162 lines
5.3 KiB
Python

"""Module containing the implementation of the IRIReference class."""
# Copyright (c) 2014 Rackspace
# Copyright (c) 2015 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import namedtuple
from . import compat
from . import exceptions
from . import misc
from . import normalizers
from . import uri
try:
import idna
except ImportError: # pragma: no cover
idna = None
class IRIReference(
namedtuple("IRIReference", misc.URI_COMPONENTS), uri.URIMixin
):
"""Immutable object representing a parsed IRI Reference.
Can be encoded into an URIReference object via the procedure
specified in RFC 3987 Section 3.1
.. note::
The IRI submodule is a new interface and may possibly change in
the future. Check for changes to the interface when upgrading.
"""
slots = ()
def __new__(
cls, scheme, authority, path, query, fragment, encoding="utf-8"
):
"""Create a new IRIReference."""
ref = super().__new__(
cls,
scheme or None,
authority or None,
path or None,
query,
fragment,
)
ref.encoding = encoding
return ref
def __eq__(self, other):
"""Compare this reference to another."""
other_ref = other
if isinstance(other, tuple):
other_ref = self.__class__(*other)
elif not isinstance(other, IRIReference):
try:
other_ref = self.__class__.from_string(other)
except TypeError:
raise TypeError(
"Unable to compare {}() to {}()".format(
type(self).__name__, type(other).__name__
)
)
# See http://tools.ietf.org/html/rfc3986#section-6.2
return tuple(self) == tuple(other_ref)
def _match_subauthority(self):
return misc.ISUBAUTHORITY_MATCHER.match(self.authority)
@classmethod
def from_string(cls, iri_string, encoding="utf-8"):
"""Parse a IRI reference from the given unicode IRI string.
:param str iri_string: Unicode IRI to be parsed into a reference.
:param str encoding: The encoding of the string provided
:returns: :class:`IRIReference` or subclass thereof
"""
iri_string = compat.to_str(iri_string, encoding)
split_iri = misc.IRI_MATCHER.match(iri_string).groupdict()
return cls(
split_iri["scheme"],
split_iri["authority"],
normalizers.encode_component(split_iri["path"], encoding),
normalizers.encode_component(split_iri["query"], encoding),
normalizers.encode_component(split_iri["fragment"], encoding),
encoding,
)
def encode(self, idna_encoder=None): # noqa: C901
"""Encode an IRIReference into a URIReference instance.
If the ``idna`` module is installed or the ``rfc3986[idna]``
extra is used then unicode characters in the IRI host
component will be encoded with IDNA2008.
:param idna_encoder:
Function that encodes each part of the host component
If not given will raise an exception if the IRI
contains a host component.
:rtype: uri.URIReference
:returns: A URI reference
"""
authority = self.authority
if authority:
if idna_encoder is None:
if idna is None: # pragma: no cover
raise exceptions.MissingDependencyError(
"Could not import the 'idna' module "
"and the IRI hostname requires encoding"
)
def idna_encoder(name):
if any(ord(c) > 128 for c in name):
try:
return idna.encode(
name.lower(), strict=True, std3_rules=True
)
except idna.IDNAError:
raise exceptions.InvalidAuthority(self.authority)
return name
authority = ""
if self.host:
authority = ".".join(
[
compat.to_str(idna_encoder(part))
for part in self.host.split(".")
]
)
if self.userinfo is not None:
authority = (
normalizers.encode_component(self.userinfo, self.encoding)
+ "@"
+ authority
)
if self.port is not None:
authority += ":" + str(self.port)
return uri.URIReference(
self.scheme,
authority,
path=self.path,
query=self.query,
fragment=self.fragment,
encoding=self.encoding,
)