162 lines
5.3 KiB
Python
162 lines
5.3 KiB
Python
"""Module containing the implementation of the IRIReference class."""
|
|
# Copyright (c) 2014 Rackspace
|
|
# Copyright (c) 2015 Ian Stapleton Cordasco
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
from collections import namedtuple
|
|
|
|
from . import compat
|
|
from . import exceptions
|
|
from . import misc
|
|
from . import normalizers
|
|
from . import uri
|
|
|
|
|
|
try:
|
|
import idna
|
|
except ImportError: # pragma: no cover
|
|
idna = None
|
|
|
|
|
|
class IRIReference(
    namedtuple("IRIReference", misc.URI_COMPONENTS), uri.URIMixin
):
    """Immutable object representing a parsed IRI Reference.

    Can be encoded into an URIReference object via the procedure
    specified in RFC 3987 Section 3.1

    .. note::
        The IRI submodule is a new interface and may possibly change in
        the future. Check for changes to the interface when upgrading.
    """

    # NOTE(review): this is a plain class attribute called ``slots``, not
    # ``__slots__``. ``__new__`` below stores ``encoding`` as an instance
    # attribute, so renaming this to ``__slots__`` could break that
    # assignment -- confirm against ``uri.URIMixin`` before changing it.
    slots = ()

    def __new__(
        cls, scheme, authority, path, query, fragment, encoding="utf-8"
    ):
        """Create a new IRIReference.

        Empty ``scheme``, ``authority`` and ``path`` values are stored as
        ``None``; ``query`` and ``fragment`` are stored as given.  The
        ``encoding`` is kept as an instance attribute and is not one of
        the tuple fields.
        """
        ref = super().__new__(
            cls,
            scheme or None,
            authority or None,
            path or None,
            query,
            fragment,
        )
        ref.encoding = encoding
        return ref

    def __eq__(self, other):
        """Compare this reference to another.

        ``other`` may be a tuple of components, another
        :class:`IRIReference`, or anything ``from_string`` can parse.

        :raises TypeError:
            If ``other`` cannot be converted into an IRI reference.
        """
        other_ref = other
        if isinstance(other, tuple):
            other_ref = self.__class__(*other)
        elif not isinstance(other, IRIReference):
            try:
                other_ref = self.__class__.from_string(other)
            except TypeError as exc:
                raise TypeError(
                    "Unable to compare {}() to {}()".format(
                        type(self).__name__, type(other).__name__
                    )
                ) from exc

        # See http://tools.ietf.org/html/rfc3986#section-6.2
        return tuple(self) == tuple(other_ref)

    def _match_subauthority(self):
        """Match the authority against the IRI sub-authority pattern."""
        return misc.ISUBAUTHORITY_MATCHER.match(self.authority)

    @classmethod
    def from_string(cls, iri_string, encoding="utf-8"):
        """Parse an IRI reference from the given unicode IRI string.

        :param str iri_string: Unicode IRI to be parsed into a reference.
        :param str encoding: The encoding of the string provided
        :returns: :class:`IRIReference` or subclass thereof
        """
        iri_string = compat.to_str(iri_string, encoding)

        split_iri = misc.IRI_MATCHER.match(iri_string).groupdict()
        # Scheme and authority are passed through untouched; the remaining
        # components go through ``normalizers.encode_component``.
        return cls(
            split_iri["scheme"],
            split_iri["authority"],
            normalizers.encode_component(split_iri["path"], encoding),
            normalizers.encode_component(split_iri["query"], encoding),
            normalizers.encode_component(split_iri["fragment"], encoding),
            encoding,
        )

    def encode(self, idna_encoder=None):  # noqa: C901
        """Encode an IRIReference into a URIReference instance.

        If the ``idna`` module is installed or the ``rfc3986[idna]``
        extra is used then unicode characters in the IRI host
        component will be encoded with IDNA2008.

        :param idna_encoder:
            Function that encodes each dot-separated part of the host
            component. If not given, a default encoder backed by the
            ``idna`` module is used; when that module is not installed
            and the IRI has an authority component, an exception is
            raised.
        :rtype: uri.URIReference
        :returns: A URI reference
        :raises exceptions.MissingDependencyError:
            If no ``idna_encoder`` is given, the ``idna`` module is not
            importable and the IRI has an authority component.
        :raises exceptions.InvalidAuthority:
            If the default encoder fails to IDNA-encode a host label.
        """
        authority = self.authority
        if authority:
            if idna_encoder is None:
                if idna is None:  # pragma: no cover
                    raise exceptions.MissingDependencyError(
                        "Could not import the 'idna' module "
                        "and the IRI hostname requires encoding"
                    )

                def idna_encoder(name):
                    # Only labels containing a non-ASCII character need
                    # IDNA encoding. ASCII ends at code point 127, so the
                    # first non-ASCII code point is 128 (U+0080).
                    if any(ord(c) > 127 for c in name):
                        try:
                            return idna.encode(
                                name.lower(), strict=True, std3_rules=True
                            )
                        except idna.IDNAError as exc:
                            raise exceptions.InvalidAuthority(
                                self.authority
                            ) from exc
                    return name

            # Rebuild the authority from its parts: [userinfo@]host[:port]
            authority = ""
            if self.host:
                authority = ".".join(
                    [
                        compat.to_str(idna_encoder(part))
                        for part in self.host.split(".")
                    ]
                )

            if self.userinfo is not None:
                authority = (
                    normalizers.encode_component(self.userinfo, self.encoding)
                    + "@"
                    + authority
                )

            if self.port is not None:
                authority += ":" + str(self.port)

        return uri.URIReference(
            self.scheme,
            authority,
            path=self.path,
            query=self.query,
            fragment=self.fragment,
            encoding=self.encoding,
        )
|