# Copyright (c) 2017 Ian Stapleton Cordasco
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module containing the validation logic for rfc3986."""
|
||
|
from . import exceptions
|
||
|
from . import misc
|
||
|
from . import normalizers
|
||
|
|
||
|
|
||
|
class Validator:
    """Object used to configure validation of all objects in rfc3986.

    Every configuration method returns the validator itself so that calls
    can be chained; :meth:`validate` then applies the accumulated rules to
    a parsed URI.

    .. versionadded:: 1.0

    Example usage::

         >>> from rfc3986 import api, validators
         >>> uri = api.uri_reference('https://github.com/')
         >>> validator = validators.Validator().require_presence_of(
         ...    'scheme', 'host', 'path',
         ... ).allow_schemes(
         ...    'http', 'https',
         ... ).allow_hosts(
         ...    '127.0.0.1', 'github.com',
         ... )
         >>> validator.validate(uri)
         >>> invalid_uri = api.uri_reference('imap://mail.google.com')
         >>> validator.validate(invalid_uri)
         Traceback (most recent call last):
         ...
         rfc3986.exceptions.MissingComponentError: ('path was required but
         missing', URIReference(scheme=u'imap', authority=u'mail.google.com',
         path=None, query=None, fragment=None), ['path'])

    """

    COMPONENT_NAMES = frozenset(
        ["scheme", "userinfo", "host", "port", "path", "query", "fragment"]
    )

    def __init__(self):
        """Initialize our default validations.

        By default nothing is restricted: the allow-lists are empty,
        passwords are permitted, and no component is required or checked
        for validity.
        """
        self.allowed_schemes = set()
        self.allowed_hosts = set()
        self.allowed_ports = set()
        self.allow_password = True
        # Maps each component name to whether it must be present.
        self.required_components = {
            "scheme": False,
            "userinfo": False,
            "host": False,
            "port": False,
            "path": False,
            "query": False,
            "fragment": False,
        }
        # Maps each component name to whether its validity is checked.
        self.validated_components = self.required_components.copy()

    def allow_schemes(self, *schemes):
        """Require the scheme to be one of the provided schemes.

        Each scheme is passed through ``normalizers.normalize_scheme``
        before it is stored.

        .. versionadded:: 1.0

        :param schemes:
            Schemes, without ``://`` that are allowed.
        :returns:
            The validator instance.
        :rtype:
            Validator
        """
        for scheme in schemes:
            self.allowed_schemes.add(normalizers.normalize_scheme(scheme))
        return self

    def allow_hosts(self, *hosts):
        """Require the host to be one of the provided hosts.

        Each host is passed through ``normalizers.normalize_host`` before
        it is stored.

        .. versionadded:: 1.0

        :param hosts:
            Hosts that are allowed.
        :returns:
            The validator instance.
        :rtype:
            Validator
        """
        for host in hosts:
            self.allowed_hosts.add(normalizers.normalize_host(host))
        return self

    def allow_ports(self, *ports):
        """Require the port to be one of the provided ports.

        .. versionadded:: 1.0

        :param ports:
            Ports that are allowed, given either as decimal strings
            (``"443"``) or as integers (``443``). Integers are stored in
            their decimal string form. Ports outside ``0``-``65535`` are
            silently ignored rather than rejected.
        :returns:
            The validator instance.
        :rtype:
            Validator
        :raises ValueError:
            When a string port cannot be parsed as a base-10 integer.
        """
        for port in ports:
            # ``int(x, base=10)`` only accepts text, so integer ports used
            # to raise TypeError. Store them as decimal strings so they sit
            # alongside string ports in the allow-list (presumably the form
            # in which ``uri.port`` is compared -- confirm against
            # URIReference).
            if isinstance(port, int) and not isinstance(port, bool):
                port = str(port)
            if 0 <= int(port, base=10) <= 65535:
                self.allowed_ports.add(port)
        return self

    def allow_use_of_password(self):
        """Allow passwords to be present in the URI.

        .. versionadded:: 1.0

        :returns:
            The validator instance.
        :rtype:
            Validator
        """
        self.allow_password = True
        return self

    def forbid_use_of_password(self):
        """Prevent passwords from being included in the URI.

        .. versionadded:: 1.0

        :returns:
            The validator instance.
        :rtype:
            Validator
        """
        self.allow_password = False
        return self

    def _lowercased_components(self, components):
        """Lower-case component names and reject unknown ones.

        :param components:
            Iterable of component names supplied by the caller.
        :returns:
            List of the lower-cased names, in the order given.
        :rtype:
            list
        :raises ValueError:
            When a name is not in :attr:`Validator.COMPONENT_NAMES`.
        """
        lowered = [name.lower() for name in components]
        # Validate everything before the caller mutates any state so that a
        # bad name leaves the validator untouched.
        for component in lowered:
            if component not in self.COMPONENT_NAMES:
                raise ValueError(f'"{component}" is not a valid component')
        return lowered

    def check_validity_of(self, *components):
        """Check the validity of the components provided.

        This can be specified repeatedly.

        .. versionadded:: 1.1

        :param components:
            Names of components from :attr:`Validator.COMPONENT_NAMES`.
            Names are matched case-insensitively.
        :returns:
            The validator instance.
        :rtype:
            Validator
        :raises ValueError:
            When a name is not a known component.
        """
        names = self._lowercased_components(components)
        self.validated_components.update(dict.fromkeys(names, True))
        return self

    def require_presence_of(self, *components):
        """Require the components provided.

        This can be specified repeatedly.

        .. versionadded:: 1.0

        :param components:
            Names of components from :attr:`Validator.COMPONENT_NAMES`.
            Names are matched case-insensitively.
        :returns:
            The validator instance.
        :rtype:
            Validator
        :raises ValueError:
            When a name is not a known component.
        """
        names = self._lowercased_components(components)
        self.required_components.update(dict.fromkeys(names, True))
        return self

    def validate(self, uri):
        """Check a URI for conditions specified on this validator.

        Checks run in this order: password, required components, component
        validity, then the scheme, host and port allow-lists. The first
        failing check raises.

        .. versionadded:: 1.0

        :param uri:
            Parsed URI to validate.
        :type uri:
            rfc3986.uri.URIReference
        :raises MissingComponentError:
            When a required component is missing.
        :raises UnpermittedComponentError:
            When a component is not one of those allowed.
        :raises PasswordForbidden:
            When a password is present in the userinfo component but is
            not permitted by configuration.
        :raises InvalidComponentsError:
            When a component was found to be invalid.
        """
        if not self.allow_password:
            check_password(uri)

        required_components = [
            component
            for component, required in self.required_components.items()
            if required
        ]
        validated_components = [
            component
            for component, checked in self.validated_components.items()
            if checked
        ]
        if required_components:
            ensure_required_components_exist(uri, required_components)
        if validated_components:
            ensure_components_are_valid(uri, validated_components)

        # An empty allow-list means "no restriction" (see ensure_one_of).
        ensure_one_of(self.allowed_schemes, uri, "scheme")
        ensure_one_of(self.allowed_hosts, uri, "host")
        ensure_one_of(self.allowed_ports, uri, "port")
|
||
|
|
||
|
|
||
|
def check_password(uri):
    """Raise if the URI's userinfo carries a password.

    A password is considered present when the userinfo contains a ``:``
    separator. A missing or empty userinfo passes.

    :param uri:
        Object exposing a ``userinfo`` attribute.
    :raises PasswordForbidden:
        When the userinfo contains a ``:``.
    """
    userinfo = uri.userinfo
    # Splitting on the first ":" yields two parts exactly when a ":" exists.
    if userinfo and ":" in userinfo:
        raise exceptions.PasswordForbidden(uri)
|
||
|
|
||
|
|
||
|
def ensure_one_of(allowed_values, uri, attribute):
    """Raise unless the URI's attribute is within the allowed values.

    Nothing is enforced when the attribute is ``None`` or when
    ``allowed_values`` is empty.

    :param allowed_values:
        Collection of permitted values; an empty one disables the check.
    :param uri:
        Object to read ``attribute`` from.
    :param str attribute:
        Name of the attribute to inspect.
    :raises UnpermittedComponentError:
        When the attribute has a value that is not in ``allowed_values``.
    """
    actual = getattr(uri, attribute)
    if actual is None:
        return
    if not allowed_values:
        return
    if actual in allowed_values:
        return
    raise exceptions.UnpermittedComponentError(
        attribute,
        actual,
        allowed_values,
    )
|
||
|
|
||
|
|
||
|
def ensure_required_components_exist(uri, required_components):
    """Raise if any required component of the URI is ``None``.

    :param uri:
        Object exposing each required component as an attribute.
    :param required_components:
        Iterable of attribute names that must not be ``None``.
    :raises MissingComponentError:
        When at least one component is ``None``; the missing names are
        passed to the exception in sorted order.
    """
    absent = [
        name for name in required_components if getattr(uri, name) is None
    ]
    if not absent:
        return
    absent.sort()
    raise exceptions.MissingComponentError(uri, *absent)
|
||
|
|
||
|
|
||
|
def is_valid(value, matcher, require):
    """Determine if a value is valid based on the provided matcher.

    :param str value:
        Value to validate, or ``None`` when the value is absent.
    :param matcher:
        Compiled regular expression to use to validate the value.
    :param require:
        Whether or not the value is required.
    :returns:
        ``False`` when the value is absent but required, ``True`` when it
        is absent and optional, otherwise whether ``matcher.match``
        accepts the value.
    :rtype:
        bool
    """
    if value is None:
        # An absent value is acceptable only when it is optional.
        return not require
    # Convert the match object (or None) to a real bool so the documented
    # return type holds; truthiness is unchanged for existing callers.
    return matcher.match(value) is not None
|
||
|
|
||
|
|
||
|
def authority_is_valid(authority, host=None, require=False):
    """Check an authority string, and optionally its host, for validity.

    The authority is matched against ``misc.SUBAUTHORITY_MATCHER``. If that
    succeeds and a host was supplied, the verdict is delegated to
    :func:`host_is_valid`.

    :param str authority:
        The authority to validate.
    :param str host:
        (optional) The host portion of the authority to validate.
    :param bool require:
        (optional) Specify if authority must not be None.
    :returns:
        Truthy if valid, falsy otherwise.
    """
    authority_ok = is_valid(authority, misc.SUBAUTHORITY_MATCHER, require)
    if host is None or not authority_ok:
        return authority_ok
    return host_is_valid(host, require)
|
||
|
|
||
|
|
||
|
def host_is_valid(host, require=False):
    """Check a host string for validity.

    The host is first matched against ``misc.HOST_MATCHER``. A host that
    also matches ``misc.IPv4_MATCHER`` is range-checked octet by octet; one
    that matches ``misc.IPv6_MATCHER`` must additionally match
    ``misc.IPv6_NO_RFC4007_MATCHER``.

    :param str host:
        The host to validate.
    :param bool require:
        (optional) Specify if host must not be None.
    :returns:
        Truthy if valid, falsy otherwise.
    """
    outcome = is_valid(host, misc.HOST_MATCHER, require)
    # Nothing further to inspect when the basic check failed or there is no
    # host at all.
    if not outcome or host is None:
        return outcome
    if misc.IPv4_MATCHER.match(host):
        return valid_ipv4_host_address(host)
    if misc.IPv6_MATCHER.match(host):
        return misc.IPv6_NO_RFC4007_MATCHER.match(host) is not None
    return outcome
|
||
|
|
||
|
|
||
|
def scheme_is_valid(scheme, require=False):
    """Check a scheme against ``misc.SCHEME_MATCHER``.

    :param str scheme:
        The scheme string to validate; ``None`` is treated as absent.
    :param bool require:
        (optional) Set to ``True`` to require the presence of a scheme.
    :returns:
        The result of :func:`is_valid`: truthy if the scheme is valid,
        falsy otherwise.
    """
    return is_valid(
        value=scheme,
        matcher=misc.SCHEME_MATCHER,
        require=require,
    )
|
||
|
|
||
|
|
||
|
def path_is_valid(path, require=False):
    """Check a path against ``misc.PATH_MATCHER``.

    :param str path:
        The path string to validate; ``None`` is treated as absent.
    :param bool require:
        (optional) Set to ``True`` to require the presence of a path.
    :returns:
        The result of :func:`is_valid`: truthy if the path is valid,
        falsy otherwise.
    """
    return is_valid(
        value=path,
        matcher=misc.PATH_MATCHER,
        require=require,
    )
|
||
|
|
||
|
|
||
|
def query_is_valid(query, require=False):
    """Check a query against ``misc.QUERY_MATCHER``.

    :param str query:
        The query string to validate; ``None`` is treated as absent.
    :param bool require:
        (optional) Set to ``True`` to require the presence of a query.
    :returns:
        The result of :func:`is_valid`: truthy if the query is valid,
        falsy otherwise.
    """
    return is_valid(
        value=query,
        matcher=misc.QUERY_MATCHER,
        require=require,
    )
|
||
|
|
||
|
|
||
|
def fragment_is_valid(fragment, require=False):
    """Check a fragment against ``misc.FRAGMENT_MATCHER``.

    :param str fragment:
        The fragment string to validate; ``None`` is treated as absent.
    :param bool require:
        (optional) Set to ``True`` to require the presence of a fragment.
    :returns:
        The result of :func:`is_valid`: truthy if the fragment is valid,
        falsy otherwise.
    """
    return is_valid(
        value=fragment,
        matcher=misc.FRAGMENT_MATCHER,
        require=require,
    )
|
||
|
|
||
|
|
||
|
def valid_ipv4_host_address(host):
    """Report whether every dot-separated part of ``host`` is 0-255.

    :param str host:
        Dotted string whose parts are parsed as base-10 integers.
    :returns:
        ``True`` when all parts fall within ``0``-``255``.
    :rtype:
        bool
    :raises ValueError:
        When any part is not a base-10 integer.
    """
    # Convert every part up front so a malformed part always raises, even
    # if an earlier part is already out of range.
    octets = [int(part, base=10) for part in host.split(".")]
    return all(0 <= octet <= 255 for octet in octets)
|
||
|
|
||
|
|
||
|
# Validators for components that are checked directly: each callable is given
# the value of the same-named attribute on the URI (see
# ensure_components_are_valid).
_COMPONENT_VALIDATORS = {
    "scheme": scheme_is_valid,
    "path": path_is_valid,
    "query": query_is_valid,
    "fragment": fragment_is_valid,
}

# Components that ensure_components_are_valid routes through
# subauthority_component_is_valid instead of _COMPONENT_VALIDATORS.
_SUBAUTHORITY_VALIDATORS = {"userinfo", "host", "port"}
|
||
|
|
||
|
|
||
|
def subauthority_component_is_valid(uri, component):
    """Validate one of the userinfo, host, or port components.

    :param uri:
        Object whose ``authority_info()`` returns a mapping with at least
        ``"host"`` and ``"port"`` keys.
    :param str component:
        Name of the component to check.
    :returns:
        ``False`` when the authority cannot be parsed. For ``"host"`` the
        result of :func:`host_is_valid`; for ``"port"`` whether the port is
        absent or within ``0``-``65535``; ``True`` for anything else.
    """
    try:
        parts = uri.authority_info()
    except exceptions.InvalidAuthority:
        return False

    if component == "host":
        return host_is_valid(parts["host"])
    if component != "port":
        # The authority parsed, which is all that is checked for any
        # component other than the host and port.
        return True

    raw_port = parts["port"]
    try:
        number = int(raw_port)
    except TypeError:
        # An absent port is None, and int(None) raises TypeError.
        return True
    return 0 <= number <= 65535
|
||
|
|
||
|
|
||
|
def ensure_components_are_valid(uri, validated_components):
    """Raise if any of the named components of the URI is invalid.

    Names in ``_SUBAUTHORITY_VALIDATORS`` are checked through
    :func:`subauthority_component_is_valid`; all others are looked up in
    ``_COMPONENT_VALIDATORS`` and applied to the URI attribute of the same
    name.

    :param uri:
        URI object whose components are checked.
    :param validated_components:
        Iterable of component names to check.
    :raises InvalidComponentsError:
        When at least one component fails its check; every failing name is
        passed to the exception.
    """
    failures = set()
    for name in validated_components:
        if name in _SUBAUTHORITY_VALIDATORS:
            passed = subauthority_component_is_valid(uri, name)
        else:
            passed = _COMPONENT_VALIDATORS[name](getattr(uri, name))
        if not passed:
            failures.add(name)

    if failures:
        raise exceptions.InvalidComponentsError(uri, *failures)
|