132 lines
4.0 KiB
Python
132 lines
4.0 KiB
Python
|
# Copyright (c) 2014 Rackspace
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||
|
# implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
"""
|
||
|
Module containing compiled regular expressions and constants.
|
||
|
|
||
|
This module contains important constants, patterns, and compiled regular
|
||
|
expressions for parsing and validating URIs and their components.
|
||
|
"""
|
||
|
import re
|
||
|
|
||
|
from . import abnf_regexp
|
||
|
|
||
|
# These are enumerated for the named tuple used as a superclass of
|
||
|
# URIReference
|
||
|
URI_COMPONENTS = [
    "scheme",
    "authority",
    "path",
    "query",
    "fragment",
]
|
||
|
|
||
|
# Character classes from ``abnf_regexp`` gathered under descriptive keys.
important_characters = {
    "generic_delimiters": abnf_regexp.GENERIC_DELIMITERS,
    "sub_delimiters": abnf_regexp.SUB_DELIMITERS,
    # Variant of the sub-delimiters in which the '*' is escaped
    "re_sub_delimiters": abnf_regexp.SUB_DELIMITERS_RE,
    "unreserved_chars": abnf_regexp.UNRESERVED_CHARS,
    # Variant of the unreserved characters in which the '-' is escaped
    "re_unreserved": abnf_regexp.UNRESERVED_RE,
}
|
||
|
|
||
|
# Delimiters and reserved characters are specified in:
# http://tools.ietf.org/html/rfc3986#section-2.2
GENERIC_DELIMITERS = abnf_regexp.GENERIC_DELIMITERS_SET
SUB_DELIMITERS = abnf_regexp.SUB_DELIMITERS_SET
RESERVED_CHARS = abnf_regexp.RESERVED_CHARS_SET
# Unreserved characters are specified in:
# http://tools.ietf.org/html/rfc3986#section-2.3
UNRESERVED_CHARS = abnf_regexp.UNRESERVED_CHARS_SET
NON_PCT_ENCODED = abnf_regexp.NON_PCT_ENCODED_SET
|
||
|
|
||
|
# Compiled form of ``abnf_regexp.URL_PARSING_RE``.
# NOTE(review): presumably splits a URI into the parts named in
# URI_COMPONENTS -- confirm against abnf_regexp.
URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE)
|
||
|
|
||
|
# Splits an authority into the named groups ``userinfo``, ``host`` and
# ``port``; the userinfo (with its trailing "@") and the port are optional.
SUBAUTHORITY_MATCHER = re.compile(
    f"^(?:(?P<userinfo>{abnf_regexp.USERINFO_RE})@)?"  # userinfo
    f"(?P<host>{abnf_regexp.HOST_PATTERN})"  # host
    f":?(?P<port>{abnf_regexp.PORT_RE})?$"  # port
)
|
||
|
|
||
|
|
||
|
# Each matcher below anchors its pattern to the whole input string.
HOST_MATCHER = re.compile(f"^{abnf_regexp.HOST_RE}$")
IPv4_MATCHER = re.compile(f"^{abnf_regexp.IPv4_RE}$")
# The IPv6 matchers additionally require the enclosing square brackets.
IPv6_MATCHER = re.compile(rf"^\[{abnf_regexp.IPv6_ADDRZ_RFC4007_RE}\]$")

# Used by host validator
IPv6_NO_RFC4007_MATCHER = re.compile(rf"^\[{abnf_regexp.IPv6_ADDRZ_RE}\]$")
|
||
|
|
||
|
# Compiled pattern against which path components are validated
PATH_MATCHER = re.compile(abnf_regexp.PATH_RE)
|
||
|
|
||
|
|
||
|
# ##################################
# Query and Fragment Matcher Section
# ##################################

# Queries and fragments are validated by one and the same compiled pattern
# object, so both names are bound to it in a single statement.
QUERY_MATCHER = FRAGMENT_MATCHER = re.compile(abnf_regexp.QUERY_RE)
|
||
|
|
||
|
# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1
# The scheme pattern is anchored at both ends so the whole string must match.
SCHEME_MATCHER = re.compile("^" + abnf_regexp.SCHEME_RE + "$")
|
||
|
|
||
|
# Relative reference: a relative part followed by an optional "?query" and
# an optional "#fragment", anchored to the whole string.
# NOTE(review): QUERY_RE and FRAGMENT_RE are embedded unmodified here, while
# ABSOLUTE_URI_MATCHER embeds QUERY_RE[1:-1] -- confirm whether these
# patterns carry their own anchors that should be stripped here as well.
RELATIVE_REF_MATCHER = re.compile(
    r"^{}(\?{})?(#{})?$".format(
        abnf_regexp.RELATIVE_PART_RE,
        abnf_regexp.QUERY_RE,
        abnf_regexp.FRAGMENT_RE,
    )
)
|
||
|
|
||
|
# See http://tools.ietf.org/html/rfc3986#section-4.3
# Absolute URI: "<scheme>:<hier-part>" followed by an optional "?query";
# the pattern has no fragment part.
ABSOLUTE_URI_MATCHER = re.compile(
    r"^{}:{}(\?{})?$".format(
        abnf_regexp.COMPONENT_PATTERN_DICT["scheme"],
        abnf_regexp.HIER_PART_RE,
        # The first and last characters of QUERY_RE are dropped before the
        # pattern is embedded.
        abnf_regexp.QUERY_RE[1:-1],
    )
)
|
||
|
|
||
|
# ###############
|
||
|
# IRIs / RFC 3987
|
||
|
# ###############
|
||
|
|
||
|
# Compiled from the same pattern as URI_MATCHER, with the re.UNICODE flag.
IRI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE, re.UNICODE)
|
||
|
|
||
|
# IRI counterpart of SUBAUTHORITY_MATCHER: same named groups (``userinfo``,
# ``host``, ``port``), built from the IUSERINFO_RE / IHOST_RE patterns and
# compiled with re.UNICODE.
ISUBAUTHORITY_MATCHER = re.compile(
    f"^(?:(?P<userinfo>{abnf_regexp.IUSERINFO_RE})@)?"  # iuserinfo
    f"(?P<host>{abnf_regexp.IHOST_RE})"  # ihost
    f":?(?P<port>{abnf_regexp.PORT_RE})?$",  # port
    re.UNICODE,
)
|
||
|
|
||
|
|
||
|
# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3
|
||
|
def merge_paths(base_uri, relative_path):
    """Merge a base URI's path with a relative URI's path.

    Follows the "Merge Paths" routine of RFC 3986, section 5.2.3.

    :param base_uri: Object exposing ``path`` and ``authority`` attributes;
        either attribute may be ``None``.
    :param str relative_path: Path of the relative reference.
    :returns: ``"/" + relative_path`` when the base has an authority but no
        path; otherwise the base path up to and including its right-most
        ``"/"`` followed by ``relative_path``.
    :rtype: str
    """
    if base_uri.path is None and base_uri.authority is not None:
        return "/" + relative_path

    path = base_uri.path or ""
    # ``rfind`` returns -1 when the base path holds no "/", so the slice
    # below is empty and the entire base path is discarded, as the RFC
    # requires.  Slicing with the raw -1 would instead chop only the last
    # character of the base path and keep the rest.
    index = path.rfind("/")
    return path[: index + 1] + relative_path
|
||
|
|
||
|
|
||
|
# Unique sentinel object; it compares identical only to itself, so it cannot
# be confused with ``None`` or any other value.
# NOTE(review): the name suggests it marks "keep the existing value" --
# confirm against the callers that use it.
UseExisting = object()
|