usse/funda-scraper/venv/lib/python3.10/site-packages/rfc3986/misc.py

132 lines
4.0 KiB
Python

# Copyright (c) 2014 Rackspace
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Module containing compiled regular expressions and constants.
This module contains important constants, patterns, and compiled regular
expressions for parsing and validating URIs and their components.
"""
import re
from . import abnf_regexp
# Names of the five URI components, in the order scheme, authority, path,
# query, fragment.
# These are enumerated for the named tuple used as a superclass of
# URIReference
URI_COMPONENTS = ["scheme", "authority", "path", "query", "fragment"]
# Lookup table mapping a descriptive key to the corresponding character-class
# constant re-exported from abnf_regexp. The "re_"-prefixed keys point at the
# *_RE variants, i.e. the forms escaped for use inside a regular expression.
important_characters = {
"generic_delimiters": abnf_regexp.GENERIC_DELIMITERS,
"sub_delimiters": abnf_regexp.SUB_DELIMITERS,
# We need to escape the '*' in this case
"re_sub_delimiters": abnf_regexp.SUB_DELIMITERS_RE,
"unreserved_chars": abnf_regexp.UNRESERVED_CHARS,
# We need to escape the '-' in this case:
"re_unreserved": abnf_regexp.UNRESERVED_RE,
}
# Module-level aliases for the character collections defined in abnf_regexp.
# NOTE(review): the *_SET suffix suggests these are set objects suitable for
# membership tests -- confirm against abnf_regexp.
# For details about delimiters and reserved characters, see:
# http://tools.ietf.org/html/rfc3986#section-2.2
GENERIC_DELIMITERS = abnf_regexp.GENERIC_DELIMITERS_SET
SUB_DELIMITERS = abnf_regexp.SUB_DELIMITERS_SET
RESERVED_CHARS = abnf_regexp.RESERVED_CHARS_SET
# For details about unreserved characters, see:
# http://tools.ietf.org/html/rfc3986#section-2.3
UNRESERVED_CHARS = abnf_regexp.UNRESERVED_CHARS_SET
NON_PCT_ENCODED = abnf_regexp.NON_PCT_ENCODED_SET
# Compiled form of the general URL parsing expression from abnf_regexp.
URI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE)

# Anchored matcher for an authority string. It exposes three named groups:
# an optional "userinfo" (followed by "@"), a mandatory "host", and an
# optional "port" that may be preceded by ":".
SUBAUTHORITY_MATCHER = re.compile(
    f"^(?:(?P<userinfo>{abnf_regexp.USERINFO_RE})@)?"  # userinfo
    f"(?P<host>{abnf_regexp.HOST_PATTERN})"  # host
    f":?(?P<port>{abnf_regexp.PORT_RE})?$"  # port
)
# Each of the host-related matchers wraps its abnf_regexp pattern in ^...$ so
# that the whole candidate string has to match.
HOST_MATCHER = re.compile(f"^{abnf_regexp.HOST_RE}$")
IPv4_MATCHER = re.compile(f"^{abnf_regexp.IPv4_RE}$")
# IPv6 literals are additionally required to be enclosed in square brackets.
IPv6_MATCHER = re.compile(rf"^\[{abnf_regexp.IPv6_ADDRZ_RFC4007_RE}\]$")
# Used by host validator
IPv6_NO_RFC4007_MATCHER = re.compile(rf"^\[{abnf_regexp.IPv6_ADDRZ_RE}\]$")
# Matcher used to validate path components
PATH_MATCHER = re.compile(abnf_regexp.PATH_RE)
# ##################################
# Query and Fragment Matcher Section
# ##################################
# Compiled directly from abnf_regexp.QUERY_RE, with no extra anchoring added
# here.
QUERY_MATCHER = re.compile(abnf_regexp.QUERY_RE)
# Fragments reuse the very same compiled pattern object as queries; this is an
# alias, not a second compilation.
FRAGMENT_MATCHER = QUERY_MATCHER
# Scheme validation, see: http://tools.ietf.org/html/rfc3986#section-3.1
# The scheme pattern is wrapped in ^...$ so the entire string must match.
SCHEME_MATCHER = re.compile(f"^{abnf_regexp.SCHEME_RE}$")
# Anchored matcher for a relative reference: a relative part, optionally
# followed by "?" + query and "#" + fragment.
# NOTE(review): QUERY_RE and FRAGMENT_RE are embedded here unmodified, whereas
# ABSOLUTE_URI_MATCHER below slices QUERY_RE with [1:-1]. If those patterns
# carry their own leading/trailing anchors, the optional groups in this
# matcher may never match -- confirm against abnf_regexp.
RELATIVE_REF_MATCHER = re.compile(
    r"^{relative_part}(\?{query})?(#{fragment})?$".format(
        relative_part=abnf_regexp.RELATIVE_PART_RE,
        query=abnf_regexp.QUERY_RE,
        fragment=abnf_regexp.FRAGMENT_RE,
    )
)

# See http://tools.ietf.org/html/rfc3986#section-4.3
# Anchored matcher for an absolute URI: scheme, ":", hierarchical part and an
# optional "?" + query (no fragment). The [1:-1] slice drops the first and
# last character of QUERY_RE before embedding it (presumably its own anchors;
# verify in abnf_regexp).
ABSOLUTE_URI_MATCHER = re.compile(
    r"^{scheme}:{hier_part}(\?{query})?$".format(
        scheme=abnf_regexp.COMPONENT_PATTERN_DICT["scheme"],
        hier_part=abnf_regexp.HIER_PART_RE,
        query=abnf_regexp.QUERY_RE[1:-1],
    )
)
# ###############
# IRIs / RFC 3987
# ###############
# Same parsing expression as URI_MATCHER, compiled with the re.UNICODE flag.
IRI_MATCHER = re.compile(abnf_regexp.URL_PARSING_RE, re.UNICODE)

# IRI counterpart of SUBAUTHORITY_MATCHER: identical group layout
# ("userinfo", "host", "port") but built from the IUSERINFO_RE / IHOST_RE
# patterns and compiled with re.UNICODE.
ISUBAUTHORITY_MATCHER = re.compile(
    f"^(?:(?P<userinfo>{abnf_regexp.IUSERINFO_RE})@)?"  # iuserinfo
    f"(?P<host>{abnf_regexp.IHOST_RE})"  # ihost
    f":?(?P<port>{abnf_regexp.PORT_RE})?$",  # port
    re.UNICODE,
)
# Path merger as defined in http://tools.ietf.org/html/rfc3986#section-5.2.3
def merge_paths(base_uri, relative_path):
    """Merge a base URI's path with a relative URI's path.

    Follows the "merge paths" routine of RFC 3986, section 5.2.3: the
    relative path replaces everything after the right-most "/" of the base
    path.

    :param base_uri: object whose ``path`` attribute holds the base path
        (a string, or ``None`` when absent).
    :param str relative_path: path of the reference being merged in.
    :returns: the merged path.
    :rtype: str
    """
    base_path = base_uri.path or ""
    if not base_path:
        # With no base path the reference is anchored at the root. This is
        # the result regardless of whether an authority is present.
        return "/" + relative_path

    # rfind() returns -1 when the base path contains no "/". The slice then
    # becomes base_path[:0], discarding the entire base path as the RFC
    # requires, instead of chopping off just its last character.
    last_slash = base_path.rfind("/")
    return base_path[: last_slash + 1] + relative_path
# Unique module-level marker object; being a bare ``object()`` it is only
# equal to itself, so it should be compared by identity (``is``).
# NOTE(review): presumably a sentinel meaning "keep the existing value" for
# callers elsewhere in the package -- confirm against its users.
UseExisting = object()