usse/scrape/venv/lib/python3.10/site-packages/urllib3/_collections.py

356 lines
11 KiB
Python
Raw Normal View History

2023-12-22 14:26:01 +00:00
from __future__ import absolute_import
try:
from collections.abc import Mapping, MutableMapping
except ImportError:
from collections import Mapping, MutableMapping
try:
from threading import RLock
except ImportError: # Platform-specific: No threads available
class RLock:
def __enter__(self):
pass
def __exit__(self, exc_type, exc_value, traceback):
pass
from collections import OrderedDict
from .exceptions import InvalidHeader
from .packages import six
from .packages.six import iterkeys, itervalues
__all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"]
_Null = object()
class RecentlyUsedContainer(MutableMapping):
"""
Provides a thread-safe dict-like container which maintains up to
``maxsize`` keys while throwing away the least-recently-used keys beyond
``maxsize``.
:param maxsize:
Maximum number of recent elements to retain.
:param dispose_func:
Every time an item is evicted from the container,
``dispose_func(value)`` is called. Callback which will get called
"""
ContainerCls = OrderedDict
def __init__(self, maxsize=10, dispose_func=None):
self._maxsize = maxsize
self.dispose_func = dispose_func
self._container = self.ContainerCls()
self.lock = RLock()
def __getitem__(self, key):
# Re-insert the item, moving it to the end of the eviction line.
with self.lock:
item = self._container.pop(key)
self._container[key] = item
return item
def __setitem__(self, key, value):
evicted_value = _Null
with self.lock:
# Possibly evict the existing value of 'key'
evicted_value = self._container.get(key, _Null)
self._container[key] = value
# If we didn't evict an existing value, we might have to evict the
# least recently used item from the beginning of the container.
if len(self._container) > self._maxsize:
_key, evicted_value = self._container.popitem(last=False)
if self.dispose_func and evicted_value is not _Null:
self.dispose_func(evicted_value)
def __delitem__(self, key):
with self.lock:
value = self._container.pop(key)
if self.dispose_func:
self.dispose_func(value)
def __len__(self):
with self.lock:
return len(self._container)
def __iter__(self):
raise NotImplementedError(
"Iteration over this class is unlikely to be threadsafe."
)
def clear(self):
with self.lock:
# Copy pointers to all values, then wipe the mapping
values = list(itervalues(self._container))
self._container.clear()
if self.dispose_func:
for value in values:
self.dispose_func(value)
def keys(self):
with self.lock:
return list(iterkeys(self._container))
class HTTPHeaderDict(MutableMapping):
"""
:param headers:
An iterable of field-value pairs. Must not contain multiple field names
when compared case-insensitively.
:param kwargs:
Additional field-value pairs to pass in to ``dict.update``.
A ``dict`` like container for storing HTTP Headers.
Field names are stored and compared case-insensitively in compliance with
RFC 7230. Iteration provides the first case-sensitive key seen for each
case-insensitive pair.
Using ``__setitem__`` syntax overwrites fields that compare equal
case-insensitively in order to maintain ``dict``'s api. For fields that
compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
in a loop.
If multiple fields that are equal case-insensitively are passed to the
constructor or ``.update``, the behavior is undefined and some will be
lost.
>>> headers = HTTPHeaderDict()
>>> headers.add('Set-Cookie', 'foo=bar')
>>> headers.add('set-cookie', 'baz=quxx')
>>> headers['content-length'] = '7'
>>> headers['SET-cookie']
'foo=bar, baz=quxx'
>>> headers['Content-Length']
'7'
"""
def __init__(self, headers=None, **kwargs):
super(HTTPHeaderDict, self).__init__()
self._container = OrderedDict()
if headers is not None:
if isinstance(headers, HTTPHeaderDict):
self._copy_from(headers)
else:
self.extend(headers)
if kwargs:
self.extend(kwargs)
def __setitem__(self, key, val):
self._container[key.lower()] = [key, val]
return self._container[key.lower()]
def __getitem__(self, key):
val = self._container[key.lower()]
return ", ".join(val[1:])
def __delitem__(self, key):
del self._container[key.lower()]
def __contains__(self, key):
return key.lower() in self._container
def __eq__(self, other):
if not isinstance(other, Mapping) and not hasattr(other, "keys"):
return False
if not isinstance(other, type(self)):
other = type(self)(other)
return dict((k.lower(), v) for k, v in self.itermerged()) == dict(
(k.lower(), v) for k, v in other.itermerged()
)
def __ne__(self, other):
return not self.__eq__(other)
if six.PY2: # Python 2
iterkeys = MutableMapping.iterkeys
itervalues = MutableMapping.itervalues
__marker = object()
def __len__(self):
return len(self._container)
def __iter__(self):
# Only provide the originally cased names
for vals in self._container.values():
yield vals[0]
def pop(self, key, default=__marker):
"""D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
If key is not found, d is returned if given, otherwise KeyError is raised.
"""
# Using the MutableMapping function directly fails due to the private marker.
# Using ordinary dict.pop would expose the internal structures.
# So let's reinvent the wheel.
try:
value = self[key]
except KeyError:
if default is self.__marker:
raise
return default
else:
del self[key]
return value
def discard(self, key):
try:
del self[key]
except KeyError:
pass
def add(self, key, val):
"""Adds a (name, value) pair, doesn't overwrite the value if it already
exists.
>>> headers = HTTPHeaderDict(foo='bar')
>>> headers.add('Foo', 'baz')
>>> headers['foo']
'bar, baz'
"""
key_lower = key.lower()
new_vals = [key, val]
# Keep the common case aka no item present as fast as possible
vals = self._container.setdefault(key_lower, new_vals)
if new_vals is not vals:
vals.append(val)
def extend(self, *args, **kwargs):
"""Generic import function for any type of header-like object.
Adapted version of MutableMapping.update in order to insert items
with self.add instead of self.__setitem__
"""
if len(args) > 1:
raise TypeError(
"extend() takes at most 1 positional "
"arguments ({0} given)".format(len(args))
)
other = args[0] if len(args) >= 1 else ()
if isinstance(other, HTTPHeaderDict):
for key, val in other.iteritems():
self.add(key, val)
elif isinstance(other, Mapping):
for key in other:
self.add(key, other[key])
elif hasattr(other, "keys"):
for key in other.keys():
self.add(key, other[key])
else:
for key, value in other:
self.add(key, value)
for key, value in kwargs.items():
self.add(key, value)
def getlist(self, key, default=__marker):
"""Returns a list of all the values for the named field. Returns an
empty list if the key doesn't exist."""
try:
vals = self._container[key.lower()]
except KeyError:
if default is self.__marker:
return []
return default
else:
return vals[1:]
def _prepare_for_method_change(self):
"""
Remove content-specific header fields before changing the request
method to GET or HEAD according to RFC 9110, Section 15.4.
"""
content_specific_headers = [
"Content-Encoding",
"Content-Language",
"Content-Location",
"Content-Type",
"Content-Length",
"Digest",
"Last-Modified",
]
for header in content_specific_headers:
self.discard(header)
return self
# Backwards compatibility for httplib
getheaders = getlist
getallmatchingheaders = getlist
iget = getlist
# Backwards compatibility for http.cookiejar
get_all = getlist
def __repr__(self):
return "%s(%s)" % (type(self).__name__, dict(self.itermerged()))
def _copy_from(self, other):
for key in other:
val = other.getlist(key)
if isinstance(val, list):
# Don't need to convert tuples
val = list(val)
self._container[key.lower()] = [key] + val
def copy(self):
clone = type(self)()
clone._copy_from(self)
return clone
def iteritems(self):
"""Iterate over all header lines, including duplicate ones."""
for key in self:
vals = self._container[key.lower()]
for val in vals[1:]:
yield vals[0], val
def itermerged(self):
"""Iterate over all headers, merging duplicate ones together."""
for key in self:
val = self._container[key.lower()]
yield val[0], ", ".join(val[1:])
def items(self):
return list(self.iteritems())
@classmethod
def from_httplib(cls, message): # Python 2
"""Read headers from a Python 2 httplib message object."""
# python2.7 does not expose a proper API for exporting multiheaders
# efficiently. This function re-reads raw lines from the message
# object and extracts the multiheaders properly.
obs_fold_continued_leaders = (" ", "\t")
headers = []
for line in message.headers:
if line.startswith(obs_fold_continued_leaders):
if not headers:
# We received a header line that starts with OWS as described
# in RFC-7230 S3.2.4. This indicates a multiline header, but
# there exists no previous header to which we can attach it.
raise InvalidHeader(
"Header continuation with no previous header: %s" % line
)
else:
key, value = headers[-1]
headers[-1] = (key, value + " " + line.strip())
continue
key, value = line.split(":", 1)
headers.append((key, value.strip()))
return cls(headers)