# usse/funda-scraper/venv/lib/python3.10/site-packages/geopy/geocoders/base.py
#
# 484 lines
# 17 KiB
# Python

import asyncio
import functools
import inspect
import threading
from geopy import compat
from geopy.adapters import (
AdapterHTTPError,
BaseAsyncAdapter,
BaseSyncAdapter,
RequestsAdapter,
URLLibAdapter,
get_retry_after,
)
from geopy.exc import (
ConfigurationError,
GeocoderAuthenticationFailure,
GeocoderInsufficientPrivileges,
GeocoderQueryError,
GeocoderQuotaExceeded,
GeocoderRateLimited,
GeocoderServiceError,
GeocoderTimedOut,
)
from geopy.point import Point
from geopy.util import __version__, logger
# Names exported by ``from <this module> import *``.
__all__ = (
    "Geocoder",
    "options",
)

# User-Agent header value used unless a geocoder or `options` overrides it.
_DEFAULT_USER_AGENT = "geopy/%s" % __version__

# The first adapter class, in order of preference, that reports itself as
# available. `next()` is called without a default, so StopIteration
# propagates at import time if none of the candidates is available.
_DEFAULT_ADAPTER_CLASS = next(
    candidate
    for candidate in (RequestsAdapter, URLLibAdapter)
    if candidate.is_available
)
class options:
    """The `options` object contains default configuration values for
    geocoders, e.g. `timeout` and `User-Agent`.
    Instead of passing a custom value to each geocoder individually, you can
    override a default value in this object.

    Please note that not all geocoders use all attributes of this object.
    For example, some geocoders don't respect the ``default_scheme``
    attribute. Refer to the specific geocoder's initializer doc for a list
    of parameters which that geocoder accepts.

    Example for overriding default ``timeout`` and ``user_agent``::

        >>> import geopy.geocoders
        >>> from geopy.geocoders import Nominatim
        >>> geopy.geocoders.options.default_user_agent = 'my_app/1'
        >>> geopy.geocoders.options.default_timeout = 7
        >>> geolocator = Nominatim()
        >>> print(geolocator.headers)
        {'User-Agent': 'my_app/1'}
        >>> print(geolocator.timeout)
        7

    Attributes:
        default_adapter_factory
            A callable which returns a :class:`geopy.adapters.BaseAdapter`
            instance. Adapters are different implementations of HTTP clients.
            See :mod:`geopy.adapters` for more info.

            This callable accepts two keyword args: ``proxies`` and
            ``ssl_context``. A class might be specified as this callable
            as well.

            Example::

                import geopy.geocoders
                geopy.geocoders.options.default_adapter_factory = (
                    geopy.adapters.URLLibAdapter
                )

                geopy.geocoders.options.default_adapter_factory = (
                    lambda proxies, ssl_context: MyAdapter(
                        proxies=proxies, ssl_context=ssl_context, my_custom_arg=42
                    )
                )

            If `requests <https://requests.readthedocs.io>`_ package is
            installed, the default adapter is
            :class:`geopy.adapters.RequestsAdapter`. Otherwise it is
            :class:`geopy.adapters.URLLibAdapter`.

            .. versionadded:: 2.0

        default_proxies
            Tunnel requests through HTTP proxy.

            By default the system proxies are respected (e.g.
            `HTTP_PROXY` and `HTTPS_PROXY` env vars or platform-specific
            proxy settings, such as macOS or Windows native
            preferences -- see :func:`urllib.request.getproxies` for
            more details). The `proxies` value for using system proxies
            is ``None``.

            To disable system proxies and issue requests directly,
            explicitly pass an empty dict as a value for `proxies`: ``{}``.

            To use a custom HTTP proxy location, pass a string.
            Valid examples are:

            - ``"192.0.2.0:8080"``
            - ``"john:passw0rd@192.0.2.0:8080"``
            - ``"http://john:passw0rd@192.0.2.0:8080"``

            Please note:

            - Scheme part (``http://``) of the proxy is ignored.
            - Only `http` proxy is supported. Even if the proxy scheme
              is `https`, it will be ignored, and the connection between
              client and proxy would still be unencrypted.
              However, `https` requests via `http` proxy are still
              supported (via `HTTP CONNECT` method).

            Raw urllib-style `proxies` dict might be provided instead of
            a string:

            - ``{"https": "192.0.2.0:8080"}`` -- means that HTTP proxy
              would be used only for requests having `https` scheme.
              String `proxies` value is automatically used for both
              schemes, and is provided as a shorthand for the urllib-style
              `proxies` dict.

            For more information, see
            documentation on :func:`urllib.request.getproxies`.

        default_scheme
            Use ``'https'`` or ``'http'`` as the API URL's scheme.

        default_ssl_context
            An :class:`ssl.SSLContext` instance with custom TLS
            verification settings. Pass ``None`` to use the interpreter's
            defaults (that is to use the system's trusted CA certificates).

            To use the CA bundle used by `requests` library::

                import ssl
                import certifi
                import geopy.geocoders
                ctx = ssl.create_default_context(cafile=certifi.where())
                geopy.geocoders.options.default_ssl_context = ctx

            To disable TLS certificate verification completely::

                import ssl
                import geopy.geocoders
                ctx = ssl.create_default_context()
                ctx.check_hostname = False
                ctx.verify_mode = ssl.CERT_NONE
                geopy.geocoders.options.default_ssl_context = ctx

            See docs for the :class:`ssl.SSLContext` class for more examples.

        default_timeout
            Time, in seconds, to wait for the geocoding service to respond
            before raising a :class:`geopy.exc.GeocoderTimedOut` exception.
            Pass `None` to disable timeout.

        default_user_agent
            User-Agent header to send with the requests to geocoder API.
    """

    # Please keep the attributes sorted (Sphinx sorts them in the rendered
    # docs) and make sure that each attr has a corresponding section in
    # the docstring above.
    #
    # It's bad to have the attrs docs separated from the attrs
    # themselves. Although Sphinx supports docstrings for each attr [1],
    # this is not standardized and won't work with `help()` function and
    # in the ReadTheDocs (at least out of the box) [2].
    #
    # [1]: http://www.sphinx-doc.org/en/master/ext/autodoc.html#directive-autoattribute
    # [2]: https://github.com/rtfd/readthedocs.org/issues/855#issuecomment-261337038

    default_adapter_factory = _DEFAULT_ADAPTER_CLASS
    default_proxies = None
    default_scheme = 'https'
    default_ssl_context = None
    default_timeout = 1
    default_user_agent = _DEFAULT_USER_AGENT
# A unique marker object whose `repr` is the string 'DEFAULT_SENTINEL'.
# Sphinx (docs) prints this repr when it renders a method signature that
# uses the object as a default value.
DEFAULT_SENTINEL = type(
    'object',
    (object,),
    dict(__repr__=lambda self: 'DEFAULT_SENTINEL'),
)()
# HTTP status code -> exception class raised for that status by
# `Geocoder._adapter_error_handler`. Statuses missing from this table are
# reported there as `GeocoderServiceError`.
ERROR_CODE_MAP = {
    # Client-side errors.
    400: GeocoderQueryError,
    401: GeocoderAuthenticationFailure,
    402: GeocoderQuotaExceeded,
    403: GeocoderInsufficientPrivileges,
    407: GeocoderAuthenticationFailure,
    408: GeocoderTimedOut,
    412: GeocoderQueryError,
    413: GeocoderQueryError,
    414: GeocoderQueryError,
    429: GeocoderRateLimited,
    # Server-side errors.
    502: GeocoderServiceError,
    503: GeocoderTimedOut,
    504: GeocoderTimedOut,
}

# Special return value for `_geocoder_exception_handler`: it tells the caller
# to swallow the error and return `None` (an empty result) instead.
NONE_RESULT = object()
class Geocoder:
    """
    Template object for geocoders.

    Stores the request configuration (scheme, timeout, proxies, headers,
    SSL context), creates the HTTP adapter and provides helpers for
    formatting coordinates, issuing requests and converting adapter errors
    into geopy exceptions.
    """

    def __init__(
        self,
        *,
        scheme=None,
        timeout=DEFAULT_SENTINEL,
        proxies=DEFAULT_SENTINEL,
        user_agent=None,
        ssl_context=DEFAULT_SENTINEL,
        adapter_factory=None
    ):
        """
        Resolve the configuration and instantiate the adapter.

        Every argument falls back to the matching ``options.default_*``
        attribute: ``scheme``, ``user_agent`` and ``adapter_factory`` when
        they are falsy/``None``, the others when they are left at
        ``DEFAULT_SENTINEL`` (so an explicit ``None`` is honoured for
        ``timeout``, ``proxies`` and ``ssl_context``).

        :param scheme: ``'http'`` or ``'https'``.
        :param timeout: Default timeout for requests made by this instance.
        :param proxies: A proxy string or an urllib-style ``proxies`` dict.
            A string is expanded to a dict covering both schemes.
        :param user_agent: Value of the ``User-Agent`` request header.
        :param ssl_context: Passed to the adapter factory as-is.
        :param adapter_factory: Callable accepting ``proxies`` and
            ``ssl_context`` keyword args and returning an adapter instance.
        :raises ConfigurationError: If the scheme is not supported or the
            created adapter extends neither ``BaseSyncAdapter`` nor
            ``BaseAsyncAdapter``.
        """
        self.scheme = scheme or options.default_scheme
        if self.scheme not in ('http', 'https'):
            raise ConfigurationError(
                'Supported schemes are `http` and `https`.'
            )
        self.timeout = (timeout if timeout is not DEFAULT_SENTINEL
                        else options.default_timeout)
        self.proxies = (proxies if proxies is not DEFAULT_SENTINEL
                        else options.default_proxies)
        self.headers = {'User-Agent': user_agent or options.default_user_agent}
        self.ssl_context = (ssl_context if ssl_context is not DEFAULT_SENTINEL
                            else options.default_ssl_context)

        # A plain string is a shorthand for "use this proxy for both schemes".
        if isinstance(self.proxies, str):
            self.proxies = {'http': self.proxies, 'https': self.proxies}

        if adapter_factory is None:
            adapter_factory = options.default_adapter_factory
        self.adapter = adapter_factory(
            proxies=self.proxies,
            ssl_context=self.ssl_context,
        )

        # The working mode (sync or async) is fixed by the adapter's base class.
        if isinstance(self.adapter, BaseSyncAdapter):
            self.__run_async = False
        elif isinstance(self.adapter, BaseAsyncAdapter):
            self.__run_async = True
        else:
            raise ConfigurationError(
                "Adapter %r must extend either BaseSyncAdapter or BaseAsyncAdapter"
                % (type(self.adapter),)
            )

    def __enter__(self):
        """Context manager for synchronous adapters. At exit all
        open connections will be closed.

        In synchronous mode context manager usage is not required,
        and connections will be automatically closed by garbage collection.
        """
        if self.__run_async:
            raise TypeError("`async with` must be used with async adapters")
        res = self.adapter.__enter__()
        assert res is self.adapter, "adapter's __enter__ must return `self`"
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Delegate the context exit to the adapter."""
        self.adapter.__exit__(exc_type, exc_val, exc_tb)

    async def __aenter__(self):
        """Context manager for asynchronous adapters. At exit all
        open connections will be closed.

        In asynchronous mode context manager usage is not required,
        however, it is strongly advised to avoid warnings about
        resources leaks.
        """
        if not self.__run_async:
            raise TypeError("`async with` cannot be used with sync adapters")
        res = await self.adapter.__aenter__()
        assert res is self.adapter, "adapter's __aenter__ must return `self`"
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Delegate the asynchronous context exit to the adapter."""
        await self.adapter.__aexit__(exc_type, exc_val, exc_tb)

    def _coerce_point_to_string(self, point, output_format="%(lat)s,%(lon)s"):
        """
        Do the right thing on "point" input. For geocoders with reverse
        methods.

        :param point: A ``Point`` or anything ``Point(...)`` accepts.
        :param output_format: A ``%``-style template with ``lat`` and
            ``lon`` keys.
        :return: ``output_format`` filled with the formatted coordinates.
        """
        if not isinstance(point, Point):
            point = Point(point)

        # Altitude is silently dropped.
        #
        # Geocoding services (almost?) always consider only lat and lon
        # in queries, so altitude doesn't affect the request.
        # A non-zero altitude should not raise an exception
        # though, because PoIs are assumed to span the whole
        # altitude axis (i.e. not just the 0km plane).
        return output_format % dict(lat=_format_coordinate(point.latitude),
                                    lon=_format_coordinate(point.longitude))

    def _format_bounding_box(
        self, bbox, output_format="%(lat1)s,%(lon1)s,%(lat2)s,%(lon2)s"
    ):
        """
        Transform bounding box boundaries to a string matching
        `output_format` from the following formats:

            - [Point(lat1, lon1), Point(lat2, lon2)]
            - [[lat1, lon1], [lat2, lon2]]
            - ["lat1,lon1", "lat2,lon2"]

        It is guaranteed that lat1 <= lat2 and lon1 <= lon2.

        :raises GeocoderQueryError: If ``bbox`` does not contain exactly
            two items.
        """
        if len(bbox) != 2:
            raise GeocoderQueryError("Unsupported format for a bounding box")
        p1, p2 = bbox
        p1, p2 = Point(p1), Point(p2)
        return output_format % dict(lat1=min(p1.latitude, p2.latitude),
                                    lon1=min(p1.longitude, p2.longitude),
                                    lat2=max(p1.latitude, p2.latitude),
                                    lon2=max(p1.longitude, p2.longitude))

    def _geocoder_exception_handler(self, error):
        """
        Geocoder-specific exceptions handler.
        Override if custom exceptions processing is needed.
        For example, raising an appropriate GeocoderQuotaExceeded on non-200
        response with a textual message in the body about the exceeded quota.

        Return `NONE_RESULT` to have the geocoding call return `None` (meaning
        empty result).
        """
        pass

    def _call_geocoder(
        self,
        url,
        callback,
        *,
        timeout=DEFAULT_SENTINEL,
        is_json=True,
        headers=None
    ):
        """
        For a generated query URL, get the results.

        :param url: The URL to request.
        :param callback: Called with the adapter's response; its return
            value becomes the result of this call. In async mode it may
            return an awaitable, which is awaited.
        :param timeout: Overrides ``self.timeout`` unless left at
            ``DEFAULT_SENTINEL``.
        :param is_json: Use the adapter's ``get_json`` when true, otherwise
            ``get_text``.
        :param headers: Extra headers merged over ``self.headers`` for this
            request only.
        :return: In sync mode, the callback's result, or ``None`` when the
            error handler returned ``NONE_RESULT``. In async mode, a
            coroutine resolving to the same.
        """
        req_headers = self.headers.copy()
        if headers:
            req_headers.update(headers)

        timeout = (timeout if timeout is not DEFAULT_SENTINEL
                   else self.timeout)

        try:
            if is_json:
                result = self.adapter.get_json(url, timeout=timeout, headers=req_headers)
            else:
                result = self.adapter.get_text(url, timeout=timeout, headers=req_headers)
            if self.__run_async:
                # `result` is awaited inside the coroutine, so errors have
                # to be handled there as well as in the outer `try`.
                async def fut():
                    try:
                        res = callback(await result)
                        if inspect.isawaitable(res):
                            res = await res
                        return res
                    except Exception as error:
                        res = self._adapter_error_handler(error)
                        if res is NONE_RESULT:
                            return None
                        raise

                return fut()
            else:
                return callback(result)
        except Exception as error:
            # The handler either raises a mapped exception, asks for an
            # empty result, or returns so that the original error is re-raised.
            res = self._adapter_error_handler(error)
            if res is NONE_RESULT:
                return None
            raise

    def _adapter_error_handler(self, error):
        """
        Convert an error raised while calling the geocoder.

        ``_geocoder_exception_handler`` always gets the first chance to
        handle the error. If it returns ``NONE_RESULT``, that value is
        returned. Otherwise an ``AdapterHTTPError`` is re-raised as the
        exception class mapped to its status code in ``ERROR_CODE_MAP``
        (``GeocoderServiceError`` when unmapped); for any other error this
        method returns ``None`` and the caller re-raises the original.
        """
        is_http_error = isinstance(error, AdapterHTTPError)
        if is_http_error and error.text:
            logger.info(
                'Received an HTTP error (%s): %s',
                error.status_code,
                error.text,
                exc_info=False,
            )

        res = self._geocoder_exception_handler(error)
        if res is NONE_RESULT:
            return NONE_RESULT
        if not is_http_error:
            return None

        exc_cls = ERROR_CODE_MAP.get(error.status_code, GeocoderServiceError)
        if issubclass(exc_cls, GeocoderRateLimited):
            raise exc_cls(
                str(error), retry_after=get_retry_after(error.headers)
            ) from error
        raise exc_cls(str(error)) from error

    # Kept as comments on purpose: a sketch of the methods that concrete
    # geocoders are expected to provide.
    #
    # def geocode(self, query, *, exactly_one=True, timeout=DEFAULT_SENTINEL):
    #     raise NotImplementedError()

    # def reverse(self, query, *, exactly_one=True, timeout=DEFAULT_SENTINEL):
    #     raise NotImplementedError()
def _format_coordinate(coordinate):
if abs(coordinate) >= 1:
return coordinate # use the default arbitrary precision scientific notation
return f"{coordinate:.7f}"
def _synchronized(func):
"""A decorator for geocoder methods which makes the method always run
under a lock. The lock is reentrant.
This decorator transparently handles sync and async working modes.
"""
sync_lock = threading.RLock()
def locked_sync(self, *args, **kwargs):
with sync_lock:
return func(self, *args, **kwargs)
# At the moment this decorator is evaluated we don't know if we
# will work in sync or async mode.
# But we shouldn't create the asyncio Lock in sync mode to avoid
# unwanted implicit loop initialization.
async_lock = None # asyncio.Lock()
async_lock_task = None # support reentrance
async def locked_async(self, *args, **kwargs):
nonlocal async_lock
nonlocal async_lock_task
if async_lock is None:
async_lock = asyncio.Lock()
if async_lock.locked():
assert async_lock_task is not None
if compat.current_task() is async_lock_task:
res = func(self, *args, **kwargs)
if inspect.isawaitable(res):
res = await res
return res
async with async_lock:
async_lock_task = compat.current_task()
try:
res = func(self, *args, **kwargs)
if inspect.isawaitable(res):
res = await res
return res
finally:
async_lock_task = None
@functools.wraps(func)
def f(self, *args, **kwargs):
run_async = isinstance(self.adapter, BaseAsyncAdapter)
if run_async:
return locked_async(self, *args, **kwargs)
else:
return locked_sync(self, *args, **kwargs)
return f