Added delete option to database storage.

This commit is contained in:
Batuhan Berk Başoğlu 2020-10-12 12:10:01 -04:00
parent 308604a33c
commit 963b5bc68b
1868 changed files with 192402 additions and 13278 deletions

View file

@ -0,0 +1,46 @@
from __future__ import absolute_import
# For backwards compatibility, provide imports that used to be here.
from .connection import is_connection_dropped
from .request import make_headers
from .response import is_fp_closed
from .ssl_ import (
SSLContext,
HAS_SNI,
IS_PYOPENSSL,
IS_SECURETRANSPORT,
assert_fingerprint,
resolve_cert_reqs,
resolve_ssl_version,
ssl_wrap_socket,
PROTOCOL_TLS,
)
from .timeout import current_time, Timeout
from .retry import Retry
from .url import get_host, parse_url, split_first, Url
from .wait import wait_for_read, wait_for_write
__all__ = (
"HAS_SNI",
"IS_PYOPENSSL",
"IS_SECURETRANSPORT",
"SSLContext",
"PROTOCOL_TLS",
"Retry",
"Timeout",
"Url",
"assert_fingerprint",
"current_time",
"is_connection_dropped",
"is_fp_closed",
"get_host",
"parse_url",
"make_headers",
"resolve_cert_reqs",
"resolve_ssl_version",
"split_first",
"ssl_wrap_socket",
"wait_for_read",
"wait_for_write",
)

View file

@ -0,0 +1,138 @@
from __future__ import absolute_import
import socket
from .wait import NoWayToWaitForSocketError, wait_for_read
from ..contrib import _appengine_environ
def is_connection_dropped(conn): # Platform-specific
"""
Returns True if the connection is dropped and should be closed.
:param conn:
:class:`httplib.HTTPConnection` object.
Note: For platforms like AppEngine, this will always return ``False`` to
let the platform handle connection recycling transparently for us.
"""
sock = getattr(conn, "sock", False)
if sock is False: # Platform-specific: AppEngine
return False
if sock is None: # Connection already closed (such as by httplib).
return True
try:
# Returns True if readable, which here means it's been dropped
return wait_for_read(sock, timeout=0.0)
except NoWayToWaitForSocketError: # Platform-specific: AppEngine
return False
# This function is copied from socket.py in the Python 2.7 standard
# library test suite. Added to its signature is only `socket_options`.
# One additional modification is that we avoid binding to IPv6 servers
# discovered in DNS if the system doesn't have IPv6 functionality.
def create_connection(
address,
timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
source_address=None,
socket_options=None,
):
"""Connect to *address* and return the socket object.
Convenience function. Connect to *address* (a 2-tuple ``(host,
port)``) and return the socket object. Passing the optional
*timeout* parameter will set the timeout on the socket instance
before attempting to connect. If no *timeout* is supplied, the
global default timeout setting returned by :func:`getdefaulttimeout`
is used. If *source_address* is set it must be a tuple of (host, port)
for the socket to bind as a source address before making the connection.
An host of '' or port 0 tells the OS to use the default.
"""
host, port = address
if host.startswith("["):
host = host.strip("[]")
err = None
# Using the value from allowed_gai_family() in the context of getaddrinfo lets
# us select whether to work with IPv4 DNS records, IPv6 records, or both.
# The original create_connection function always returns all records.
family = allowed_gai_family()
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
af, socktype, proto, canonname, sa = res
sock = None
try:
sock = socket.socket(af, socktype, proto)
# If provided, set socket level options before connecting.
_set_socket_options(sock, socket_options)
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
sock.settimeout(timeout)
if source_address:
sock.bind(source_address)
sock.connect(sa)
return sock
except socket.error as e:
err = e
if sock is not None:
sock.close()
sock = None
if err is not None:
raise err
raise socket.error("getaddrinfo returns an empty list")
def _set_socket_options(sock, options):
if options is None:
return
for opt in options:
sock.setsockopt(*opt)
def allowed_gai_family():
"""This function is designed to work in the context of
getaddrinfo, where family=socket.AF_UNSPEC is the default and
will perform a DNS search for both IPv6 and IPv4 records."""
family = socket.AF_INET
if HAS_IPV6:
family = socket.AF_UNSPEC
return family
def _has_ipv6(host):
""" Returns True if the system can bind an IPv6 address. """
sock = None
has_ipv6 = False
# App Engine doesn't support IPV6 sockets and actually has a quota on the
# number of sockets that can be used, so just early out here instead of
# creating a socket needlessly.
# See https://github.com/urllib3/urllib3/issues/1446
if _appengine_environ.is_appengine_sandbox():
return False
if socket.has_ipv6:
# has_ipv6 returns true if cPython was compiled with IPv6 support.
# It does not tell us if the system has IPv6 support enabled. To
# determine that we must bind to an IPv6 address.
# https://github.com/urllib3/urllib3/pull/611
# https://bugs.python.org/issue658327
try:
sock = socket.socket(socket.AF_INET6)
sock.bind((host, 0))
has_ipv6 = True
except Exception:
pass
if sock:
sock.close()
return has_ipv6
HAS_IPV6 = _has_ipv6("::1")

View file

@ -0,0 +1,21 @@
import collections
from ..packages import six
from ..packages.six.moves import queue
if six.PY2:
# Queue is imported for side effects on MS Windows. See issue #229.
import Queue as _unused_module_Queue # noqa: F401
class LifoQueue(queue.Queue):
def _init(self, _):
self.queue = collections.deque()
def _qsize(self, len=len):
return len(self.queue)
def _put(self, item):
self.queue.append(item)
def _get(self):
return self.queue.pop()

View file

@ -0,0 +1,135 @@
from __future__ import absolute_import
from base64 import b64encode
from ..packages.six import b, integer_types
from ..exceptions import UnrewindableBodyError
ACCEPT_ENCODING = "gzip,deflate"
try:
import brotli as _unused_module_brotli # noqa: F401
except ImportError:
pass
else:
ACCEPT_ENCODING += ",br"
_FAILEDTELL = object()
def make_headers(
keep_alive=None,
accept_encoding=None,
user_agent=None,
basic_auth=None,
proxy_basic_auth=None,
disable_cache=None,
):
"""
Shortcuts for generating request headers.
:param keep_alive:
If ``True``, adds 'connection: keep-alive' header.
:param accept_encoding:
Can be a boolean, list, or string.
``True`` translates to 'gzip,deflate'.
List will get joined by comma.
String will be used as provided.
:param user_agent:
String representing the user-agent you want, such as
"python-urllib3/0.6"
:param basic_auth:
Colon-separated username:password string for 'authorization: basic ...'
auth header.
:param proxy_basic_auth:
Colon-separated username:password string for 'proxy-authorization: basic ...'
auth header.
:param disable_cache:
If ``True``, adds 'cache-control: no-cache' header.
Example::
>>> make_headers(keep_alive=True, user_agent="Batman/1.0")
{'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
>>> make_headers(accept_encoding=True)
{'accept-encoding': 'gzip,deflate'}
"""
headers = {}
if accept_encoding:
if isinstance(accept_encoding, str):
pass
elif isinstance(accept_encoding, list):
accept_encoding = ",".join(accept_encoding)
else:
accept_encoding = ACCEPT_ENCODING
headers["accept-encoding"] = accept_encoding
if user_agent:
headers["user-agent"] = user_agent
if keep_alive:
headers["connection"] = "keep-alive"
if basic_auth:
headers["authorization"] = "Basic " + b64encode(b(basic_auth)).decode("utf-8")
if proxy_basic_auth:
headers["proxy-authorization"] = "Basic " + b64encode(
b(proxy_basic_auth)
).decode("utf-8")
if disable_cache:
headers["cache-control"] = "no-cache"
return headers
def set_file_position(body, pos):
"""
If a position is provided, move file to that point.
Otherwise, we'll attempt to record a position for future use.
"""
if pos is not None:
rewind_body(body, pos)
elif getattr(body, "tell", None) is not None:
try:
pos = body.tell()
except (IOError, OSError):
# This differentiates from None, allowing us to catch
# a failed `tell()` later when trying to rewind the body.
pos = _FAILEDTELL
return pos
def rewind_body(body, body_pos):
"""
Attempt to rewind body to a certain position.
Primarily used for request redirects and retries.
:param body:
File-like object that supports seek.
:param int pos:
Position to seek to in file.
"""
body_seek = getattr(body, "seek", None)
if body_seek is not None and isinstance(body_pos, integer_types):
try:
body_seek(body_pos)
except (IOError, OSError):
raise UnrewindableBodyError(
"An error occurred when rewinding request body for redirect/retry."
)
elif body_pos is _FAILEDTELL:
raise UnrewindableBodyError(
"Unable to record file position for rewinding "
"request body during a redirect/retry."
)
else:
raise ValueError(
"body_pos must be of type integer, instead it was %s." % type(body_pos)
)

View file

@ -0,0 +1,86 @@
from __future__ import absolute_import
from ..packages.six.moves import http_client as httplib
from ..exceptions import HeaderParsingError
def is_fp_closed(obj):
"""
Checks whether a given file-like object is closed.
:param obj:
The file-like object to check.
"""
try:
# Check `isclosed()` first, in case Python3 doesn't set `closed`.
# GH Issue #928
return obj.isclosed()
except AttributeError:
pass
try:
# Check via the official file-like-object way.
return obj.closed
except AttributeError:
pass
try:
# Check if the object is a container for another file-like object that
# gets released on exhaustion (e.g. HTTPResponse).
return obj.fp is None
except AttributeError:
pass
raise ValueError("Unable to determine whether fp is closed.")
def assert_header_parsing(headers):
"""
Asserts whether all headers have been successfully parsed.
Extracts encountered errors from the result of parsing headers.
Only works on Python 3.
:param headers: Headers to verify.
:type headers: `httplib.HTTPMessage`.
:raises urllib3.exceptions.HeaderParsingError:
If parsing errors are found.
"""
# This will fail silently if we pass in the wrong kind of parameter.
# To make debugging easier add an explicit check.
if not isinstance(headers, httplib.HTTPMessage):
raise TypeError("expected httplib.Message, got {0}.".format(type(headers)))
defects = getattr(headers, "defects", None)
get_payload = getattr(headers, "get_payload", None)
unparsed_data = None
if get_payload:
# get_payload is actually email.message.Message.get_payload;
# we're only interested in the result if it's not a multipart message
if not headers.is_multipart():
payload = get_payload()
if isinstance(payload, (bytes, str)):
unparsed_data = payload
if defects or unparsed_data:
raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
def is_response_to_head(response):
"""
Checks whether the request of a response has been a HEAD-request.
Handles the quirks of AppEngine.
:param conn:
:type conn: :class:`httplib.HTTPResponse`
"""
# FIXME: Can we do this somehow without accessing private httplib _method?
method = response._method
if isinstance(method, int): # Platform-specific: Appengine
return method == 3
return method.upper() == "HEAD"

View file

@ -0,0 +1,453 @@
from __future__ import absolute_import
import time
import logging
from collections import namedtuple
from itertools import takewhile
import email
import re
from ..exceptions import (
ConnectTimeoutError,
MaxRetryError,
ProtocolError,
ReadTimeoutError,
ResponseError,
InvalidHeader,
ProxyError,
)
from ..packages import six
log = logging.getLogger(__name__)
# Data structure for representing the metadata of requests that result in a retry.
RequestHistory = namedtuple(
"RequestHistory", ["method", "url", "error", "status", "redirect_location"]
)
class Retry(object):
""" Retry configuration.
Each retry attempt will create a new Retry object with updated values, so
they can be safely reused.
Retries can be defined as a default for a pool::
retries = Retry(connect=5, read=2, redirect=5)
http = PoolManager(retries=retries)
response = http.request('GET', 'http://example.com/')
Or per-request (which overrides the default for the pool)::
response = http.request('GET', 'http://example.com/', retries=Retry(10))
Retries can be disabled by passing ``False``::
response = http.request('GET', 'http://example.com/', retries=False)
Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless
retries are disabled, in which case the causing exception will be raised.
:param int total:
Total number of retries to allow. Takes precedence over other counts.
Set to ``None`` to remove this constraint and fall back on other
counts. It's a good idea to set this to some sensibly-high value to
account for unexpected edge cases and avoid infinite retry loops.
Set to ``0`` to fail on the first retry.
Set to ``False`` to disable and imply ``raise_on_redirect=False``.
:param int connect:
How many connection-related errors to retry on.
These are errors raised before the request is sent to the remote server,
which we assume has not triggered the server to process the request.
Set to ``0`` to fail on the first retry of this type.
:param int read:
How many times to retry on read errors.
These errors are raised after the request was sent to the server, so the
request may have side-effects.
Set to ``0`` to fail on the first retry of this type.
:param int redirect:
How many redirects to perform. Limit this to avoid infinite redirect
loops.
A redirect is a HTTP response with a status code 301, 302, 303, 307 or
308.
Set to ``0`` to fail on the first retry of this type.
Set to ``False`` to disable and imply ``raise_on_redirect=False``.
:param int status:
How many times to retry on bad status codes.
These are retries made on responses, where status code matches
``status_forcelist``.
Set to ``0`` to fail on the first retry of this type.
:param iterable method_whitelist:
Set of uppercased HTTP method verbs that we should retry on.
By default, we only retry on methods which are considered to be
idempotent (multiple requests with the same parameters end with the
same state). See :attr:`Retry.DEFAULT_METHOD_WHITELIST`.
Set to a ``False`` value to retry on any verb.
:param iterable status_forcelist:
A set of integer HTTP status codes that we should force a retry on.
A retry is initiated if the request method is in ``method_whitelist``
and the response status code is in ``status_forcelist``.
By default, this is disabled with ``None``.
:param float backoff_factor:
A backoff factor to apply between attempts after the second try
(most errors are resolved immediately by a second try without a
delay). urllib3 will sleep for::
{backoff factor} * (2 ** ({number of total retries} - 1))
seconds. If the backoff_factor is 0.1, then :func:`.sleep` will sleep
for [0.0s, 0.2s, 0.4s, ...] between retries. It will never be longer
than :attr:`Retry.BACKOFF_MAX`.
By default, backoff is disabled (set to 0).
:param bool raise_on_redirect: Whether, if the number of redirects is
exhausted, to raise a MaxRetryError, or to return a response with a
response code in the 3xx range.
:param bool raise_on_status: Similar meaning to ``raise_on_redirect``:
whether we should raise an exception, or return a response,
if status falls in ``status_forcelist`` range and retries have
been exhausted.
:param tuple history: The history of the request encountered during
each call to :meth:`~Retry.increment`. The list is in the order
the requests occurred. Each list item is of class :class:`RequestHistory`.
:param bool respect_retry_after_header:
Whether to respect Retry-After header on status codes defined as
:attr:`Retry.RETRY_AFTER_STATUS_CODES` or not.
:param iterable remove_headers_on_redirect:
Sequence of headers to remove from the request when a response
indicating a redirect is returned before firing off the redirected
request.
"""
DEFAULT_METHOD_WHITELIST = frozenset(
["HEAD", "GET", "PUT", "DELETE", "OPTIONS", "TRACE"]
)
RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503])
DEFAULT_REDIRECT_HEADERS_BLACKLIST = frozenset(["Authorization"])
#: Maximum backoff time.
BACKOFF_MAX = 120
def __init__(
self,
total=10,
connect=None,
read=None,
redirect=None,
status=None,
method_whitelist=DEFAULT_METHOD_WHITELIST,
status_forcelist=None,
backoff_factor=0,
raise_on_redirect=True,
raise_on_status=True,
history=None,
respect_retry_after_header=True,
remove_headers_on_redirect=DEFAULT_REDIRECT_HEADERS_BLACKLIST,
):
self.total = total
self.connect = connect
self.read = read
self.status = status
if redirect is False or total is False:
redirect = 0
raise_on_redirect = False
self.redirect = redirect
self.status_forcelist = status_forcelist or set()
self.method_whitelist = method_whitelist
self.backoff_factor = backoff_factor
self.raise_on_redirect = raise_on_redirect
self.raise_on_status = raise_on_status
self.history = history or tuple()
self.respect_retry_after_header = respect_retry_after_header
self.remove_headers_on_redirect = frozenset(
[h.lower() for h in remove_headers_on_redirect]
)
def new(self, **kw):
params = dict(
total=self.total,
connect=self.connect,
read=self.read,
redirect=self.redirect,
status=self.status,
method_whitelist=self.method_whitelist,
status_forcelist=self.status_forcelist,
backoff_factor=self.backoff_factor,
raise_on_redirect=self.raise_on_redirect,
raise_on_status=self.raise_on_status,
history=self.history,
remove_headers_on_redirect=self.remove_headers_on_redirect,
respect_retry_after_header=self.respect_retry_after_header,
)
params.update(kw)
return type(self)(**params)
@classmethod
def from_int(cls, retries, redirect=True, default=None):
""" Backwards-compatibility for the old retries format."""
if retries is None:
retries = default if default is not None else cls.DEFAULT
if isinstance(retries, Retry):
return retries
redirect = bool(redirect) and None
new_retries = cls(retries, redirect=redirect)
log.debug("Converted retries value: %r -> %r", retries, new_retries)
return new_retries
def get_backoff_time(self):
""" Formula for computing the current backoff
:rtype: float
"""
# We want to consider only the last consecutive errors sequence (Ignore redirects).
consecutive_errors_len = len(
list(
takewhile(lambda x: x.redirect_location is None, reversed(self.history))
)
)
if consecutive_errors_len <= 1:
return 0
backoff_value = self.backoff_factor * (2 ** (consecutive_errors_len - 1))
return min(self.BACKOFF_MAX, backoff_value)
def parse_retry_after(self, retry_after):
# Whitespace: https://tools.ietf.org/html/rfc7230#section-3.2.4
if re.match(r"^\s*[0-9]+\s*$", retry_after):
seconds = int(retry_after)
else:
retry_date_tuple = email.utils.parsedate(retry_after)
if retry_date_tuple is None:
raise InvalidHeader("Invalid Retry-After header: %s" % retry_after)
retry_date = time.mktime(retry_date_tuple)
seconds = retry_date - time.time()
if seconds < 0:
seconds = 0
return seconds
def get_retry_after(self, response):
""" Get the value of Retry-After in seconds. """
retry_after = response.getheader("Retry-After")
if retry_after is None:
return None
return self.parse_retry_after(retry_after)
def sleep_for_retry(self, response=None):
retry_after = self.get_retry_after(response)
if retry_after:
time.sleep(retry_after)
return True
return False
def _sleep_backoff(self):
backoff = self.get_backoff_time()
if backoff <= 0:
return
time.sleep(backoff)
def sleep(self, response=None):
""" Sleep between retry attempts.
This method will respect a server's ``Retry-After`` response header
and sleep the duration of the time requested. If that is not present, it
will use an exponential backoff. By default, the backoff factor is 0 and
this method will return immediately.
"""
if self.respect_retry_after_header and response:
slept = self.sleep_for_retry(response)
if slept:
return
self._sleep_backoff()
def _is_connection_error(self, err):
""" Errors when we're fairly sure that the server did not receive the
request, so it should be safe to retry.
"""
if isinstance(err, ProxyError):
err = err.original_error
return isinstance(err, ConnectTimeoutError)
def _is_read_error(self, err):
""" Errors that occur after the request has been started, so we should
assume that the server began processing it.
"""
return isinstance(err, (ReadTimeoutError, ProtocolError))
def _is_method_retryable(self, method):
""" Checks if a given HTTP method should be retried upon, depending if
it is included on the method whitelist.
"""
if self.method_whitelist and method.upper() not in self.method_whitelist:
return False
return True
def is_retry(self, method, status_code, has_retry_after=False):
""" Is this method/status code retryable? (Based on whitelists and control
variables such as the number of total retries to allow, whether to
respect the Retry-After header, whether this header is present, and
whether the returned status code is on the list of status codes to
be retried upon on the presence of the aforementioned header)
"""
if not self._is_method_retryable(method):
return False
if self.status_forcelist and status_code in self.status_forcelist:
return True
return (
self.total
and self.respect_retry_after_header
and has_retry_after
and (status_code in self.RETRY_AFTER_STATUS_CODES)
)
def is_exhausted(self):
""" Are we out of retries? """
retry_counts = (self.total, self.connect, self.read, self.redirect, self.status)
retry_counts = list(filter(None, retry_counts))
if not retry_counts:
return False
return min(retry_counts) < 0
def increment(
self,
method=None,
url=None,
response=None,
error=None,
_pool=None,
_stacktrace=None,
):
""" Return a new Retry object with incremented retry counters.
:param response: A response object, or None, if the server did not
return a response.
:type response: :class:`~urllib3.response.HTTPResponse`
:param Exception error: An error encountered during the request, or
None if the response was received successfully.
:return: A new ``Retry`` object.
"""
if self.total is False and error:
# Disabled, indicate to re-raise the error.
raise six.reraise(type(error), error, _stacktrace)
total = self.total
if total is not None:
total -= 1
connect = self.connect
read = self.read
redirect = self.redirect
status_count = self.status
cause = "unknown"
status = None
redirect_location = None
if error and self._is_connection_error(error):
# Connect retry?
if connect is False:
raise six.reraise(type(error), error, _stacktrace)
elif connect is not None:
connect -= 1
elif error and self._is_read_error(error):
# Read retry?
if read is False or not self._is_method_retryable(method):
raise six.reraise(type(error), error, _stacktrace)
elif read is not None:
read -= 1
elif response and response.get_redirect_location():
# Redirect retry?
if redirect is not None:
redirect -= 1
cause = "too many redirects"
redirect_location = response.get_redirect_location()
status = response.status
else:
# Incrementing because of a server error like a 500 in
# status_forcelist and a the given method is in the whitelist
cause = ResponseError.GENERIC_ERROR
if response and response.status:
if status_count is not None:
status_count -= 1
cause = ResponseError.SPECIFIC_ERROR.format(status_code=response.status)
status = response.status
history = self.history + (
RequestHistory(method, url, error, status, redirect_location),
)
new_retry = self.new(
total=total,
connect=connect,
read=read,
redirect=redirect,
status=status_count,
history=history,
)
if new_retry.is_exhausted():
raise MaxRetryError(_pool, url, error or ResponseError(cause))
log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)
return new_retry
def __repr__(self):
return (
"{cls.__name__}(total={self.total}, connect={self.connect}, "
"read={self.read}, redirect={self.redirect}, status={self.status})"
).format(cls=type(self), self=self)
# For backwards compatibility (equivalent to pre-v1.9):
Retry.DEFAULT = Retry(3)

View file

@ -0,0 +1,421 @@
from __future__ import absolute_import
import errno
import warnings
import hmac
import os
import sys
from binascii import hexlify, unhexlify
from hashlib import md5, sha1, sha256
from .url import IPV4_RE, BRACELESS_IPV6_ADDRZ_RE
from ..exceptions import SSLError, InsecurePlatformWarning, SNIMissingWarning
from ..packages import six
SSLContext = None
HAS_SNI = False
IS_PYOPENSSL = False
IS_SECURETRANSPORT = False
# Maps the length of a digest to a possible hash function producing this digest
HASHFUNC_MAP = {32: md5, 40: sha1, 64: sha256}
def _const_compare_digest_backport(a, b):
"""
Compare two digests of equal length in constant time.
The digests must be of type str/bytes.
Returns True if the digests match, and False otherwise.
"""
result = abs(len(a) - len(b))
for left, right in zip(bytearray(a), bytearray(b)):
result |= left ^ right
return result == 0
_const_compare_digest = getattr(hmac, "compare_digest", _const_compare_digest_backport)
try: # Test for SSL features
import ssl
from ssl import wrap_socket, CERT_REQUIRED
from ssl import HAS_SNI # Has SNI?
except ImportError:
pass
try: # Platform-specific: Python 3.6
from ssl import PROTOCOL_TLS
PROTOCOL_SSLv23 = PROTOCOL_TLS
except ImportError:
try:
from ssl import PROTOCOL_SSLv23 as PROTOCOL_TLS
PROTOCOL_SSLv23 = PROTOCOL_TLS
except ImportError:
PROTOCOL_SSLv23 = PROTOCOL_TLS = 2
try:
from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION
except ImportError:
OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000
OP_NO_COMPRESSION = 0x20000
# A secure default.
# Sources for more information on TLS ciphers:
#
# - https://wiki.mozilla.org/Security/Server_Side_TLS
# - https://www.ssllabs.com/projects/best-practices/index.html
# - https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/
#
# The general intent is:
# - prefer cipher suites that offer perfect forward secrecy (DHE/ECDHE),
# - prefer ECDHE over DHE for better performance,
# - prefer any AES-GCM and ChaCha20 over any AES-CBC for better performance and
# security,
# - prefer AES-GCM over ChaCha20 because hardware-accelerated AES is common,
# - disable NULL authentication, MD5 MACs, DSS, and other
# insecure ciphers for security reasons.
# - NOTE: TLS 1.3 cipher suites are managed through a different interface
# not exposed by CPython (yet!) and are enabled by default if they're available.
DEFAULT_CIPHERS = ":".join(
[
"ECDHE+AESGCM",
"ECDHE+CHACHA20",
"DHE+AESGCM",
"DHE+CHACHA20",
"ECDH+AESGCM",
"DH+AESGCM",
"ECDH+AES",
"DH+AES",
"RSA+AESGCM",
"RSA+AES",
"!aNULL",
"!eNULL",
"!MD5",
"!DSS",
]
)
try:
from ssl import SSLContext # Modern SSL?
except ImportError:
class SSLContext(object): # Platform-specific: Python 2
def __init__(self, protocol_version):
self.protocol = protocol_version
# Use default values from a real SSLContext
self.check_hostname = False
self.verify_mode = ssl.CERT_NONE
self.ca_certs = None
self.options = 0
self.certfile = None
self.keyfile = None
self.ciphers = None
def load_cert_chain(self, certfile, keyfile):
self.certfile = certfile
self.keyfile = keyfile
def load_verify_locations(self, cafile=None, capath=None, cadata=None):
self.ca_certs = cafile
if capath is not None:
raise SSLError("CA directories not supported in older Pythons")
if cadata is not None:
raise SSLError("CA data not supported in older Pythons")
def set_ciphers(self, cipher_suite):
self.ciphers = cipher_suite
def wrap_socket(self, socket, server_hostname=None, server_side=False):
warnings.warn(
"A true SSLContext object is not available. This prevents "
"urllib3 from configuring SSL appropriately and may cause "
"certain SSL connections to fail. You can upgrade to a newer "
"version of Python to solve this. For more information, see "
"https://urllib3.readthedocs.io/en/latest/advanced-usage.html"
"#ssl-warnings",
InsecurePlatformWarning,
)
kwargs = {
"keyfile": self.keyfile,
"certfile": self.certfile,
"ca_certs": self.ca_certs,
"cert_reqs": self.verify_mode,
"ssl_version": self.protocol,
"server_side": server_side,
}
return wrap_socket(socket, ciphers=self.ciphers, **kwargs)
def assert_fingerprint(cert, fingerprint):
"""
Checks if given fingerprint matches the supplied certificate.
:param cert:
Certificate as bytes object.
:param fingerprint:
Fingerprint as string of hexdigits, can be interspersed by colons.
"""
fingerprint = fingerprint.replace(":", "").lower()
digest_length = len(fingerprint)
hashfunc = HASHFUNC_MAP.get(digest_length)
if not hashfunc:
raise SSLError("Fingerprint of invalid length: {0}".format(fingerprint))
# We need encode() here for py32; works on py2 and p33.
fingerprint_bytes = unhexlify(fingerprint.encode())
cert_digest = hashfunc(cert).digest()
if not _const_compare_digest(cert_digest, fingerprint_bytes):
raise SSLError(
'Fingerprints did not match. Expected "{0}", got "{1}".'.format(
fingerprint, hexlify(cert_digest)
)
)
def resolve_cert_reqs(candidate):
"""
Resolves the argument to a numeric constant, which can be passed to
the wrap_socket function/method from the ssl module.
Defaults to :data:`ssl.CERT_REQUIRED`.
If given a string it is assumed to be the name of the constant in the
:mod:`ssl` module or its abbreviation.
(So you can specify `REQUIRED` instead of `CERT_REQUIRED`.
If it's neither `None` nor a string we assume it is already the numeric
constant which can directly be passed to wrap_socket.
"""
if candidate is None:
return CERT_REQUIRED
if isinstance(candidate, str):
res = getattr(ssl, candidate, None)
if res is None:
res = getattr(ssl, "CERT_" + candidate)
return res
return candidate
def resolve_ssl_version(candidate):
"""
like resolve_cert_reqs
"""
if candidate is None:
return PROTOCOL_TLS
if isinstance(candidate, str):
res = getattr(ssl, candidate, None)
if res is None:
res = getattr(ssl, "PROTOCOL_" + candidate)
return res
return candidate
def create_urllib3_context(
ssl_version=None, cert_reqs=None, options=None, ciphers=None
):
"""All arguments have the same meaning as ``ssl_wrap_socket``.
By default, this function does a lot of the same work that
``ssl.create_default_context`` does on Python 3.4+. It:
- Disables SSLv2, SSLv3, and compression
- Sets a restricted set of server ciphers
If you wish to enable SSLv3, you can do::
from urllib3.util import ssl_
context = ssl_.create_urllib3_context()
context.options &= ~ssl_.OP_NO_SSLv3
You can do the same to enable compression (substituting ``COMPRESSION``
for ``SSLv3`` in the last line above).
:param ssl_version:
The desired protocol version to use. This will default to
PROTOCOL_SSLv23 which will negotiate the highest protocol that both
the server and your installation of OpenSSL support.
:param cert_reqs:
Whether to require the certificate verification. This defaults to
``ssl.CERT_REQUIRED``.
:param options:
Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``,
``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``.
:param ciphers:
Which cipher suites to allow the server to select.
:returns:
Constructed SSLContext object with specified options
:rtype: SSLContext
"""
context = SSLContext(ssl_version or PROTOCOL_TLS)
context.set_ciphers(ciphers or DEFAULT_CIPHERS)
# Setting the default here, as we may have no ssl module on import
cert_reqs = ssl.CERT_REQUIRED if cert_reqs is None else cert_reqs
if options is None:
options = 0
# SSLv2 is easily broken and is considered harmful and dangerous
options |= OP_NO_SSLv2
# SSLv3 has several problems and is now dangerous
options |= OP_NO_SSLv3
# Disable compression to prevent CRIME attacks for OpenSSL 1.0+
# (issue #309)
options |= OP_NO_COMPRESSION
context.options |= options
# Enable post-handshake authentication for TLS 1.3, see GH #1634. PHA is
# necessary for conditional client cert authentication with TLS 1.3.
# The attribute is None for OpenSSL <= 1.1.0 or does not exist in older
# versions of Python. We only enable on Python 3.7.4+ or if certificate
# verification is enabled to work around Python issue #37428
# See: https://bugs.python.org/issue37428
if (cert_reqs == ssl.CERT_REQUIRED or sys.version_info >= (3, 7, 4)) and getattr(
context, "post_handshake_auth", None
) is not None:
context.post_handshake_auth = True
context.verify_mode = cert_reqs
if (
getattr(context, "check_hostname", None) is not None
): # Platform-specific: Python 3.2
# We do our own verification, including fingerprints and alternative
# hostnames. So disable it here
context.check_hostname = False
# Enable logging of TLS session keys via defacto standard environment variable
# 'SSLKEYLOGFILE', if the feature is available (Python 3.8+).
if hasattr(context, "keylog_filename"):
context.keylog_filename = os.environ.get("SSLKEYLOGFILE")
return context
def ssl_wrap_socket(
sock,
keyfile=None,
certfile=None,
cert_reqs=None,
ca_certs=None,
server_hostname=None,
ssl_version=None,
ciphers=None,
ssl_context=None,
ca_cert_dir=None,
key_password=None,
ca_cert_data=None,
):
"""
All arguments except for server_hostname, ssl_context, and ca_cert_dir have
the same meaning as they do when using :func:`ssl.wrap_socket`.
:param server_hostname:
When SNI is supported, the expected hostname of the certificate
:param ssl_context:
A pre-made :class:`SSLContext` object. If none is provided, one will
be created using :func:`create_urllib3_context`.
:param ciphers:
A string of ciphers we wish the client to support.
:param ca_cert_dir:
A directory containing CA certificates in multiple separate files, as
supported by OpenSSL's -CApath flag or the capath argument to
SSLContext.load_verify_locations().
:param key_password:
Optional password if the keyfile is encrypted.
:param ca_cert_data:
Optional string containing CA certificates in PEM format suitable for
passing as the cadata parameter to SSLContext.load_verify_locations()
"""
context = ssl_context
if context is None:
# Note: This branch of code and all the variables in it are no longer
# used by urllib3 itself. We should consider deprecating and removing
# this code.
context = create_urllib3_context(ssl_version, cert_reqs, ciphers=ciphers)
if ca_certs or ca_cert_dir or ca_cert_data:
try:
context.load_verify_locations(ca_certs, ca_cert_dir, ca_cert_data)
except IOError as e: # Platform-specific: Python 2.7
raise SSLError(e)
# Py33 raises FileNotFoundError which subclasses OSError
# These are not equivalent unless we check the errno attribute
except OSError as e: # Platform-specific: Python 3.3 and beyond
if e.errno == errno.ENOENT:
raise SSLError(e)
raise
elif ssl_context is None and hasattr(context, "load_default_certs"):
# try to load OS default certs; works well on Windows (require Python3.4+)
context.load_default_certs()
# Attempt to detect if we get the goofy behavior of the
# keyfile being encrypted and OpenSSL asking for the
# passphrase via the terminal and instead error out.
if keyfile and key_password is None and _is_key_file_encrypted(keyfile):
raise SSLError("Client private key is encrypted, password is required")
if certfile:
if key_password is None:
context.load_cert_chain(certfile, keyfile)
else:
context.load_cert_chain(certfile, keyfile, key_password)
# If we detect server_hostname is an IP address then the SNI
# extension should not be used according to RFC3546 Section 3.1
# We shouldn't warn the user if SNI isn't available but we would
# not be using SNI anyways due to IP address for server_hostname.
if (
server_hostname is not None and not is_ipaddress(server_hostname)
) or IS_SECURETRANSPORT:
if HAS_SNI and server_hostname is not None:
return context.wrap_socket(sock, server_hostname=server_hostname)
warnings.warn(
"An HTTPS request has been made, but the SNI (Server Name "
"Indication) extension to TLS is not available on this platform. "
"This may cause the server to present an incorrect TLS "
"certificate, which can cause validation failures. You can upgrade to "
"a newer version of Python to solve this. For more information, see "
"https://urllib3.readthedocs.io/en/latest/advanced-usage.html"
"#ssl-warnings",
SNIMissingWarning,
)
return context.wrap_socket(sock)
def is_ipaddress(hostname):
"""Detects whether the hostname given is an IPv4 or IPv6 address.
Also detects IPv6 addresses with Zone IDs.
:param str hostname: Hostname to examine.
:return: True if the hostname is an IP address, False otherwise.
"""
if not six.PY2 and isinstance(hostname, bytes):
# IDN A-label bytes are ASCII compatible.
hostname = hostname.decode("ascii")
return bool(IPV4_RE.match(hostname) or BRACELESS_IPV6_ADDRZ_RE.match(hostname))
def _is_key_file_encrypted(key_file):
"""Detects if a key file is encrypted or not."""
with open(key_file, "r") as f:
for line in f:
# Look for Proc-Type: 4,ENCRYPTED
if "ENCRYPTED" in line:
return True
return False

View file

@ -0,0 +1,261 @@
from __future__ import absolute_import
# The default socket timeout, used by httplib to indicate that no timeout was
# specified by the user
from socket import _GLOBAL_DEFAULT_TIMEOUT
import time
from ..exceptions import TimeoutStateError
# A sentinel value to indicate that no timeout was specified by the user in
# urllib3
_Default = object()
# Use time.monotonic if available.
current_time = getattr(time, "monotonic", time.time)
class Timeout(object):
""" Timeout configuration.
Timeouts can be defined as a default for a pool::
timeout = Timeout(connect=2.0, read=7.0)
http = PoolManager(timeout=timeout)
response = http.request('GET', 'http://example.com/')
Or per-request (which overrides the default for the pool)::
response = http.request('GET', 'http://example.com/', timeout=Timeout(10))
Timeouts can be disabled by setting all the parameters to ``None``::
no_timeout = Timeout(connect=None, read=None)
response = http.request('GET', 'http://example.com/, timeout=no_timeout)
:param total:
This combines the connect and read timeouts into one; the read timeout
will be set to the time leftover from the connect attempt. In the
event that both a connect timeout and a total are specified, or a read
timeout and a total are specified, the shorter timeout will be applied.
Defaults to None.
:type total: integer, float, or None
:param connect:
The maximum amount of time (in seconds) to wait for a connection
attempt to a server to succeed. Omitting the parameter will default the
connect timeout to the system default, probably `the global default
timeout in socket.py
<http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
None will set an infinite timeout for connection attempts.
:type connect: integer, float, or None
:param read:
The maximum amount of time (in seconds) to wait between consecutive
read operations for a response from the server. Omitting the parameter
will default the read timeout to the system default, probably `the
global default timeout in socket.py
<http://hg.python.org/cpython/file/603b4d593758/Lib/socket.py#l535>`_.
None will set an infinite timeout.
:type read: integer, float, or None
.. note::
Many factors can affect the total amount of time for urllib3 to return
an HTTP response.
For example, Python's DNS resolver does not obey the timeout specified
on the socket. Other factors that can affect total request time include
high CPU load, high swap, the program running at a low priority level,
or other behaviors.
In addition, the read and total timeouts only measure the time between
read operations on the socket connecting the client and the server,
not the total amount of time for the request to return a complete
response. For most requests, the timeout is raised because the server
has not sent the first byte in the specified time. This is not always
the case; if a server streams one byte every fifteen seconds, a timeout
of 20 seconds will not trigger, even though the request will take
several minutes to complete.
If your goal is to cut off any request after a set amount of wall clock
time, consider having a second "watcher" thread to cut off a slow
request.
"""
#: A sentinel object representing the default timeout value
DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT
def __init__(self, total=None, connect=_Default, read=_Default):
self._connect = self._validate_timeout(connect, "connect")
self._read = self._validate_timeout(read, "read")
self.total = self._validate_timeout(total, "total")
self._start_connect = None
def __repr__(self):
return "%s(connect=%r, read=%r, total=%r)" % (
type(self).__name__,
self._connect,
self._read,
self.total,
)
# __str__ provided for backwards compatibility
__str__ = __repr__
@classmethod
def _validate_timeout(cls, value, name):
""" Check that a timeout attribute is valid.
:param value: The timeout value to validate
:param name: The name of the timeout attribute to validate. This is
used to specify in error messages.
:return: The validated and casted version of the given value.
:raises ValueError: If it is a numeric value less than or equal to
zero, or the type is not an integer, float, or None.
"""
if value is _Default:
return cls.DEFAULT_TIMEOUT
if value is None or value is cls.DEFAULT_TIMEOUT:
return value
if isinstance(value, bool):
raise ValueError(
"Timeout cannot be a boolean value. It must "
"be an int, float or None."
)
try:
float(value)
except (TypeError, ValueError):
raise ValueError(
"Timeout value %s was %s, but it must be an "
"int, float or None." % (name, value)
)
try:
if value <= 0:
raise ValueError(
"Attempted to set %s timeout to %s, but the "
"timeout cannot be set to a value less "
"than or equal to 0." % (name, value)
)
except TypeError:
# Python 3
raise ValueError(
"Timeout value %s was %s, but it must be an "
"int, float or None." % (name, value)
)
return value
@classmethod
def from_float(cls, timeout):
""" Create a new Timeout from a legacy timeout value.
The timeout value used by httplib.py sets the same timeout on the
connect(), and recv() socket requests. This creates a :class:`Timeout`
object that sets the individual timeouts to the ``timeout`` value
passed to this function.
:param timeout: The legacy timeout value.
:type timeout: integer, float, sentinel default object, or None
:return: Timeout object
:rtype: :class:`Timeout`
"""
return Timeout(read=timeout, connect=timeout)
def clone(self):
""" Create a copy of the timeout object
Timeout properties are stored per-pool but each request needs a fresh
Timeout object to ensure each one has its own start/stop configured.
:return: a copy of the timeout object
:rtype: :class:`Timeout`
"""
# We can't use copy.deepcopy because that will also create a new object
# for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
# detect the user default.
return Timeout(connect=self._connect, read=self._read, total=self.total)
def start_connect(self):
""" Start the timeout clock, used during a connect() attempt
:raises urllib3.exceptions.TimeoutStateError: if you attempt
to start a timer that has been started already.
"""
if self._start_connect is not None:
raise TimeoutStateError("Timeout timer has already been started.")
self._start_connect = current_time()
return self._start_connect
def get_connect_duration(self):
""" Gets the time elapsed since the call to :meth:`start_connect`.
:return: Elapsed time in seconds.
:rtype: float
:raises urllib3.exceptions.TimeoutStateError: if you attempt
to get duration for a timer that hasn't been started.
"""
if self._start_connect is None:
raise TimeoutStateError(
"Can't get connect duration for timer that has not started."
)
return current_time() - self._start_connect
@property
def connect_timeout(self):
""" Get the value to use when setting a connection timeout.
This will be a positive float or integer, the value None
(never timeout), or the default system timeout.
:return: Connect timeout.
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
"""
if self.total is None:
return self._connect
if self._connect is None or self._connect is self.DEFAULT_TIMEOUT:
return self.total
return min(self._connect, self.total)
@property
def read_timeout(self):
""" Get the value for the read timeout.
This assumes some time has elapsed in the connection timeout and
computes the read timeout appropriately.
If self.total is set, the read timeout is dependent on the amount of
time taken by the connect timeout. If the connection time has not been
established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
raised.
:return: Value to use for the read timeout.
:rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
:raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
has not yet been called on this object.
"""
if (
self.total is not None
and self.total is not self.DEFAULT_TIMEOUT
and self._read is not None
and self._read is not self.DEFAULT_TIMEOUT
):
# In case the connect timeout has not yet been established.
if self._start_connect is None:
return self._read
return max(0, min(self.total - self.get_connect_duration(), self._read))
elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT:
return max(0, self.total - self.get_connect_duration())
else:
return self._read

View file

@ -0,0 +1,430 @@
from __future__ import absolute_import
import re
from collections import namedtuple
from ..exceptions import LocationParseError
from ..packages import six
url_attrs = ["scheme", "auth", "host", "port", "path", "query", "fragment"]
# We only want to normalize urls with an HTTP(S) scheme.
# urllib3 infers URLs without a scheme (None) to be http.
NORMALIZABLE_SCHEMES = ("http", "https", None)
# Almost all of these patterns were derived from the
# 'rfc3986' module: https://github.com/python-hyper/rfc3986
PERCENT_RE = re.compile(r"%[a-fA-F0-9]{2}")
SCHEME_RE = re.compile(r"^(?:[a-zA-Z][a-zA-Z0-9+-]*:|/)")
URI_RE = re.compile(
r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?"
r"(?://([^\\/?#]*))?"
r"([^?#]*)"
r"(?:\?([^#]*))?"
r"(?:#(.*))?$",
re.UNICODE | re.DOTALL,
)
IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
HEX_PAT = "[0-9A-Fa-f]{1,4}"
LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=HEX_PAT, ipv4=IPV4_PAT)
_subs = {"hex": HEX_PAT, "ls32": LS32_PAT}
_variations = [
# 6( h16 ":" ) ls32
"(?:%(hex)s:){6}%(ls32)s",
# "::" 5( h16 ":" ) ls32
"::(?:%(hex)s:){5}%(ls32)s",
# [ h16 ] "::" 4( h16 ":" ) ls32
"(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s",
# [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
"(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s",
# [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
"(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s",
# [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
"(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s",
# [ *4( h16 ":" ) h16 ] "::" ls32
"(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s",
# [ *5( h16 ":" ) h16 ] "::" h16
"(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s",
# [ *6( h16 ":" ) h16 ] "::"
"(?:(?:%(hex)s:){0,6}%(hex)s)?::",
]
UNRESERVED_PAT = r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._!\-~"
IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")"
ZONE_ID_PAT = "(?:%25|%)(?:[" + UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
IPV6_ADDRZ_PAT = r"\[" + IPV6_PAT + r"(?:" + ZONE_ID_PAT + r")?\]"
REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*"
TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$")
IPV4_RE = re.compile("^" + IPV4_PAT + "$")
IPV6_RE = re.compile("^" + IPV6_PAT + "$")
IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT + "$")
BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
ZONE_ID_RE = re.compile("(" + ZONE_ID_PAT + r")\]$")
SUBAUTHORITY_PAT = (u"^(?:(.*)@)?(%s|%s|%s)(?::([0-9]{0,5}))?$") % (
REG_NAME_PAT,
IPV4_PAT,
IPV6_ADDRZ_PAT,
)
SUBAUTHORITY_RE = re.compile(SUBAUTHORITY_PAT, re.UNICODE | re.DOTALL)
UNRESERVED_CHARS = set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~"
)
SUB_DELIM_CHARS = set("!$&'()*+,;=")
USERINFO_CHARS = UNRESERVED_CHARS | SUB_DELIM_CHARS | {":"}
PATH_CHARS = USERINFO_CHARS | {"@", "/"}
QUERY_CHARS = FRAGMENT_CHARS = PATH_CHARS | {"?"}
class Url(namedtuple("Url", url_attrs)):
"""
Data structure for representing an HTTP URL. Used as a return value for
:func:`parse_url`. Both the scheme and host are normalized as they are
both case-insensitive according to RFC 3986.
"""
__slots__ = ()
def __new__(
cls,
scheme=None,
auth=None,
host=None,
port=None,
path=None,
query=None,
fragment=None,
):
if path and not path.startswith("/"):
path = "/" + path
if scheme is not None:
scheme = scheme.lower()
return super(Url, cls).__new__(
cls, scheme, auth, host, port, path, query, fragment
)
@property
def hostname(self):
"""For backwards-compatibility with urlparse. We're nice like that."""
return self.host
@property
def request_uri(self):
"""Absolute path including the query string."""
uri = self.path or "/"
if self.query is not None:
uri += "?" + self.query
return uri
@property
def netloc(self):
"""Network location including host and port"""
if self.port:
return "%s:%d" % (self.host, self.port)
return self.host
@property
def url(self):
"""
Convert self into a url
This function should more or less round-trip with :func:`.parse_url`. The
returned url may not be exactly the same as the url inputted to
:func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
with a blank port will have : removed).
Example: ::
>>> U = parse_url('http://google.com/mail/')
>>> U.url
'http://google.com/mail/'
>>> Url('http', 'username:password', 'host.com', 80,
... '/path', 'query', 'fragment').url
'http://username:password@host.com:80/path?query#fragment'
"""
scheme, auth, host, port, path, query, fragment = self
url = u""
# We use "is not None" we want things to happen with empty strings (or 0 port)
if scheme is not None:
url += scheme + u"://"
if auth is not None:
url += auth + u"@"
if host is not None:
url += host
if port is not None:
url += u":" + str(port)
if path is not None:
url += path
if query is not None:
url += u"?" + query
if fragment is not None:
url += u"#" + fragment
return url
def __str__(self):
return self.url
def split_first(s, delims):
"""
.. deprecated:: 1.25
Given a string and an iterable of delimiters, split on the first found
delimiter. Return two split parts and the matched delimiter.
If not found, then the first part is the full input string.
Example::
>>> split_first('foo/bar?baz', '?/=')
('foo', 'bar?baz', '/')
>>> split_first('foo/bar?baz', '123')
('foo/bar?baz', '', None)
Scales linearly with number of delims. Not ideal for large number of delims.
"""
min_idx = None
min_delim = None
for d in delims:
idx = s.find(d)
if idx < 0:
continue
if min_idx is None or idx < min_idx:
min_idx = idx
min_delim = d
if min_idx is None or min_idx < 0:
return s, "", None
return s[:min_idx], s[min_idx + 1 :], min_delim
def _encode_invalid_chars(component, allowed_chars, encoding="utf-8"):
"""Percent-encodes a URI component without reapplying
onto an already percent-encoded component.
"""
if component is None:
return component
component = six.ensure_text(component)
# Normalize existing percent-encoded bytes.
# Try to see if the component we're encoding is already percent-encoded
# so we can skip all '%' characters but still encode all others.
component, percent_encodings = PERCENT_RE.subn(
lambda match: match.group(0).upper(), component
)
uri_bytes = component.encode("utf-8", "surrogatepass")
is_percent_encoded = percent_encodings == uri_bytes.count(b"%")
encoded_component = bytearray()
for i in range(0, len(uri_bytes)):
# Will return a single character bytestring on both Python 2 & 3
byte = uri_bytes[i : i + 1]
byte_ord = ord(byte)
if (is_percent_encoded and byte == b"%") or (
byte_ord < 128 and byte.decode() in allowed_chars
):
encoded_component += byte
continue
encoded_component.extend(b"%" + (hex(byte_ord)[2:].encode().zfill(2).upper()))
return encoded_component.decode(encoding)
def _remove_path_dot_segments(path):
# See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
segments = path.split("/") # Turn the path into a list of segments
output = [] # Initialize the variable to use to store output
for segment in segments:
# '.' is the current directory, so ignore it, it is superfluous
if segment == ".":
continue
# Anything other than '..', should be appended to the output
elif segment != "..":
output.append(segment)
# In this case segment == '..', if we can, we should pop the last
# element
elif output:
output.pop()
# If the path starts with '/' and the output is empty or the first string
# is non-empty
if path.startswith("/") and (not output or output[0]):
output.insert(0, "")
# If the path starts with '/.' or '/..' ensure we add one more empty
# string to add a trailing '/'
if path.endswith(("/.", "/..")):
output.append("")
return "/".join(output)
def _normalize_host(host, scheme):
if host:
if isinstance(host, six.binary_type):
host = six.ensure_str(host)
if scheme in NORMALIZABLE_SCHEMES:
is_ipv6 = IPV6_ADDRZ_RE.match(host)
if is_ipv6:
match = ZONE_ID_RE.search(host)
if match:
start, end = match.span(1)
zone_id = host[start:end]
if zone_id.startswith("%25") and zone_id != "%25":
zone_id = zone_id[3:]
else:
zone_id = zone_id[1:]
zone_id = "%" + _encode_invalid_chars(zone_id, UNRESERVED_CHARS)
return host[:start].lower() + zone_id + host[end:]
else:
return host.lower()
elif not IPV4_RE.match(host):
return six.ensure_str(
b".".join([_idna_encode(label) for label in host.split(".")])
)
return host
def _idna_encode(name):
if name and any([ord(x) > 128 for x in name]):
try:
import idna
except ImportError:
six.raise_from(
LocationParseError("Unable to parse URL without the 'idna' module"),
None,
)
try:
return idna.encode(name.lower(), strict=True, std3_rules=True)
except idna.IDNAError:
six.raise_from(
LocationParseError(u"Name '%s' is not a valid IDNA label" % name), None
)
return name.lower().encode("ascii")
def _encode_target(target):
"""Percent-encodes a request target so that there are no invalid characters"""
path, query = TARGET_RE.match(target).groups()
target = _encode_invalid_chars(path, PATH_CHARS)
query = _encode_invalid_chars(query, QUERY_CHARS)
if query is not None:
target += "?" + query
return target
def parse_url(url):
"""
Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
performed to parse incomplete urls. Fields not provided will be None.
This parser is RFC 3986 compliant.
The parser logic and helper functions are based heavily on
work done in the ``rfc3986`` module.
:param str url: URL to parse into a :class:`.Url` namedtuple.
Partly backwards-compatible with :mod:`urlparse`.
Example::
>>> parse_url('http://google.com/mail/')
Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
>>> parse_url('google.com:80')
Url(scheme=None, host='google.com', port=80, path=None, ...)
>>> parse_url('/foo?bar')
Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
"""
if not url:
# Empty
return Url()
source_url = url
if not SCHEME_RE.search(url):
url = "//" + url
try:
scheme, authority, path, query, fragment = URI_RE.match(url).groups()
normalize_uri = scheme is None or scheme.lower() in NORMALIZABLE_SCHEMES
if scheme:
scheme = scheme.lower()
if authority:
auth, host, port = SUBAUTHORITY_RE.match(authority).groups()
if auth and normalize_uri:
auth = _encode_invalid_chars(auth, USERINFO_CHARS)
if port == "":
port = None
else:
auth, host, port = None, None, None
if port is not None:
port = int(port)
if not (0 <= port <= 65535):
raise LocationParseError(url)
host = _normalize_host(host, scheme)
if normalize_uri and path:
path = _remove_path_dot_segments(path)
path = _encode_invalid_chars(path, PATH_CHARS)
if normalize_uri and query:
query = _encode_invalid_chars(query, QUERY_CHARS)
if normalize_uri and fragment:
fragment = _encode_invalid_chars(fragment, FRAGMENT_CHARS)
except (ValueError, AttributeError):
return six.raise_from(LocationParseError(source_url), None)
# For the sake of backwards compatibility we put empty
# string values for path if there are any defined values
# beyond the path in the URL.
# TODO: Remove this when we break backwards compatibility.
if not path:
if query is not None or fragment is not None:
path = ""
else:
path = None
# Ensure that each part of the URL is a `str` for
# backwards compatibility.
if isinstance(url, six.text_type):
ensure_func = six.ensure_text
else:
ensure_func = six.ensure_str
def ensure_type(x):
return x if x is None else ensure_func(x)
return Url(
scheme=ensure_type(scheme),
auth=ensure_type(auth),
host=ensure_type(host),
port=port,
path=ensure_type(path),
query=ensure_type(query),
fragment=ensure_type(fragment),
)
def get_host(url):
"""
Deprecated. Use :func:`parse_url` instead.
"""
p = parse_url(url)
return p.scheme or "http", p.hostname, p.port

View file

@ -0,0 +1,153 @@
import errno
from functools import partial
import select
import sys
try:
from time import monotonic
except ImportError:
from time import time as monotonic
__all__ = ["NoWayToWaitForSocketError", "wait_for_read", "wait_for_write"]
class NoWayToWaitForSocketError(Exception):
pass
# How should we wait on sockets?
#
# There are two types of APIs you can use for waiting on sockets: the fancy
# modern stateful APIs like epoll/kqueue, and the older stateless APIs like
# select/poll. The stateful APIs are more efficient when you have a lots of
# sockets to keep track of, because you can set them up once and then use them
# lots of times. But we only ever want to wait on a single socket at a time
# and don't want to keep track of state, so the stateless APIs are actually
# more efficient. So we want to use select() or poll().
#
# Now, how do we choose between select() and poll()? On traditional Unixes,
# select() has a strange calling convention that makes it slow, or fail
# altogether, for high-numbered file descriptors. The point of poll() is to fix
# that, so on Unixes, we prefer poll().
#
# On Windows, there is no poll() (or at least Python doesn't provide a wrapper
# for it), but that's OK, because on Windows, select() doesn't have this
# strange calling convention; plain select() works fine.
#
# So: on Windows we use select(), and everywhere else we use poll(). We also
# fall back to select() in case poll() is somehow broken or missing.
if sys.version_info >= (3, 5):
# Modern Python, that retries syscalls by default
def _retry_on_intr(fn, timeout):
return fn(timeout)
else:
# Old and broken Pythons.
def _retry_on_intr(fn, timeout):
if timeout is None:
deadline = float("inf")
else:
deadline = monotonic() + timeout
while True:
try:
return fn(timeout)
# OSError for 3 <= pyver < 3.5, select.error for pyver <= 2.7
except (OSError, select.error) as e:
# 'e.args[0]' incantation works for both OSError and select.error
if e.args[0] != errno.EINTR:
raise
else:
timeout = deadline - monotonic()
if timeout < 0:
timeout = 0
if timeout == float("inf"):
timeout = None
continue
def select_wait_for_socket(sock, read=False, write=False, timeout=None):
if not read and not write:
raise RuntimeError("must specify at least one of read=True, write=True")
rcheck = []
wcheck = []
if read:
rcheck.append(sock)
if write:
wcheck.append(sock)
# When doing a non-blocking connect, most systems signal success by
# marking the socket writable. Windows, though, signals success by marked
# it as "exceptional". We paper over the difference by checking the write
# sockets for both conditions. (The stdlib selectors module does the same
# thing.)
fn = partial(select.select, rcheck, wcheck, wcheck)
rready, wready, xready = _retry_on_intr(fn, timeout)
return bool(rready or wready or xready)
def poll_wait_for_socket(sock, read=False, write=False, timeout=None):
if not read and not write:
raise RuntimeError("must specify at least one of read=True, write=True")
mask = 0
if read:
mask |= select.POLLIN
if write:
mask |= select.POLLOUT
poll_obj = select.poll()
poll_obj.register(sock, mask)
# For some reason, poll() takes timeout in milliseconds
def do_poll(t):
if t is not None:
t *= 1000
return poll_obj.poll(t)
return bool(_retry_on_intr(do_poll, timeout))
def null_wait_for_socket(*args, **kwargs):
raise NoWayToWaitForSocketError("no select-equivalent available")
def _have_working_poll():
# Apparently some systems have a select.poll that fails as soon as you try
# to use it, either due to strange configuration or broken monkeypatching
# from libraries like eventlet/greenlet.
try:
poll_obj = select.poll()
_retry_on_intr(poll_obj.poll, 0)
except (AttributeError, OSError):
return False
else:
return True
def wait_for_socket(*args, **kwargs):
# We delay choosing which implementation to use until the first time we're
# called. We could do it at import time, but then we might make the wrong
# decision if someone goes wild with monkeypatching select.poll after
# we're imported.
global wait_for_socket
if _have_working_poll():
wait_for_socket = poll_wait_for_socket
elif hasattr(select, "select"):
wait_for_socket = select_wait_for_socket
else: # Platform-specific: Appengine.
wait_for_socket = null_wait_for_socket
return wait_for_socket(*args, **kwargs)
def wait_for_read(sock, timeout=None):
""" Waits for reading to be available on a given socket.
Returns True if the socket is readable, or False if the timeout expired.
"""
return wait_for_socket(sock, read=True, timeout=timeout)
def wait_for_write(sock, timeout=None):
""" Waits for writing to be available on a given socket.
Returns True if the socket is readable, or False if the timeout expired.
"""
return wait_for_socket(sock, write=True, timeout=timeout)