Updated DB_Helper by adding firebase methods.

Batuhan Berk Başoğlu 2020-10-05 16:53:40 -04:00
parent 485cc3bbba
commit c82121d036
1810 changed files with 537281 additions and 1 deletion

View file

@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
"""
requests-toolbelt
=================
See http://toolbelt.rtfd.org/ for documentation
:copyright: (c) 2014 by Ian Cordasco and Cory Benfield
:license: Apache v2.0, see LICENSE for more details
"""
from .adapters import SSLAdapter, SourceAddressAdapter
from .auth.guess import GuessAuth
from .multipart import (
MultipartEncoder, MultipartEncoderMonitor, MultipartDecoder,
ImproperBodyPartContentException, NonMultipartContentTypeException
)
from .streaming_iterator import StreamingIterator
from .utils.user_agent import user_agent
__title__ = 'requests-toolbelt'
__authors__ = 'Ian Cordasco, Cory Benfield'
__license__ = 'Apache v2.0'
__copyright__ = 'Copyright 2014 Ian Cordasco, Cory Benfield'
__version__ = '0.7.0'
__version_info__ = tuple(int(i) for i in __version__.split('.'))
__all__ = [
'GuessAuth', 'MultipartEncoder', 'MultipartEncoderMonitor',
'MultipartDecoder', 'SSLAdapter', 'SourceAddressAdapter',
'StreamingIterator', 'user_agent', 'ImproperBodyPartContentException',
'NonMultipartContentTypeException', '__title__', '__authors__',
'__license__', '__copyright__', '__version__', '__version_info__',
]
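
A quick sketch of one of the exported helpers: ``user_agent`` builds a
User-Agent string that can be attached to a session. The application name
and version below are invented for illustration::

    import requests
    from requests_toolbelt import user_agent

    s = requests.Session()
    s.headers['User-Agent'] = user_agent('my_app', '1.0.0')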

View file

@@ -0,0 +1,310 @@
"""Private module full of compatibility hacks.
Primarily this is for downstream redistributions of requests that unvendor
urllib3 without providing a shim.
.. warning::
This module is private. If you use it, and something breaks, you were
warned
"""
try:
    from collections.abc import Mapping, MutableMapping
except ImportError:  # Python 2
    from collections import Mapping, MutableMapping
import sys
import requests
try:
from requests.packages.urllib3 import fields
from requests.packages.urllib3 import filepost
from requests.packages.urllib3 import poolmanager
except ImportError:
from urllib3 import fields
from urllib3 import filepost
from urllib3 import poolmanager
try:
from requests.packages.urllib3.connection import HTTPConnection
from requests.packages.urllib3 import connection
except ImportError:
try:
from urllib3.connection import HTTPConnection
from urllib3 import connection
except ImportError:
HTTPConnection = None
connection = None
if requests.__build__ < 0x020300:
timeout = None
else:
try:
from requests.packages.urllib3.util import timeout
except ImportError:
from urllib3.util import timeout
if requests.__build__ < 0x021000:
gaecontrib = None
else:
try:
from requests.packages.urllib3.contrib import appengine as gaecontrib
except ImportError:
from urllib3.contrib import appengine as gaecontrib
PY3 = sys.version_info > (3, 0)
if PY3:
import queue
from urllib.parse import urlencode, urljoin
else:
import Queue as queue
from urllib import urlencode
from urlparse import urljoin
try:
basestring = basestring
except NameError:
basestring = (str, bytes)
class HTTPHeaderDict(MutableMapping):
"""
    A ``dict`` like container for storing HTTP Headers.
    Field names are stored and compared case-insensitively in compliance with
    RFC 7230. Iteration provides the first case-sensitive key seen for each
    case-insensitive pair.
    :param headers:
        An iterable of field-value pairs. Must not contain multiple field names
        when compared case-insensitively.
    :param kwargs:
        Additional field-value pairs to pass in to ``dict.update``.
    Using ``__setitem__`` syntax overwrites fields that compare equal
    case-insensitively in order to maintain ``dict``'s api. For fields that
    compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
    in a loop.
    If multiple fields that are equal case-insensitively are passed to the
    constructor or ``.update``, the behavior is undefined and some will be
    lost.
>>> headers = HTTPHeaderDict()
>>> headers.add('Set-Cookie', 'foo=bar')
>>> headers.add('set-cookie', 'baz=quxx')
>>> headers['content-length'] = '7'
>>> headers['SET-cookie']
'foo=bar, baz=quxx'
>>> headers['Content-Length']
'7'
"""
def __init__(self, headers=None, **kwargs):
super(HTTPHeaderDict, self).__init__()
self._container = {}
if headers is not None:
if isinstance(headers, HTTPHeaderDict):
self._copy_from(headers)
else:
self.extend(headers)
if kwargs:
self.extend(kwargs)
def __setitem__(self, key, val):
self._container[key.lower()] = (key, val)
return self._container[key.lower()]
def __getitem__(self, key):
val = self._container[key.lower()]
return ', '.join(val[1:])
def __delitem__(self, key):
del self._container[key.lower()]
def __contains__(self, key):
return key.lower() in self._container
def __eq__(self, other):
if not isinstance(other, Mapping) and not hasattr(other, 'keys'):
return False
if not isinstance(other, type(self)):
other = type(self)(other)
return (dict((k.lower(), v) for k, v in self.itermerged()) ==
dict((k.lower(), v) for k, v in other.itermerged()))
def __ne__(self, other):
return not self.__eq__(other)
if not PY3: # Python 2
iterkeys = MutableMapping.iterkeys
itervalues = MutableMapping.itervalues
__marker = object()
def __len__(self):
return len(self._container)
def __iter__(self):
# Only provide the originally cased names
for vals in self._container.values():
yield vals[0]
def pop(self, key, default=__marker):
"""D.pop(k[,d]) -> v, remove specified key and return its value.
If key is not found, d is returned if given, otherwise KeyError is
raised.
"""
# Using the MutableMapping function directly fails due to the private
# marker.
# Using ordinary dict.pop would expose the internal structures.
# So let's reinvent the wheel.
try:
value = self[key]
except KeyError:
if default is self.__marker:
raise
return default
else:
del self[key]
return value
def discard(self, key):
try:
del self[key]
except KeyError:
pass
def add(self, key, val):
"""Adds a (name, value) pair, doesn't overwrite the value if it already
exists.
>>> headers = HTTPHeaderDict(foo='bar')
>>> headers.add('Foo', 'baz')
>>> headers['foo']
'bar, baz'
"""
key_lower = key.lower()
new_vals = key, val
# Keep the common case aka no item present as fast as possible
vals = self._container.setdefault(key_lower, new_vals)
if new_vals is not vals:
# new_vals was not inserted, as there was a previous one
if isinstance(vals, list):
# If already several items got inserted, we have a list
vals.append(val)
else:
# vals should be a tuple then, i.e. only one item so far
# Need to convert the tuple to list for further extension
self._container[key_lower] = [vals[0], vals[1], val]
def extend(self, *args, **kwargs):
"""Generic import function for any type of header-like object.
Adapted version of MutableMapping.update in order to insert items
with self.add instead of self.__setitem__
"""
if len(args) > 1:
raise TypeError("extend() takes at most 1 positional "
"arguments ({} given)".format(len(args)))
other = args[0] if len(args) >= 1 else ()
if isinstance(other, HTTPHeaderDict):
for key, val in other.iteritems():
self.add(key, val)
elif isinstance(other, Mapping):
for key in other:
self.add(key, other[key])
elif hasattr(other, "keys"):
for key in other.keys():
self.add(key, other[key])
else:
for key, value in other:
self.add(key, value)
for key, value in kwargs.items():
self.add(key, value)
def getlist(self, key):
"""Returns a list of all the values for the named field. Returns an
empty list if the key doesn't exist."""
try:
vals = self._container[key.lower()]
except KeyError:
return []
else:
if isinstance(vals, tuple):
return [vals[1]]
else:
return vals[1:]
# Backwards compatibility for httplib
getheaders = getlist
getallmatchingheaders = getlist
iget = getlist
def __repr__(self):
return "%s(%s)" % (type(self).__name__, dict(self.itermerged()))
def _copy_from(self, other):
for key in other:
val = other.getlist(key)
if isinstance(val, list):
# Don't need to convert tuples
val = list(val)
self._container[key.lower()] = [key] + val
def copy(self):
clone = type(self)()
clone._copy_from(self)
return clone
def iteritems(self):
"""Iterate over all header lines, including duplicate ones."""
for key in self:
vals = self._container[key.lower()]
for val in vals[1:]:
yield vals[0], val
def itermerged(self):
"""Iterate over all headers, merging duplicate ones together."""
for key in self:
val = self._container[key.lower()]
yield val[0], ', '.join(val[1:])
def items(self):
return list(self.iteritems())
@classmethod
def from_httplib(cls, message): # Python 2
"""Read headers from a Python 2 httplib message object."""
# python2.7 does not expose a proper API for exporting multiheaders
# efficiently. This function re-reads raw lines from the message
# object and extracts the multiheaders properly.
headers = []
for line in message.headers:
if line.startswith((' ', '\t')):
key, value = headers[-1]
headers[-1] = (key, value + '\r\n' + line.rstrip())
continue
key, value = line.split(':', 1)
headers.append((key, value.strip()))
return cls(headers)
__all__ = (
'basestring',
'connection',
'fields',
'filepost',
'poolmanager',
'timeout',
'HTTPHeaderDict',
'queue',
'urlencode',
'gaecontrib',
'urljoin',
)
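
A brief sketch of the multi-valued behavior described in the
``HTTPHeaderDict`` docstring above: ``getlist`` returns every value stored
for a field, while ``__getitem__`` joins them with commas::

    from requests_toolbelt._compat import HTTPHeaderDict

    headers = HTTPHeaderDict()
    headers.add('Set-Cookie', 'foo=bar')
    headers.add('Set-Cookie', 'baz=quux')
    assert headers.getlist('set-cookie') == ['foo=bar', 'baz=quux']
    assert headers['SET-COOKIE'] == 'foo=bar, baz=quux'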

View file

@@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
"""
requests-toolbelt.adapters
==========================
See http://toolbelt.rtfd.org/ for documentation
:copyright: (c) 2014 by Ian Cordasco and Cory Benfield
:license: Apache v2.0, see LICENSE for more details
"""
from .ssl import SSLAdapter
from .source import SourceAddressAdapter
__all__ = ['SSLAdapter', 'SourceAddressAdapter']

View file

@@ -0,0 +1,147 @@
# -*- coding: utf-8 -*-
"""The App Engine Transport Adapter for requests.
.. versionadded:: 0.6.0
This requires a version of requests >= 2.10.0 and Python 2.
There are two ways to use this library:
#. If you're using requests directly, you can use code like:
.. code-block:: python
>>> import requests
>>> import ssl
>>> import requests.packages.urllib3.contrib.appengine as ul_appengine
>>> from requests_toolbelt.adapters import appengine
>>> s = requests.Session()
>>> if ul_appengine.is_appengine_sandbox():
... s.mount('http://', appengine.AppEngineAdapter())
... s.mount('https://', appengine.AppEngineAdapter())
#. If you depend on external libraries which use requests, you can use code
like:
.. code-block:: python
>>> from requests_toolbelt.adapters import appengine
>>> appengine.monkeypatch()
which will ensure all requests.Session objects use AppEngineAdapter properly.
"""
import requests
from requests import adapters
from requests import sessions
from .. import exceptions as exc
from .._compat import gaecontrib
from .._compat import timeout
class AppEngineAdapter(adapters.HTTPAdapter):
"""The transport adapter for Requests to use urllib3's GAE support.
Implements Requests's HTTPAdapter API.
When deploying to Google's App Engine service, some of Requests'
functionality is broken. There is underlying support for GAE in urllib3.
This functionality, however, is opt-in and needs to be enabled explicitly
for Requests to be able to use it.
"""
def __init__(self, validate_certificate=True, *args, **kwargs):
_check_version()
self._validate_certificate = validate_certificate
super(AppEngineAdapter, self).__init__(*args, **kwargs)
def init_poolmanager(self, connections, maxsize, block=False):
self.poolmanager = _AppEnginePoolManager(self._validate_certificate)
class _AppEnginePoolManager(object):
"""Implements urllib3's PoolManager API expected by requests.
    While a real PoolManager maps hostnames to reusable Connections,
    AppEngine has no concept of a reusable connection to a host.
    Instead, this class constructs a small Connection per request
    that is returned to the Adapter and used to access the URL.
"""
def __init__(self, validate_certificate=True):
self.appengine_manager = gaecontrib.AppEngineManager(
validate_certificate=validate_certificate)
def connection_from_url(self, url):
return _AppEngineConnection(self.appengine_manager, url)
def clear(self):
pass
class _AppEngineConnection(object):
"""Implements urllib3's HTTPConnectionPool API's urlopen().
This Connection's urlopen() is called with a host-relative path,
so in order to properly support opening the URL, we need to store
the full URL when this Connection is constructed from the PoolManager.
This code wraps AppEngineManager.urlopen(), which exposes a different
API than in the original urllib3 urlopen(), and thus needs this adapter.
"""
def __init__(self, appengine_manager, url):
self.appengine_manager = appengine_manager
self.url = url
def urlopen(self, method, url, body=None, headers=None, retries=None,
redirect=True, assert_same_host=True,
timeout=timeout.Timeout.DEFAULT_TIMEOUT,
pool_timeout=None, release_conn=None, **response_kw):
# This function's url argument is a host-relative URL,
# but the AppEngineManager expects an absolute URL.
# So we saved out the self.url when the AppEngineConnection
# was constructed, which we then can use down below instead.
# We once tried to verify our assumptions here, but sometimes the
# passed-in URL differs on url fragments, or "http://a.com" vs "/".
# urllib3's App Engine adapter only uses Timeout.total, not read or
# connect.
if not timeout.total:
timeout.total = timeout._read or timeout._connect
# Jump through the hoops necessary to call AppEngineManager's API.
return self.appengine_manager.urlopen(
method,
self.url,
body=body,
headers=headers,
retries=retries,
redirect=redirect,
timeout=timeout,
**response_kw)
def monkeypatch():
"""Sets up all Sessions to use AppEngineAdapter by default.
If you don't want to deal with configuring your own Sessions,
    or if you use libraries that use requests directly (i.e., requests.post),
then you may prefer to monkeypatch and auto-configure all Sessions.
"""
_check_version()
# HACK: We should consider modifying urllib3 to support this cleanly,
# so that we can set a module-level variable in the sessions module,
# instead of overriding an imported HTTPAdapter as is done here.
sessions.HTTPAdapter = AppEngineAdapter
def _check_version():
if gaecontrib is None:
raise exc.VersionMismatchError(
"The toolbelt requires at least Requests 2.10.0 to be "
"installed. Version {0} was found instead.".format(
requests.__version__
)
)

View file

@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-
"""Submodule containing the implementation for the FingerprintAdapter.
This file contains an implementation of a Transport Adapter that validates
the fingerprints of SSL certificates presented upon connection.
"""
from requests.adapters import HTTPAdapter
from .._compat import poolmanager
class FingerprintAdapter(HTTPAdapter):
"""
    An HTTPS Adapter for Python Requests that verifies certificate fingerprints
    instead of certificate hostnames.
Example usage:
.. code-block:: python
import requests
import ssl
from requests_toolbelt.adapters.fingerprint import FingerprintAdapter
twitter_fingerprint = '...'
s = requests.Session()
s.mount(
'https://twitter.com',
FingerprintAdapter(twitter_fingerprint)
)
The fingerprint should be provided as a hexadecimal string, optionally
containing colons.
"""
__attrs__ = HTTPAdapter.__attrs__ + ['fingerprint']
def __init__(self, fingerprint, **kwargs):
self.fingerprint = fingerprint
super(FingerprintAdapter, self).__init__(**kwargs)
def init_poolmanager(self, connections, maxsize, block=False):
self.poolmanager = poolmanager.PoolManager(
num_pools=connections,
maxsize=maxsize,
block=block,
assert_fingerprint=self.fingerprint)
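
One way to obtain a fingerprint for this adapter is to hash the server's
DER-encoded certificate; urllib3's ``assert_fingerprint`` accepts MD5,
SHA-1, or SHA-256 digests. A minimal sketch, using example.org as a
stand-in host::

    import hashlib
    import ssl

    import requests
    from requests_toolbelt.adapters.fingerprint import FingerprintAdapter

    # Fetch the certificate once, out of band, and pin its SHA-256 digest.
    pem = ssl.get_server_certificate(('example.org', 443))
    der = ssl.PEM_cert_to_DER_cert(pem)
    fingerprint = hashlib.sha256(der).hexdigest()

    s = requests.Session()
    s.mount('https://example.org', FingerprintAdapter(fingerprint))
    r = s.get('https://example.org')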

View file

@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
"""
requests_toolbelt.adapters.host_header_ssl
==========================================
This file contains an implementation of the HostHeaderSSLAdapter.
"""
from requests.adapters import HTTPAdapter
class HostHeaderSSLAdapter(HTTPAdapter):
"""
    An HTTPS Adapter for Python Requests that sets the hostname for certificate
    verification based on the Host header.
This allows requesting the IP address directly via HTTPS without getting
a "hostname doesn't match" exception.
Example usage:
    >>> s = requests.Session()
    >>> s.mount('https://', HostHeaderSSLAdapter())
>>> s.get("https://93.184.216.34", headers={"Host": "example.org"})
"""
def send(self, request, **kwargs):
# HTTP headers are case-insensitive (RFC 7230)
host_header = None
for header in request.headers:
if header.lower() == "host":
host_header = request.headers[header]
break
connection_pool_kwargs = self.poolmanager.connection_pool_kw
if host_header:
connection_pool_kwargs["assert_hostname"] = host_header
elif "assert_hostname" in connection_pool_kwargs:
            # clear an assert_hostname left over from a previous request
            connection_pool_kwargs.pop("assert_hostname", None)
return super(HostHeaderSSLAdapter, self).send(request, **kwargs)

View file

@@ -0,0 +1,118 @@
# -*- coding: utf-8 -*-
"""The implementation of the SocketOptionsAdapter."""
import socket
import warnings
import requests
from requests import adapters
from .._compat import connection
from .._compat import poolmanager
from .. import exceptions as exc
class SocketOptionsAdapter(adapters.HTTPAdapter):
"""An adapter for requests that allows users to specify socket options.
Since version 2.4.0 of requests, it is possible to specify a custom list
of socket options that need to be set before establishing the connection.
Example usage::
>>> import socket
>>> import requests
>>> from requests_toolbelt.adapters import socket_options
>>> s = requests.Session()
>>> opts = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 0)]
>>> adapter = socket_options.SocketOptionsAdapter(socket_options=opts)
>>> s.mount('http://', adapter)
You can also take advantage of the list of default options on this class
to keep using the original options in addition to your custom options. In
that case, ``opts`` might look like::
>>> opts = socket_options.SocketOptionsAdapter.default_options + opts
"""
if connection is not None:
default_options = getattr(
connection.HTTPConnection,
'default_socket_options',
[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]
)
else:
default_options = []
        warnings.warn("This version of Requests is only compatible with a "
                      "version of urllib3 which is too old to support "
                      "setting options on a socket. This adapter is "
                      "functionally useless.",
                      exc.RequestsVersionTooOld)
def __init__(self, **kwargs):
self.socket_options = kwargs.pop('socket_options',
self.default_options)
super(SocketOptionsAdapter, self).__init__(**kwargs)
def init_poolmanager(self, connections, maxsize, block=False):
if requests.__build__ >= 0x020400:
# NOTE(Ian): Perhaps we should raise a warning
self.poolmanager = poolmanager.PoolManager(
num_pools=connections,
maxsize=maxsize,
block=block,
socket_options=self.socket_options
)
else:
super(SocketOptionsAdapter, self).init_poolmanager(
connections, maxsize, block
)
class TCPKeepAliveAdapter(SocketOptionsAdapter):
"""An adapter for requests that turns on TCP Keep-Alive by default.
The adapter sets 4 socket options:
- ``SOL_SOCKET`` ``SO_KEEPALIVE`` - This turns on TCP Keep-Alive
- ``IPPROTO_TCP`` ``TCP_KEEPINTVL`` 20 - Sets the keep alive interval
- ``IPPROTO_TCP`` ``TCP_KEEPCNT`` 5 - Sets the number of keep alive probes
- ``IPPROTO_TCP`` ``TCP_KEEPIDLE`` 60 - Sets the keep alive time if the
socket library has the ``TCP_KEEPIDLE`` constant
The latter three can be overridden by keyword arguments (respectively):
    - ``interval``
    - ``count``
    - ``idle``
You can use this adapter like so::
>>> from requests_toolbelt.adapters import socket_options
>>> tcp = socket_options.TCPKeepAliveAdapter(idle=120, interval=10)
>>> s = requests.Session()
>>> s.mount('http://', tcp)
"""
def __init__(self, **kwargs):
socket_options = kwargs.pop('socket_options',
SocketOptionsAdapter.default_options)
idle = kwargs.pop('idle', 60)
interval = kwargs.pop('interval', 20)
count = kwargs.pop('count', 5)
socket_options = socket_options + [
(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, interval),
(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, count),
]
# NOTE(Ian): Apparently OSX does not have this constant defined, so we
# set it conditionally.
if getattr(socket, 'TCP_KEEPIDLE', None) is not None:
socket_options += [(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, idle)]
super(TCPKeepAliveAdapter, self).__init__(
socket_options=socket_options, **kwargs
)
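
The keyword overrides compose with the inherited socket-options machinery,
so a sketch mounting the adapter for both schemes with all three values
overridden looks like this::

    import requests
    from requests_toolbelt.adapters import socket_options

    tcp = socket_options.TCPKeepAliveAdapter(idle=120, interval=10, count=3)
    s = requests.Session()
    s.mount('http://', tcp)
    s.mount('https://', tcp)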

View file

@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
"""
requests_toolbelt.source_adapter
================================
This file contains an implementation of the SourceAddressAdapter originally
demonstrated on the Requests GitHub page.
"""
from requests.adapters import HTTPAdapter
from .._compat import poolmanager, basestring
class SourceAddressAdapter(HTTPAdapter):
"""
A Source Address Adapter for Python Requests that enables you to choose the
local address to bind to. This allows you to send your HTTP requests from a
specific interface and IP address.
Two address formats are accepted. The first is a string: this will set the
local IP address to the address given in the string, and will also choose a
semi-random high port for the local port number.
The second is a two-tuple of the form (ip address, port): for example,
``('10.10.10.10', 8999)``. This will set the local IP address to the first
element, and the local port to the second element. If ``0`` is used as the
port number, a semi-random high port will be selected.
.. warning:: Setting an explicit local port can have negative interactions
with connection-pooling in Requests: in particular, it risks
the possibility of getting "Address in use" errors. The
string-only argument is generally preferred to the tuple-form.
Example usage:
.. code-block:: python
import requests
from requests_toolbelt.adapters.source import SourceAddressAdapter
s = requests.Session()
s.mount('http://', SourceAddressAdapter('10.10.10.10'))
        s.mount('https://', SourceAddressAdapter(('10.10.10.10', 8999)))
"""
def __init__(self, source_address, **kwargs):
if isinstance(source_address, basestring):
self.source_address = (source_address, 0)
elif isinstance(source_address, tuple):
self.source_address = source_address
else:
raise TypeError(
"source_address must be IP address string or (ip, port) tuple"
)
super(SourceAddressAdapter, self).__init__(**kwargs)
def init_poolmanager(self, connections, maxsize, block=False):
self.poolmanager = poolmanager.PoolManager(
num_pools=connections,
maxsize=maxsize,
block=block,
source_address=self.source_address)
def proxy_manager_for(self, *args, **kwargs):
kwargs['source_address'] = self.source_address
return super(SourceAddressAdapter, self).proxy_manager_for(
*args, **kwargs)

View file

@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*-
"""
requests_toolbelt.ssl_adapter
=============================
This file contains an implementation of the SSLAdapter originally demonstrated
in this blog post:
https://lukasa.co.uk/2013/01/Choosing_SSL_Version_In_Requests/
"""
import requests
from requests.adapters import HTTPAdapter
from .._compat import poolmanager
class SSLAdapter(HTTPAdapter):
"""
    An HTTPS Adapter for Python Requests that allows the choice of the SSL/TLS
version negotiated by Requests. This can be used either to enforce the
choice of high-security TLS versions (where supported), or to work around
misbehaving servers that fail to correctly negotiate the default TLS
version being offered.
Example usage:
>>> import requests
>>> import ssl
>>> from requests_toolbelt import SSLAdapter
>>> s = requests.Session()
>>> s.mount('https://', SSLAdapter(ssl.PROTOCOL_TLSv1))
You can replace the chosen protocol with any that are available in the
default Python SSL module. All subsequent requests that match the adapter
prefix will use the chosen SSL version instead of the default.
This adapter will also attempt to change the SSL/TLS version negotiated by
Requests when using a proxy. However, this may not always be possible:
prior to Requests v2.4.0 the adapter did not have access to the proxy setup
code. In earlier versions of Requests, this adapter will not function
properly when used with proxies.
"""
__attrs__ = HTTPAdapter.__attrs__ + ['ssl_version']
def __init__(self, ssl_version=None, **kwargs):
self.ssl_version = ssl_version
super(SSLAdapter, self).__init__(**kwargs)
def init_poolmanager(self, connections, maxsize, block=False):
self.poolmanager = poolmanager.PoolManager(
num_pools=connections,
maxsize=maxsize,
block=block,
ssl_version=self.ssl_version)
if requests.__build__ >= 0x020400:
# Earlier versions of requests either don't have this method or, worse,
# don't allow passing arbitrary keyword arguments. As a result, only
# conditionally define this method.
def proxy_manager_for(self, *args, **kwargs):
kwargs['ssl_version'] = self.ssl_version
return super(SSLAdapter, self).proxy_manager_for(*args, **kwargs)

View file

@@ -0,0 +1,29 @@
"""Provide a compatibility layer for requests.auth.HTTPDigestAuth."""
import requests
class _ThreadingDescriptor(object):
def __init__(self, prop, default):
self.prop = prop
self.default = default
def __get__(self, obj, objtype=None):
return getattr(obj._thread_local, self.prop, self.default)
def __set__(self, obj, value):
setattr(obj._thread_local, self.prop, value)
class _HTTPDigestAuth(requests.auth.HTTPDigestAuth):
init = _ThreadingDescriptor('init', True)
last_nonce = _ThreadingDescriptor('last_nonce', '')
nonce_count = _ThreadingDescriptor('nonce_count', 0)
chal = _ThreadingDescriptor('chal', {})
pos = _ThreadingDescriptor('pos', None)
num_401_calls = _ThreadingDescriptor('num_401_calls', 1)
if requests.__build__ < 0x020800:
HTTPDigestAuth = requests.auth.HTTPDigestAuth
else:
HTTPDigestAuth = _HTTPDigestAuth
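
The descriptor simply proxies attribute access through a per-instance
``threading.local``, which is how requests 2.8+ stores digest state. A
small self-contained sketch (the ``Example`` class is hypothetical)::

    import threading

    class Example(object):
        counter = _ThreadingDescriptor('counter', 0)

        def __init__(self):
            self._thread_local = threading.local()

    obj = Example()
    obj.counter = 5  # visible only to the thread that set it

    def worker():
        assert obj.counter == 0  # other threads still see the default

    t = threading.Thread(target=worker)
    t.start()
    t.join()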

View file

@@ -0,0 +1,146 @@
# -*- coding: utf-8 -*-
"""The module containing the code for GuessAuth."""
from requests import auth
from requests import cookies
from . import _digest_auth_compat as auth_compat, http_proxy_digest
class GuessAuth(auth.AuthBase):
"""Guesses the auth type by the WWW-Authentication header."""
def __init__(self, username, password):
self.username = username
self.password = password
self.auth = None
self.pos = None
def _handle_basic_auth_401(self, r, kwargs):
if self.pos is not None:
r.request.body.seek(self.pos)
# Consume content and release the original connection
# to allow our new request to reuse the same one.
r.content
r.raw.release_conn()
prep = r.request.copy()
if not hasattr(prep, '_cookies'):
prep._cookies = cookies.RequestsCookieJar()
cookies.extract_cookies_to_jar(prep._cookies, r.request, r.raw)
prep.prepare_cookies(prep._cookies)
self.auth = auth.HTTPBasicAuth(self.username, self.password)
prep = self.auth(prep)
_r = r.connection.send(prep, **kwargs)
_r.history.append(r)
_r.request = prep
return _r
def _handle_digest_auth_401(self, r, kwargs):
self.auth = auth_compat.HTTPDigestAuth(self.username, self.password)
try:
self.auth.init_per_thread_state()
except AttributeError:
# If we're not on requests 2.8.0+ this method does not exist and
# is not relevant.
pass
# Check that the attr exists because much older versions of requests
# set this attribute lazily. For example:
# https://github.com/kennethreitz/requests/blob/33735480f77891754304e7f13e3cdf83aaaa76aa/requests/auth.py#L59
if (hasattr(self.auth, 'num_401_calls') and
self.auth.num_401_calls is None):
self.auth.num_401_calls = 1
# Digest auth would resend the request by itself. We can take a
# shortcut here.
return self.auth.handle_401(r, **kwargs)
def handle_401(self, r, **kwargs):
"""Resends a request with auth headers, if needed."""
www_authenticate = r.headers.get('www-authenticate', '').lower()
if 'basic' in www_authenticate:
return self._handle_basic_auth_401(r, kwargs)
if 'digest' in www_authenticate:
return self._handle_digest_auth_401(r, kwargs)
def __call__(self, request):
if self.auth is not None:
return self.auth(request)
try:
self.pos = request.body.tell()
except AttributeError:
pass
request.register_hook('response', self.handle_401)
return request
class GuessProxyAuth(GuessAuth):
"""
    Guesses the auth type by the WWW-Authenticate and Proxy-Authenticate
    headers.
"""
def __init__(self, username=None, password=None,
proxy_username=None, proxy_password=None):
super(GuessProxyAuth, self).__init__(username, password)
self.proxy_username = proxy_username
self.proxy_password = proxy_password
self.proxy_auth = None
def _handle_basic_auth_407(self, r, kwargs):
if self.pos is not None:
r.request.body.seek(self.pos)
r.content
r.raw.release_conn()
prep = r.request.copy()
if not hasattr(prep, '_cookies'):
prep._cookies = cookies.RequestsCookieJar()
cookies.extract_cookies_to_jar(prep._cookies, r.request, r.raw)
prep.prepare_cookies(prep._cookies)
self.proxy_auth = auth.HTTPProxyAuth(self.proxy_username,
self.proxy_password)
prep = self.proxy_auth(prep)
_r = r.connection.send(prep, **kwargs)
_r.history.append(r)
_r.request = prep
return _r
def _handle_digest_auth_407(self, r, kwargs):
self.proxy_auth = http_proxy_digest.HTTPProxyDigestAuth(
username=self.proxy_username,
password=self.proxy_password)
try:
            self.proxy_auth.init_per_thread_state()
except AttributeError:
pass
return self.proxy_auth.handle_407(r, **kwargs)
def handle_407(self, r, **kwargs):
proxy_authenticate = r.headers.get('Proxy-Authenticate', '').lower()
if 'basic' in proxy_authenticate:
return self._handle_basic_auth_407(r, kwargs)
if 'digest' in proxy_authenticate:
return self._handle_digest_auth_407(r, kwargs)
def __call__(self, request):
if self.proxy_auth is not None:
request = self.proxy_auth(request)
try:
self.pos = request.body.tell()
except AttributeError:
pass
request.register_hook('response', self.handle_407)
return super(GuessProxyAuth, self).__call__(request)
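
Typical usage lets ``GuessAuth`` pick between Basic and Digest based on the
server's 401 challenge. A sketch against httpbin's Basic auth endpoint::

    import requests
    from requests_toolbelt.auth.guess import GuessAuth

    r = requests.get('https://httpbin.org/basic-auth/user/passwd',
                     auth=GuessAuth('user', 'passwd'))
    print(r.status_code)  # 200 once the guessed credentials are replayed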

View file

@@ -0,0 +1,142 @@
# -*- coding: utf-8 -*-
"""
requests_toolbelt.auth.handler
==============================
This holds all of the implementation details of the Authentication Handler.
"""
from requests.auth import AuthBase, HTTPBasicAuth
from requests.compat import urlparse, urlunparse
class AuthHandler(AuthBase):
"""
The ``AuthHandler`` object takes a dictionary of domains paired with
authentication strategies and will use this to determine which credentials
to use when making a request. For example, you could do the following:
.. code-block:: python
        from requests.auth import HTTPDigestAuth
from requests_toolbelt.auth.handler import AuthHandler
import requests
auth = AuthHandler({
'https://api.github.com': ('sigmavirus24', 'fakepassword'),
'https://example.com': HTTPDigestAuth('username', 'password')
})
r = requests.get('https://api.github.com/user', auth=auth)
# => <Response [200]>
r = requests.get('https://example.com/some/path', auth=auth)
# => <Response [200]>
s = requests.Session()
s.auth = auth
r = s.get('https://api.github.com/user')
# => <Response [200]>
.. warning::
:class:`requests.auth.HTTPDigestAuth` is not yet thread-safe. If you
use :class:`AuthHandler` across multiple threads you should
instantiate a new AuthHandler for each thread with a new
HTTPDigestAuth instance for each thread.
"""
def __init__(self, strategies):
self.strategies = dict(strategies)
self._make_uniform()
def __call__(self, request):
auth = self.get_strategy_for(request.url)
return auth(request)
def __repr__(self):
return '<AuthHandler({0!r})>'.format(self.strategies)
def _make_uniform(self):
existing_strategies = list(self.strategies.items())
self.strategies = {}
for (k, v) in existing_strategies:
self.add_strategy(k, v)
@staticmethod
def _key_from_url(url):
parsed = urlparse(url)
return urlunparse((parsed.scheme.lower(),
parsed.netloc.lower(),
'', '', '', ''))
def add_strategy(self, domain, strategy):
"""Add a new domain and authentication strategy.
:param str domain: The domain you wish to match against. For example:
``'https://api.github.com'``
        :param strategy: The authentication strategy you wish to use for
that domain. For example: ``('username', 'password')`` or
``requests.HTTPDigestAuth('username', 'password')``
.. code-block:: python
a = AuthHandler({})
a.add_strategy('https://api.github.com', ('username', 'password'))
"""
# Turn tuples into Basic Authentication objects
if isinstance(strategy, tuple):
strategy = HTTPBasicAuth(*strategy)
key = self._key_from_url(domain)
self.strategies[key] = strategy
def get_strategy_for(self, url):
"""Retrieve the authentication strategy for a specified URL.
:param str url: The full URL you will be making a request against. For
example, ``'https://api.github.com/user'``
:returns: Callable that adds authentication to a request.
.. code-block:: python
import requests
        a = AuthHandler({'example.com': ('foo', 'bar')})
strategy = a.get_strategy_for('http://example.com/example')
assert isinstance(strategy, requests.auth.HTTPBasicAuth)
"""
key = self._key_from_url(url)
return self.strategies.get(key, NullAuthStrategy())
def remove_strategy(self, domain):
"""Remove the domain and strategy from the collection of strategies.
:param str domain: The domain you wish remove. For example,
``'https://api.github.com'``.
.. code-block:: python
        a = AuthHandler({'example.com': ('foo', 'bar')})
a.remove_strategy('example.com')
assert a.strategies == {}
"""
key = self._key_from_url(domain)
if key in self.strategies:
del self.strategies[key]
class NullAuthStrategy(AuthBase):
def __repr__(self):
return '<NullAuthStrategy>'
def __call__(self, r):
return r

View file

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
"""The module containing HTTPProxyDigestAuth."""
import re
from requests import cookies, utils
from . import _digest_auth_compat as auth
class HTTPProxyDigestAuth(auth.HTTPDigestAuth):
"""HTTP digest authentication between proxy
:param stale_rejects: The number of rejects indicate that:
the client may wish to simply retry the request
with a new encrypted response, without reprompting the user for a
new username and password. i.e., retry build_digest_header
:type stale_rejects: int
"""
_pat = re.compile(r'digest ', flags=re.IGNORECASE)
def __init__(self, *args, **kwargs):
super(HTTPProxyDigestAuth, self).__init__(*args, **kwargs)
self.stale_rejects = 0
self.init_per_thread_state()
@property
def stale_rejects(self):
thread_local = getattr(self, '_thread_local', None)
if thread_local is None:
return self._stale_rejects
return thread_local.stale_rejects
@stale_rejects.setter
def stale_rejects(self, value):
thread_local = getattr(self, '_thread_local', None)
if thread_local is None:
self._stale_rejects = value
else:
thread_local.stale_rejects = value
def init_per_thread_state(self):
try:
super(HTTPProxyDigestAuth, self).init_per_thread_state()
except AttributeError:
# If we're not on requests 2.8.0+ this method does not exist
pass
def handle_407(self, r, **kwargs):
"""Handle HTTP 407 only once, otherwise give up
:param r: current response
:returns: responses, along with the new response
"""
if r.status_code == 407 and self.stale_rejects < 2:
s_auth = r.headers.get("proxy-authenticate")
if s_auth is None:
raise IOError(
"proxy server violated RFC 7235:"
"407 response MUST contain header proxy-authenticate")
elif not self._pat.match(s_auth):
return r
self.chal = utils.parse_dict_header(
self._pat.sub('', s_auth, count=1))
# if we present the user/passwd and still get rejected
# http://tools.ietf.org/html/rfc2617#section-3.2.1
if ('Proxy-Authorization' in r.request.headers and
'stale' in self.chal):
if self.chal['stale'].lower() == 'true': # try again
self.stale_rejects += 1
# wrong user/passwd
elif self.chal['stale'].lower() == 'false':
raise IOError("User or password is invalid")
# Consume content and release the original connection
# to allow our new request to reuse the same one.
r.content
r.close()
prep = r.request.copy()
cookies.extract_cookies_to_jar(prep._cookies, r.request, r.raw)
prep.prepare_cookies(prep._cookies)
prep.headers['Proxy-Authorization'] = self.build_digest_header(
prep.method, prep.url)
_r = r.connection.send(prep, **kwargs)
_r.history.append(r)
_r.request = prep
return _r
else: # give up authenticate
return r
def __call__(self, r):
self.init_per_thread_state()
# if we have nonce, then just use it, otherwise server will tell us
if self.last_nonce:
r.headers['Proxy-Authorization'] = self.build_digest_header(
r.method, r.url
)
r.register_hook('response', self.handle_407)
return r
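
A usage sketch; the proxy URL and credentials here are placeholders::

    import requests
    from requests_toolbelt.auth.http_proxy_digest import HTTPProxyDigestAuth

    proxies = {'http': 'http://proxy.example.com:8080'}  # hypothetical proxy
    auth = HTTPProxyDigestAuth('username', 'password')
    r = requests.get('http://example.org', proxies=proxies, auth=auth)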

View file

@@ -0,0 +1,7 @@
"""The module containing the code for ForgetfulCookieJar."""
from requests.cookies import RequestsCookieJar
class ForgetfulCookieJar(RequestsCookieJar):
def set_cookie(self, *args, **kwargs):
return
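
Assigning the jar to a session makes the session stateless with respect to
cookies, since ``set_cookie`` is a no-op::

    import requests
    from requests_toolbelt.cookies.forgetful import ForgetfulCookieJar

    s = requests.Session()
    s.cookies = ForgetfulCookieJar()
    s.get('https://httpbin.org/cookies/set?name=value')
    assert len(s.cookies) == 0  # the Set-Cookie header was ignored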

View file

@@ -0,0 +1,177 @@
# -*- coding: utf-8 -*-
"""Utilities for dealing with streamed requests."""
import os.path
import re
from .. import exceptions as exc
# Regular expressions stolen from werkzeug/http.py
# cd2c97bb0a076da2322f11adce0b2731f9193396 L62-L64
_QUOTED_STRING_RE = r'"[^"\\]*(?:\\.[^"\\]*)*"'
_OPTION_HEADER_PIECE_RE = re.compile(
r';\s*(%s|[^\s;=]+)\s*(?:=\s*(%s|[^;]+))?\s*' % (_QUOTED_STRING_RE,
_QUOTED_STRING_RE)
)
_DEFAULT_CHUNKSIZE = 512
def _get_filename(content_disposition):
for match in _OPTION_HEADER_PIECE_RE.finditer(content_disposition):
k, v = match.groups()
if k == 'filename':
# ignore any directory paths in the filename
return os.path.split(v)[1]
return None
def get_download_file_path(response, path):
"""
Given a response and a path, return a file path for a download.
If a ``path`` parameter is a directory, this function will parse the
``Content-Disposition`` header on the response to determine the name of the
file as reported by the server, and return a file path in the specified
directory.
If ``path`` is empty or None, this function will return a path relative
to the process' current working directory.
If path is a full file path, return it.
:param response: A Response object from requests
:type response: requests.models.Response
:param str path: Directory or file path.
:returns: full file path to download as
:rtype: str
:raises: :class:`requests_toolbelt.exceptions.StreamingError`
"""
path_is_dir = path and os.path.isdir(path)
if path and not path_is_dir:
# fully qualified file path
filepath = path
else:
response_filename = _get_filename(
response.headers.get('content-disposition', '')
)
if not response_filename:
raise exc.StreamingError('No filename given to stream response to')
if path_is_dir:
# directory to download to
filepath = os.path.join(path, response_filename)
else:
# fallback to downloading to current working directory
filepath = response_filename
return filepath
def stream_response_to_file(response, path=None, chunksize=_DEFAULT_CHUNKSIZE):
"""Stream a response body to the specified file.
Either use the ``path`` provided or use the name provided in the
``Content-Disposition`` header.
.. warning::
If you pass this function an open file-like object as the ``path``
parameter, the function will not close that file for you.
.. warning::
This function will not automatically close the response object
passed in as the ``response`` parameter.
If a ``path`` parameter is a directory, this function will parse the
``Content-Disposition`` header on the response to determine the name of the
file as reported by the server, and return a file path in the specified
directory. If no ``path`` parameter is supplied, this function will default
to the process' current working directory.
.. code-block:: python
import requests
from requests_toolbelt import exceptions
from requests_toolbelt.downloadutils import stream
r = requests.get(url, stream=True)
try:
filename = stream.stream_response_to_file(r)
except exceptions.StreamingError as e:
# The toolbelt could not find the filename in the
# Content-Disposition
            print(e)
You can also specify the filename as a string. This will be passed to
the built-in :func:`open` and we will read the content into the file.
.. code-block:: python
import requests
from requests_toolbelt.downloadutils import stream
r = requests.get(url, stream=True)
filename = stream.stream_response_to_file(r, path='myfile')
If the calculated download file path already exists, this function will
raise a StreamingError.
Instead, if you want to manage the file object yourself, you need to
provide either a :class:`io.BytesIO` object or a file opened with the
`'b'` flag. See the two examples below for more details.
.. code-block:: python
import requests
from requests_toolbelt.downloadutils import stream
with open('myfile', 'wb') as fd:
r = requests.get(url, stream=True)
filename = stream.stream_response_to_file(r, path=fd)
print('{0} saved to {1}'.format(url, filename))
.. code-block:: python
import io
import requests
from requests_toolbelt.downloadutils import stream
b = io.BytesIO()
r = requests.get(url, stream=True)
filename = stream.stream_response_to_file(r, path=b)
assert filename is None
:param response: A Response object from requests
:type response: requests.models.Response
:param path: *(optional)*, Either a string with the path to the location
to save the response content, or a file-like object expecting bytes.
:type path: :class:`str`, or object with a :meth:`write`
:param int chunksize: (optional), Size of chunk to attempt to stream
(default 512B).
:returns: The name of the file, if one can be determined, else None
:rtype: str
:raises: :class:`requests_toolbelt.exceptions.StreamingError`
"""
pre_opened = False
fd = None
filename = None
    if path and callable(getattr(path, 'write', None)):
pre_opened = True
fd = path
filename = getattr(fd, 'name', None)
else:
filename = get_download_file_path(response, path)
if os.path.exists(filename):
raise exc.StreamingError("File already exists: %s" % filename)
fd = open(filename, 'wb')
for chunk in response.iter_content(chunk_size=chunksize):
fd.write(chunk)
if not pre_opened:
fd.close()
return filename

View file

@@ -0,0 +1,123 @@
"""Tee function implementations."""
import io
_DEFAULT_CHUNKSIZE = 65536
__all__ = ['tee', 'tee_to_file', 'tee_to_bytearray']
def _tee(response, callback, chunksize, decode_content):
for chunk in response.raw.stream(amt=chunksize,
decode_content=decode_content):
callback(chunk)
yield chunk
def tee(response, fileobject, chunksize=_DEFAULT_CHUNKSIZE,
decode_content=None):
"""Stream the response both to the generator and a file.
This will stream the response body while writing the bytes to
``fileobject``.
Example usage:
.. code-block:: python
resp = requests.get(url, stream=True)
with open('save_file', 'wb') as save_file:
for chunk in tee(resp, save_file):
# do stuff with chunk
.. code-block:: python
import io
resp = requests.get(url, stream=True)
fileobject = io.BytesIO()
for chunk in tee(resp, fileobject):
# do stuff with chunk
:param response: Response from requests.
:type response: requests.Response
:param fileobject: Writable file-like object.
:type fileobject: file, io.BytesIO
:param int chunksize: (optional), Size of chunk to attempt to stream.
:param bool decode_content: (optional), If True, this will decode the
compressed content of the response.
:raises: TypeError if the fileobject wasn't opened with the right mode
or isn't a BytesIO object.
"""
# We will be streaming the raw bytes from over the wire, so we need to
# ensure that writing to the fileobject will preserve those bytes. On
# Python3, if the user passes an io.StringIO, this will fail, so we need
# to check for BytesIO instead.
if not ('b' in getattr(fileobject, 'mode', '') or
isinstance(fileobject, io.BytesIO)):
raise TypeError('tee() will write bytes directly to this fileobject'
', it must be opened with the "b" flag if it is a file'
' or inherit from io.BytesIO.')
return _tee(response, fileobject.write, chunksize, decode_content)
def tee_to_file(response, filename, chunksize=_DEFAULT_CHUNKSIZE,
decode_content=None):
"""Stream the response both to the generator and a file.
This will open a file named ``filename`` and stream the response body
while writing the bytes to the opened file object.
Example usage:
.. code-block:: python
resp = requests.get(url, stream=True)
for chunk in tee_to_file(resp, 'save_file'):
# do stuff with chunk
:param response: Response from requests.
:type response: requests.Response
:param str filename: Name of file in which we write the response content.
:param int chunksize: (optional), Size of chunk to attempt to stream.
:param bool decode_content: (optional), If True, this will decode the
compressed content of the response.
"""
with open(filename, 'wb') as fd:
for chunk in tee(response, fd, chunksize, decode_content):
yield chunk
def tee_to_bytearray(response, bytearr, chunksize=_DEFAULT_CHUNKSIZE,
decode_content=None):
"""Stream the response both to the generator and a bytearray.
    This will stream the chunks of the response body, append them to the
    provided :class:`bytearray`, and yield them to the user.
.. note::
This uses the :meth:`bytearray.extend` by default instead of passing
the bytearray into the ``readinto`` method.
Example usage:
.. code-block:: python
b = bytearray()
resp = requests.get(url, stream=True)
for chunk in tee_to_bytearray(resp, b):
# do stuff with chunk
:param response: Response from requests.
:type response: requests.Response
:param bytearray bytearr: Array to add the streamed bytes to.
:param int chunksize: (optional), Size of chunk to attempt to stream.
:param bool decode_content: (optional), If True, this will decode the
compressed content of the response.
"""
if not isinstance(bytearr, bytearray):
raise TypeError('tee_to_bytearray() expects bytearr to be a '
'bytearray')
return _tee(response, bytearr.extend, chunksize, decode_content)

View file

@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
"""Collection of exceptions raised by requests-toolbelt."""
class StreamingError(Exception):
"""Used in :mod:`requests_toolbelt.downloadutils.stream`."""
pass
class VersionMismatchError(Exception):
"""Used to indicate a version mismatch in the version of requests required.
The feature in use requires a newer version of Requests to function
appropriately but the version installed is not sufficient.
"""
pass
class RequestsVersionTooOld(Warning):
"""Used to indiciate that the Requests version is too old.
If the version of Requests is too old to support a feature, we will issue
this warning to the user.
"""
pass

View file

@@ -0,0 +1,31 @@
"""
requests_toolbelt.multipart
===========================
See http://toolbelt.rtfd.org/ for documentation
:copyright: (c) 2014 by Ian Cordasco and Cory Benfield
:license: Apache v2.0, see LICENSE for more details
"""
from .encoder import MultipartEncoder, MultipartEncoderMonitor
from .decoder import MultipartDecoder
from .decoder import ImproperBodyPartContentException
from .decoder import NonMultipartContentTypeException
__title__ = 'requests-toolbelt'
__authors__ = 'Ian Cordasco, Cory Benfield'
__license__ = 'Apache v2.0'
__copyright__ = 'Copyright 2014 Ian Cordasco, Cory Benfield'
__all__ = [
'MultipartEncoder',
'MultipartEncoderMonitor',
'MultipartDecoder',
'ImproperBodyPartContentException',
'NonMultipartContentTypeException',
'__title__',
'__authors__',
'__license__',
'__copyright__',
]

View file

@@ -0,0 +1,155 @@
# -*- coding: utf-8 -*-
"""
requests_toolbelt.multipart.decoder
===================================
This holds all the implementation details of the MultipartDecoder
"""
import sys
import email.parser
from .encoder import encode_with
from requests.structures import CaseInsensitiveDict
def _split_on_find(content, bound):
point = content.find(bound)
return content[:point], content[point + len(bound):]
class ImproperBodyPartContentException(Exception):
pass
class NonMultipartContentTypeException(Exception):
pass
def _header_parser(string, encoding):
major = sys.version_info[0]
if major == 3:
string = string.decode(encoding)
headers = email.parser.HeaderParser().parsestr(string).items()
return (
(encode_with(k, encoding), encode_with(v, encoding))
for k, v in headers
)
class BodyPart(object):
"""
The ``BodyPart`` object is a ``Response``-like interface to an individual
subpart of a multipart response. It is expected that these will
generally be created by objects of the ``MultipartDecoder`` class.
    Like ``Response``, there is a ``CaseInsensitiveDict`` object named
    ``headers``, ``content`` to access bytes, ``text`` to access unicode, and
    ``encoding`` to access the unicode codec.
"""
def __init__(self, content, encoding):
self.encoding = encoding
headers = {}
# Split into header section (if any) and the content
if b'\r\n\r\n' in content:
first, self.content = _split_on_find(content, b'\r\n\r\n')
if first != b'':
headers = _header_parser(first.lstrip(), encoding)
else:
raise ImproperBodyPartContentException(
'content does not contain CR-LF-CR-LF'
)
self.headers = CaseInsensitiveDict(headers)
@property
def text(self):
"""Content of the ``BodyPart`` in unicode."""
return self.content.decode(self.encoding)
class MultipartDecoder(object):
"""
The ``MultipartDecoder`` object parses the multipart payload of
a bytestring into a tuple of ``Response``-like ``BodyPart`` objects.
The basic usage is::
import requests
from requests_toolbelt import MultipartDecoder
        response = requests.get(url)
decoder = MultipartDecoder.from_response(response)
for part in decoder.parts:
            print(part.headers[b'content-type'])
If the multipart content is not from a response, basic usage is::
from requests_toolbelt import MultipartDecoder
decoder = MultipartDecoder(content, content_type)
for part in decoder.parts:
            print(part.headers[b'content-type'])
For both these usages, there is an optional ``encoding`` parameter. This is
a string, which is the name of the unicode codec to use (default is
``'utf-8'``).
"""
def __init__(self, content, content_type, encoding='utf-8'):
#: Original content
self.content = content
#: Original Content-Type header
self.content_type = content_type
#: Response body encoding
self.encoding = encoding
#: Parsed parts of the multipart response body
self.parts = tuple()
self._find_boundary()
self._parse_body()
def _find_boundary(self):
ct_info = tuple(x.strip() for x in self.content_type.split(';'))
mimetype = ct_info[0]
if mimetype.split('/')[0] != 'multipart':
raise NonMultipartContentTypeException(
"Unexpected mimetype in content-type: '{0}'".format(mimetype)
)
for item in ct_info[1:]:
attr, value = _split_on_find(
item,
'='
)
if attr.lower() == 'boundary':
self.boundary = encode_with(value.strip('"'), self.encoding)
@staticmethod
def _fix_first_part(part, boundary_marker):
bm_len = len(boundary_marker)
if boundary_marker == part[:bm_len]:
return part[bm_len:]
else:
return part
def _parse_body(self):
boundary = b''.join((b'--', self.boundary))
def body_part(part):
fixed = MultipartDecoder._fix_first_part(part, boundary)
return BodyPart(fixed, self.encoding)
def test_part(part):
return part != b'' and part != b'\r\n' and part[:4] != b'--\r\n'
parts = self.content.split(b''.join((b'\r\n', boundary)))
self.parts = tuple(body_part(x) for x in parts if test_part(x))
@classmethod
def from_response(cls, response, encoding='utf-8'):
content = response.content
content_type = response.headers.get('content-type', None)
return cls(content, content_type, encoding)
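
Since the ``MultipartEncoder`` later in this commit produces exactly the
kind of body this class parses, a round trip makes a convenient smoke test;
note that the parsed header keys are bytes::

    from requests_toolbelt import MultipartDecoder, MultipartEncoder

    encoder = MultipartEncoder(fields={'field': 'value'})
    decoder = MultipartDecoder(encoder.to_string(), encoder.content_type)
    for part in decoder.parts:
        print(part.headers[b'Content-Disposition'], part.text)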

View file

@@ -0,0 +1,570 @@
# -*- coding: utf-8 -*-
"""
requests_toolbelt.multipart.encoder
===================================
This holds all of the implementation details of the MultipartEncoder
"""
import contextlib
import io
import os
from uuid import uuid4
from .._compat import fields
class MultipartEncoder(object):
"""
    The ``MultipartEncoder`` object is a generic interface to the engine that
will create a ``multipart/form-data`` body for you.
The basic usage is:
.. code-block:: python
import requests
from requests_toolbelt import MultipartEncoder
        encoder = MultipartEncoder({'field': 'value',
                                    'other_field': 'other_value'})
r = requests.post('https://httpbin.org/post', data=encoder,
headers={'Content-Type': encoder.content_type})
If you do not need to take advantage of streaming the post body, you can
also do:
.. code-block:: python
r = requests.post('https://httpbin.org/post',
data=encoder.to_string(),
headers={'Content-Type': encoder.content_type})
If you want the encoder to use a specific order, you can use an
OrderedDict or more simply, a list of tuples:
.. code-block:: python
encoder = MultipartEncoder([('field', 'value'),
('other_field', 'other_value')])
.. versionchanged:: 0.4.0
You can also provide tuples as part values as you would provide them to
requests' ``files`` parameter.
.. code-block:: python
encoder = MultipartEncoder({
'field': ('file_name', b'{"a": "b"}', 'application/json',
{'X-My-Header': 'my-value'})
        })
.. warning::
This object will end up directly in :mod:`httplib`. Currently,
:mod:`httplib` has a hard-coded read size of **8192 bytes**. This
means that it will loop until the file has been read and your upload
could take a while. This is **not** a bug in requests. A feature is
being considered for this object to allow you, the user, to specify
what size should be returned on a read. If you have opinions on this,
please weigh in on `this issue`_.
.. _this issue:
https://github.com/sigmavirus24/requests-toolbelt/issues/75
"""
def __init__(self, fields, boundary=None, encoding='utf-8'):
#: Boundary value either passed in by the user or created
self.boundary_value = boundary or uuid4().hex
# Computed boundary
self.boundary = '--{0}'.format(self.boundary_value)
#: Encoding of the data being passed in
self.encoding = encoding
# Pre-encoded boundary
self._encoded_boundary = b''.join([
encode_with(self.boundary, self.encoding),
encode_with('\r\n', self.encoding)
])
#: Fields provided by the user
self.fields = fields
#: Whether or not the encoder is finished
self.finished = False
#: Pre-computed parts of the upload
self.parts = []
# Pre-computed parts iterator
self._iter_parts = iter([])
# The part we're currently working with
self._current_part = None
# Cached computation of the body's length
self._len = None
# Our buffer
self._buffer = CustomBytesIO(encoding=encoding)
# Pre-compute each part's headers
self._prepare_parts()
# Load boundary into buffer
self._write_boundary()
@property
def len(self):
"""Length of the multipart/form-data body.
requests will first attempt to get the length of the body by calling
``len(body)`` and then by checking for the ``len`` attribute.
On 32-bit systems, the ``__len__`` method cannot return anything
larger than an integer (in C) can hold. If the total size of the body
is even slightly larger than 4GB users will see an OverflowError. This
manifested itself in `bug #80`_.
As such, we now calculate the length lazily as a property.
.. _bug #80:
https://github.com/sigmavirus24/requests-toolbelt/issues/80
"""
# If _len isn't already calculated, calculate, return, and set it
return self._len or self._calculate_length()
def __repr__(self):
return '<MultipartEncoder: {0!r}>'.format(self.fields)
def _calculate_length(self):
"""
This uses the parts to calculate the length of the body.
This returns the calculated length so __len__ can be lazy.
"""
boundary_len = len(self.boundary) # Length of --{boundary}
# boundary length + header length + body length + len('\r\n') * 2
self._len = sum(
(boundary_len + total_len(p) + 4) for p in self.parts
) + boundary_len + 4
return self._len
def _calculate_load_amount(self, read_size):
"""This calculates how many bytes need to be added to the buffer.
When a consumer read's ``x`` from the buffer, there are two cases to
satisfy:
1. Enough data in the buffer to return the requested amount
2. Not enough data
This function uses the amount of unread bytes in the buffer and
determines how much the Encoder has to load before it can return the
requested amount of bytes.
:param int read_size: the number of bytes the consumer requests
:returns: int -- the number of bytes that must be loaded into the
buffer before the read can be satisfied. This will be strictly
non-negative
"""
amount = read_size - total_len(self._buffer)
return amount if amount > 0 else 0
def _load(self, amount):
"""Load ``amount`` number of bytes into the buffer."""
self._buffer.smart_truncate()
part = self._current_part or self._next_part()
while amount == -1 or amount > 0:
written = 0
if not part.bytes_left_to_write():
written += self._write(b'\r\n')
written += self._write_boundary()
part = self._next_part()
if not part:
written += self._write_closing_boundary()
self.finished = True
break
written += part.write_to(self._buffer, amount)
if amount != -1:
amount -= written
def _next_part(self):
try:
p = self._current_part = next(self._iter_parts)
except StopIteration:
p = None
return p
def _iter_fields(self):
_fields = self.fields
if hasattr(self.fields, 'items'):
_fields = list(self.fields.items())
for k, v in _fields:
file_name = None
file_type = None
file_headers = None
if isinstance(v, (list, tuple)):
if len(v) == 2:
file_name, file_pointer = v
elif len(v) == 3:
file_name, file_pointer, file_type = v
else:
file_name, file_pointer, file_type, file_headers = v
else:
file_pointer = v
field = fields.RequestField(name=k, data=file_pointer,
filename=file_name,
headers=file_headers)
field.make_multipart(content_type=file_type)
yield field
def _prepare_parts(self):
"""This uses the fields provided by the user and creates Part objects.
It populates the `parts` attribute and uses that to create a
generator for iteration.
"""
enc = self.encoding
self.parts = [Part.from_field(f, enc) for f in self._iter_fields()]
self._iter_parts = iter(self.parts)
def _write(self, bytes_to_write):
"""Write the bytes to the end of the buffer.
:param bytes bytes_to_write: byte-string (or bytearray) to append to
the buffer
:returns: int -- the number of bytes written
"""
return self._buffer.append(bytes_to_write)
def _write_boundary(self):
"""Write the boundary to the end of the buffer."""
return self._write(self._encoded_boundary)
def _write_closing_boundary(self):
"""Write the bytes necessary to finish a multipart/form-data body."""
with reset(self._buffer):
self._buffer.seek(-2, 2)
self._buffer.write(b'--\r\n')
return 2
def _write_headers(self, headers):
"""Write the current part's headers to the buffer."""
return self._write(encode_with(headers, self.encoding))
@property
def content_type(self):
return str(
'multipart/form-data; boundary={0}'.format(self.boundary_value)
)
def to_string(self):
"""Return the entirety of the data in the encoder.
.. note::
This simply reads all of the data it can. If you have started
streaming or reading data from the encoder, this method will only
return whatever data is left in the encoder.
.. note::
This method affects the internal state of the encoder. Calling
this method will exhaust the encoder.
:returns: the multipart message
:rtype: bytes
"""
return self.read()
def read(self, size=-1):
"""Read data from the streaming encoder.
:param int size: (optional), If provided, ``read`` will return up to
that many bytes. If it is not provided, it will return the
remaining bytes.
:returns: bytes
"""
if self.finished:
return self._buffer.read(size)
bytes_to_load = size
if bytes_to_load != -1 and bytes_to_load is not None:
bytes_to_load = self._calculate_load_amount(int(size))
self._load(bytes_to_load)
return self._buffer.read(size)
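# A minimal streaming-upload sketch of the class above (illustrative; the
# httpbin URL is just a placeholder endpoint):
#
#     from requests_toolbelt import MultipartEncoder
#     import requests
#
#     encoder = MultipartEncoder(fields={'field0': 'value0'})
#     r = requests.post('https://httpbin.org/post', data=encoder,
#                       headers={'Content-Type': encoder.content_type})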
def IDENTITY(monitor):
return monitor
class MultipartEncoderMonitor(object):
"""
An object used to monitor the progress of a :class:`MultipartEncoder`.
The :class:`MultipartEncoder` should only be responsible for preparing and
streaming the data. Anyone who wishes to monitor the upload should not
use that instance to manage the monitoring as well. Using this class, they can
monitor an encoder and register a callback. The callback receives the
instance of the monitor.
To use this monitor, you construct your :class:`MultipartEncoder` as you
normally would.
.. code-block:: python
from requests_toolbelt import (MultipartEncoder,
MultipartEncoderMonitor)
import requests
def callback(monitor):
# Do something with this information
pass
m = MultipartEncoder(fields={'field0': 'value0'})
monitor = MultipartEncoderMonitor(m, callback)
headers = {'Content-Type': monitor.content_type}
r = requests.post('https://httpbin.org/post', data=monitor,
headers=headers)
Alternatively, if your use case is very simple, you can use the following
pattern.
.. code-block:: python
from requests_toolbelt import MultipartEncoderMonitor
import requests
def callback(monitor):
# Do something with this information
pass
monitor = MultipartEncoderMonitor.from_fields(
fields={'field0': 'value0'}, callback=callback
)
headers = {'Content-Type': monitor.content_type}
r = requests.post('https://httpbin.org/post', data=monitor,
headers=headers)
"""
def __init__(self, encoder, callback=None):
#: Instance of the :class:`MultipartEncoder` being monitored
self.encoder = encoder
#: Optional function to call after a read
self.callback = callback or IDENTITY
#: Number of bytes already read from the :class:`MultipartEncoder`
#: instance
self.bytes_read = 0
#: Avoid the same problem in bug #80
self.len = self.encoder.len
@classmethod
def from_fields(cls, fields, boundary=None, encoding='utf-8',
callback=None):
encoder = MultipartEncoder(fields, boundary, encoding)
return cls(encoder, callback)
@property
def content_type(self):
return self.encoder.content_type
def to_string(self):
return self.read()
def read(self, size=-1):
string = self.encoder.read(size)
self.bytes_read += len(string)
self.callback(self)
return string
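# A hedged sketch of a progress callback built on the monitor above; the
# arithmetic assumes the encoder's total length is known up front:
#
#     from requests_toolbelt import MultipartEncoder, MultipartEncoderMonitor
#
#     def print_progress(monitor):
#         # monitor.bytes_read grows after every read(); monitor.len is fixed
#         print('%d/%d bytes' % (monitor.bytes_read, monitor.len))
#
#     encoder = MultipartEncoder(fields={'field0': 'value0'})
#     monitor = MultipartEncoderMonitor(encoder, print_progress)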
def encode_with(string, encoding):
"""Encoding ``string`` with ``encoding`` if necessary.
:param str string: If string is a bytes object, it will not encode it.
Otherwise, this function will encode it with the provided encoding.
:param str encoding: The encoding with which to encode string.
:returns: encoded bytes object
"""
if not (string is None or isinstance(string, bytes)):
return string.encode(encoding)
return string
def readable_data(data, encoding):
"""Coerce the data to an object with a ``read`` method."""
if hasattr(data, 'read'):
return data
return CustomBytesIO(data, encoding)
def total_len(o):
if hasattr(o, '__len__'):
return len(o)
if hasattr(o, 'len'):
return o.len
if hasattr(o, 'fileno'):
try:
fileno = o.fileno()
except io.UnsupportedOperation:
pass
else:
return os.fstat(fileno).st_size
if hasattr(o, 'getvalue'):
# e.g. BytesIO, cStringIO.StringIO
return len(o.getvalue())
@contextlib.contextmanager
def reset(buffer):
"""Keep track of the buffer's current position and write to the end.
This is a context manager meant to be used when adding data to the buffer.
It eliminates the need for every function to be concerned with the
position of the cursor in the buffer.
"""
original_position = buffer.tell()
buffer.seek(0, 2)
yield
buffer.seek(original_position, 0)
def coerce_data(data, encoding):
"""Ensure that every object's __len__ behaves uniformly."""
if not isinstance(data, CustomBytesIO):
if hasattr(data, 'getvalue'):
return CustomBytesIO(data.getvalue(), encoding)
if hasattr(data, 'fileno'):
return FileWrapper(data)
if not hasattr(data, 'read'):
return CustomBytesIO(data, encoding)
return data
def to_list(fields):
if hasattr(fields, 'items'):
return list(fields.items())
return list(fields)
class Part(object):
def __init__(self, headers, body):
self.headers = headers
self.body = body
self.headers_unread = True
self.len = len(self.headers) + total_len(self.body)
@classmethod
def from_field(cls, field, encoding):
"""Create a part from a Request Field generated by urllib3."""
headers = encode_with(field.render_headers(), encoding)
body = coerce_data(field.data, encoding)
return cls(headers, body)
def bytes_left_to_write(self):
"""Determine if there are bytes left to write.
:returns: bool -- ``True`` if there are bytes left to write, otherwise
``False``
"""
to_read = 0
if self.headers_unread:
to_read += len(self.headers)
return (to_read + total_len(self.body)) > 0
def write_to(self, buffer, size):
"""Write the requested amount of bytes to the buffer provided.
The number of bytes written may exceed size on the first read since we
load the headers ambitiously.
:param CustomBytesIO buffer: buffer we want to write bytes to
:param int size: number of bytes requested to be written to the buffer
:returns: int -- number of bytes actually written
"""
written = 0
if self.headers_unread:
written += buffer.append(self.headers)
self.headers_unread = False
while total_len(self.body) > 0 and (size == -1 or written < size):
amount_to_read = size
if size != -1:
amount_to_read = size - written
written += buffer.append(self.body.read(amount_to_read))
return written
class CustomBytesIO(io.BytesIO):
def __init__(self, buffer=None, encoding='utf-8'):
buffer = encode_with(buffer, encoding)
super(CustomBytesIO, self).__init__(buffer)
def _get_end(self):
current_pos = self.tell()
self.seek(0, 2)
length = self.tell()
self.seek(current_pos, 0)
return length
@property
def len(self):
length = self._get_end()
return length - self.tell()
def append(self, bytes):
with reset(self):
written = self.write(bytes)
return written
def smart_truncate(self):
to_be_read = total_len(self)
already_read = self._get_end() - to_be_read
if already_read >= to_be_read:
old_bytes = self.read()
self.seek(0, 0)
self.truncate()
self.write(old_bytes)
self.seek(0, 0) # We want to be at the beginning
class FileWrapper(object):
def __init__(self, file_object):
self.fd = file_object
@property
def len(self):
return total_len(self.fd) - self.fd.tell()
def read(self, length=-1):
return self.fd.read(length)

View file

@@ -0,0 +1,70 @@
import requests
from ._compat import urljoin
class BaseUrlSession(requests.Session):
"""A Session with a URL that all requests will use as a base.
Let's start by looking at an example:
.. code-block:: python
>>> from requests_toolbelt import sessions
>>> s = sessions.BaseUrlSession(
... base_url='https://example.com/resource/')
>>> r = s.get('sub-resource/', params={'foo': 'bar'})
>>> print(r.request.url)
https://example.com/resource/sub-resource/?foo=bar
Our call to the ``get`` method will make a request to the URL passed in
when we created the Session and the partial resource name we provide.
We implement this by overriding the ``request`` method so most uses of a
Session are covered. (This, however, precludes the use of PreparedRequest
objects).
.. note::
The base URL that you provide and the path you provide are **very**
important.
Let's look at another *similar* example
.. code-block:: python
>>> from requests_toolbelt import sessions
>>> s = sessions.BaseUrlSession(
... base_url='https://example.com/resource/')
>>> r = s.get('/sub-resource/', params={'foo': 'bar'})
>>> print(r.request.url)
https://example.com/sub-resource/?foo=bar
The key difference here is that we called ``get`` with ``/sub-resource/``,
i.e., there was a leading ``/``. This changes how we create the URL
because we rely on :mod:`urllib.parse.urljoin`.
To override how we generate the URL, sub-class this class and override the
``create_url`` method.
Based on implementation from
https://github.com/kennethreitz/requests/issues/2554#issuecomment-109341010
"""
base_url = None
def __init__(self, base_url=None):
if base_url:
self.base_url = base_url
super(BaseUrlSession, self).__init__()
def request(self, method, url, *args, **kwargs):
"""Send the request after generating the complete URL."""
url = self.create_url(url)
return super(BaseUrlSession, self).request(
method, url, *args, **kwargs
)
def create_url(self, url):
"""Create the URL based off this partial path."""
return urljoin(self.base_url, url)
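# A short sketch of the urljoin behavior the docstring above describes
# (a path without a leading '/' extends the base; a leading '/' replaces it):
#
#     from urllib.parse import urljoin
#
#     base = 'https://example.com/resource/'
#     urljoin(base, 'sub-resource/')   # 'https://example.com/resource/sub-resource/'
#     urljoin(base, '/sub-resource/')  # 'https://example.com/sub-resource/'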

View file

@@ -0,0 +1,116 @@
# -*- coding: utf-8 -*-
"""
requests_toolbelt.streaming_iterator
====================================
This holds the implementation details for the :class:`StreamingIterator`. It
is designed for the case where you, the user, know the size of the upload but
need to provide the data as an iterator. This class will allow you to specify
the size and stream the data without using a chunked transfer-encoding.
"""
from requests.utils import super_len
from .multipart.encoder import CustomBytesIO, encode_with
class StreamingIterator(object):
"""
This class provides a way of allowing iterators with a known size to be
streamed instead of chunked.
In requests, if you pass in an iterator it assumes you want to use
chunked transfer-encoding to upload the data, which not all servers
support well. Additionally, you may want to set the content-length
yourself to avoid this, but that will not work. The only way to keep
requests from using a chunked transfer-encoding and force it to stream the
upload is to mimic a very specific interface. Instead of having to know
these details you can instead just use this class. You simply provide the
size and iterator and pass the instance of StreamingIterator to requests
via the data parameter like so:
.. code-block:: python
from requests_toolbelt import StreamingIterator
import requests
# Let iterator be some generator that you already have and size be
# the size of the data produced by the iterator
r = requests.post(url, data=StreamingIterator(size, iterator))
You can also pass file-like objects to :py:class:`StreamingIterator` in
case requests can't determine the filesize itself. This is the case with
streaming file objects like ``stdin`` or any sockets. Wrapping e.g. files
that are on disk with ``StreamingIterator`` is unnecessary, because
requests can determine the filesize itself.
Naturally, you should also set the `Content-Type` of your upload
appropriately because the toolbelt will not attempt to guess that for you.
"""
def __init__(self, size, iterator, encoding='utf-8'):
#: The expected size of the upload
self.size = int(size)
if self.size < 0:
raise ValueError(
'The size of the upload must be a non-negative integer'
)
#: Attribute that requests will check to determine the length of the
#: body. See bug #80 for more details
self.len = self.size
#: Encoding the input data is using
self.encoding = encoding
#: The iterator used to generate the upload data
self.iterator = iterator
if hasattr(iterator, 'read'):
self._file = iterator
else:
self._file = _IteratorAsBinaryFile(iterator, encoding)
def read(self, size=-1):
return encode_with(self._file.read(size), self.encoding)
class _IteratorAsBinaryFile(object):
def __init__(self, iterator, encoding='utf-8'):
#: The iterator used to generate the upload data
self.iterator = iterator
#: Encoding the iterator is using
self.encoding = encoding
# The buffer we use to provide the correct number of bytes requested
# during a read
self._buffer = CustomBytesIO()
def _get_bytes(self):
try:
return encode_with(next(self.iterator), self.encoding)
except StopIteration:
return b''
def _load_bytes(self, size):
self._buffer.smart_truncate()
amount_to_load = size - super_len(self._buffer)
bytes_to_append = True
while amount_to_load > 0 and bytes_to_append:
bytes_to_append = self._get_bytes()
amount_to_load -= self._buffer.append(bytes_to_append)
def read(self, size=-1):
size = int(size)
if size == -1:
return b''.join(self.iterator)
self._load_bytes(size)
return self._buffer.read(size)
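# A hedged usage sketch: stream a generator of known total size without
# chunked transfer-encoding ('https://example.com/upload' is a placeholder):
#
#     from requests_toolbelt import StreamingIterator
#     import requests
#
#     chunks = [b'first ', b'second ', b'third']
#     size = sum(len(c) for c in chunks)
#     payload = StreamingIterator(size, iter(chunks))
#     r = requests.post('https://example.com/upload', data=payload)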

View file

@@ -0,0 +1,97 @@
"""
This module provides the API for ``requests_toolbelt.threaded``.
The module provides a clean and simple API for making requests via a thread
pool. The thread pool will use sessions for increased performance.
A simple use-case is:
.. code-block:: python
from requests_toolbelt import threaded
urls_to_get = [{
'url': 'https://api.github.com/users/sigmavirus24',
'method': 'GET',
}, {
'url': 'https://api.github.com/repos/sigmavirus24/requests-toolbelt',
'method': 'GET',
}, {
'url': 'https://google.com',
'method': 'GET',
}]
responses, errors = threaded.map(urls_to_get)
By default, the threaded submodule will detect the number of CPUs your
computer has and use that if no other number of processes is selected. To
change this, always use the keyword argument ``num_processes``. Using the
above example, we would expand it like so:
.. code-block:: python
responses, errors = threaded.map(urls_to_get, num_processes=10)
You can also customize how a :class:`requests.Session` is initialized by
creating a callback function:
.. code-block:: python
from requests_toolbelt import user_agent
def initialize_session(session):
session.headers['User-Agent'] = user_agent('my-scraper', '0.1')
session.headers['Accept'] = 'application/json'
responses, errors = threaded.map(urls_to_get,
initializer=initialize_session)
.. autofunction:: requests_toolbelt.threaded.map
Inspiration is blatantly drawn from the standard library's multiprocessing
library. See the following references:
- multiprocessing's `pool source`_
- map and map_async `inspiration`_
.. _pool source:
https://hg.python.org/cpython/file/8ef4f75a8018/Lib/multiprocessing/pool.py
.. _inspiration:
https://hg.python.org/cpython/file/8ef4f75a8018/Lib/multiprocessing/pool.py#l340
"""
from . import pool
from .._compat import queue
def map(requests, **kwargs):
r"""Simple interface to the threaded Pool object.
This function takes a list of dictionaries representing requests to make
using Sessions in threads and returns a tuple where the first item is
a generator of successful responses and the second is a generator of
exceptions.
:param list requests:
Collection of dictionaries representing requests to make with the Pool
object.
:param \*\*kwargs:
Keyword arguments that are passed to the
:class:`~requests_toolbelt.threaded.pool.Pool` object.
:returns: Tuple of responses and exceptions from the pool
:rtype: (:class:`~requests_toolbelt.threaded.pool.ThreadResponse`,
:class:`~requests_toolbelt.threaded.pool.ThreadException`)
"""
if not (requests and all(isinstance(r, dict) for r in requests)):
raise ValueError('map expects a list of dictionaries.')
# Build our queue of requests
job_queue = queue.Queue()
for request in requests:
job_queue.put(request)
# Ensure the user doesn't try to pass their own job_queue
kwargs['job_queue'] = job_queue
threadpool = pool.Pool(**kwargs)
threadpool.join_all()
return threadpool.responses(), threadpool.exceptions()
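# A brief sketch of consuming map()'s two generators; attribute access on
# each yielded item is proxied to the underlying response or exception:
#
#     from requests_toolbelt import threaded
#
#     jobs = [{'method': 'GET', 'url': 'https://httpbin.org/get'}]
#     responses, errors = threaded.map(jobs)
#     for response in responses:
#         print(response.status_code)   # proxied to requests.Response
#     for error in errors:
#         print(error.request_kwargs, error.exception)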

View file

@@ -0,0 +1,211 @@
"""Module implementing the Pool for :mod:``requests_toolbelt.threaded``."""
import multiprocessing
import requests
from . import thread
from .._compat import queue
class Pool(object):
"""Pool that manages the threads containing sessions.
:param job_queue:
The queue of dictionaries of request keyword arguments that the
threads will consume.
:type job_queue: queue.Queue
:param initializer:
Function used to initialize an instance of ``session``.
:type initializer: collections.Callable
:param auth_generator:
Function used to generate new auth credentials for the session.
:type auth_generator: collections.Callable
:param int num_threads:
Number of threads to create.
:param session:
Callable that returns a new session object for each thread
(defaults to the ``requests.Session`` class).
:type session: requests.Session
"""
def __init__(self, job_queue, initializer=None, auth_generator=None,
num_processes=None, session=requests.Session):
if num_processes is None:
num_processes = multiprocessing.cpu_count() or 1
if num_processes < 1:
raise ValueError("Number of processes should at least be 1.")
self._job_queue = job_queue
self._response_queue = queue.Queue()
self._exc_queue = queue.Queue()
self._processes = num_processes
self._initializer = initializer or _identity
self._auth = auth_generator or _identity
self._session = session
self._pool = [
thread.SessionThread(self._new_session(), self._job_queue,
self._response_queue, self._exc_queue)
for _ in range(self._processes)
]
def _new_session(self):
return self._auth(self._initializer(self._session()))
@classmethod
def from_exceptions(cls, exceptions, **kwargs):
r"""Create a :class:`~Pool` from an :class:`~ThreadException`\ s.
Provided an iterable that provides :class:`~ThreadException` objects,
this classmethod will generate a new pool to retry the requests that
caused the exceptions.
:param exceptions:
Iterable that returns :class:`~ThreadException`
:type exceptions: iterable
:param kwargs:
Keyword arguments passed to the :class:`~Pool` initializer.
:returns: An initialized :class:`~Pool` object.
:rtype: :class:`~Pool`
"""
job_queue = queue.Queue()
for exc in exceptions:
job_queue.put(exc.request_kwargs)
return cls(job_queue=job_queue, **kwargs)
@classmethod
def from_urls(cls, urls, request_kwargs=None, **kwargs):
"""Create a :class:`~Pool` from an iterable of URLs.
:param urls:
Iterable that returns URLs with which we create a pool.
:type urls: iterable
:param dict request_kwargs:
Dictionary of other keyword arguments to provide to the request
method.
:param kwargs:
Keyword arguments passed to the :class:`~Pool` initializer.
:returns: An initialized :class:`~Pool` object.
:rtype: :class:`~Pool`
"""
request_dict = {'method': 'GET'}
request_dict.update(request_kwargs or {})
job_queue = queue.Queue()
for url in urls:
job = request_dict.copy()
job.update({'url': url})
job_queue.put(job)
return cls(job_queue=job_queue, **kwargs)
def exceptions(self):
"""Iterate over all the exceptions in the pool.
:returns: Generator of :class:`~ThreadException`
"""
while True:
exc = self.get_exception()
if exc is None:
break
yield exc
def get_exception(self):
"""Get an exception from the pool.
:rtype: :class:`~ThreadException`
"""
try:
(request, exc) = self._exc_queue.get_nowait()
except queue.Empty:
return None
else:
return ThreadException(request, exc)
def get_response(self):
"""Get a response from the pool.
:rtype: :class:`~ThreadResponse`
"""
try:
(request, response) = self._response_queue.get_nowait()
except queue.Empty:
return None
else:
return ThreadResponse(request, response)
def responses(self):
"""Iterate over all the responses in the pool.
:returns: Generator of :class:`~ThreadResponse`
"""
while True:
resp = self.get_response()
if resp is None:
break
yield resp
def join_all(self):
"""Join all the threads to the master thread."""
for session_thread in self._pool:
session_thread.join()
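# A minimal sketch of the from_urls/join_all/responses flow described above
# (the httpbin URL is a placeholder):
#
#     pool = Pool.from_urls(['https://httpbin.org/get'],
#                           request_kwargs={'timeout': 10})
#     pool.join_all()
#     for thread_response in pool.responses():
#         print(thread_response.status_code)
#     # Retry whatever failed, using a fresh pool:
#     retry_pool = Pool.from_exceptions(pool.exceptions())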
class ThreadProxy(object):
proxied_attr = None
def __getattr__(self, attr):
"""Proxy attribute accesses to the proxied object."""
get = object.__getattribute__
if attr not in self.attrs:
response = get(self, self.proxied_attr)
return getattr(response, attr)
else:
return get(self, attr)
class ThreadResponse(ThreadProxy):
"""A wrapper around a requests Response object.
This will proxy most attribute access actions to the Response object. For
example, if you wanted the parsed JSON from the response, you might do:
.. code-block:: python
thread_response = pool.get_response()
json = thread_response.json()
"""
proxied_attr = 'response'
attrs = frozenset(['request_kwargs', 'response'])
def __init__(self, request_kwargs, response):
#: The original keyword arguments provided to the queue
self.request_kwargs = request_kwargs
#: The wrapped response
self.response = response
class ThreadException(ThreadProxy):
"""A wrapper around an exception raised during a request.
This will proxy most attribute access actions to the exception object. For
example, if you wanted the message from the exception, you might do:
.. code-block:: python
thread_exc = pool.get_exception()
msg = thread_exc.message
"""
proxied_attr = 'exception'
attrs = frozenset(['request_kwargs', 'exception'])
def __init__(self, request_kwargs, exception):
#: The original keyword arguments provided to the queue
self.request_kwargs = request_kwargs
#: The captured and wrapped exception
self.exception = exception
def _identity(session_obj):
return session_obj
__all__ = ['ThreadException', 'ThreadResponse', 'Pool']

View file

@@ -0,0 +1,53 @@
"""Module containing the SessionThread class."""
import threading
import uuid
import requests.exceptions as exc
from .._compat import queue
class SessionThread(object):
def __init__(self, initialized_session, job_queue, response_queue,
exception_queue):
self._session = initialized_session
self._jobs = job_queue
self._create_worker()
self._responses = response_queue
self._exceptions = exception_queue
def _create_worker(self):
self._worker = threading.Thread(
target=self._make_request,
name=uuid.uuid4(),
)
self._worker.daemon = True
self._worker._state = 0
self._worker.start()
def _handle_request(self, kwargs):
try:
response = self._session.request(**kwargs)
except exc.RequestException as e:
self._exceptions.put((kwargs, e))
else:
self._responses.put((kwargs, response))
finally:
self._jobs.task_done()
def _make_request(self):
while True:
try:
kwargs = self._jobs.get_nowait()
except queue.Empty:
break
self._handle_request(kwargs)
def is_alive(self):
"""Proxy to the thread's ``is_alive`` method."""
return self._worker.is_alive()
def join(self):
"""Join this thread to the master thread."""
self._worker.join()
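# Note, for illustration: each item placed on the job queue is a dict of
# keyword arguments that _handle_request unpacks into session.request, e.g.
#
#     job_queue.put({'method': 'GET', 'url': 'https://httpbin.org/get'})
#
# Successful requests land on the response queue as (kwargs, response)
# tuples; failures land on the exception queue as (kwargs, exception).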

View file

@@ -0,0 +1,84 @@
# -*- coding: utf-8 -*-
"""A collection of functions deprecated in requests.utils."""
import re
from requests import utils
def get_encodings_from_content(content):
"""Return encodings from given content string.
.. code-block:: python
import requests
from requests_toolbelt.utils import deprecated
r = requests.get(url)
encodings = deprecated.get_encodings_from_content(r.content)
:param content: bytestring to extract encodings from.
:type content: bytes
"""
find_charset = re.compile(
r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I
).findall
find_pragma = re.compile(
r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I
).findall
find_xml = re.compile(
r'^<\?xml.*?encoding=["\']*(.+?)["\'>]'
).findall
return find_charset(content) + find_pragma(content) + find_xml(content)
def get_unicode_from_response(response):
"""Return the requested content back in unicode.
This will first attempt to retrieve the encoding from the response
headers. If that fails, it will use
:func:`requests_toolbelt.utils.deprecated.get_encodings_from_content`
to determine encodings from HTML elements.
.. code-block:: python
import requests
from requests_toolbelt.utils import deprecated
r = requests.get(url)
text = deprecated.get_unicode_from_response(r)
:param response: Response object to get unicode content from.
:type response: requests.models.Response
"""
tried_encodings = set()
# Try charset from content-type
encoding = utils.get_encoding_from_headers(response.headers)
if encoding:
try:
return str(response.content, encoding)
except UnicodeError:
tried_encodings.add(encoding.lower())
encodings = get_encodings_from_content(response.content)
for _encoding in encodings:
_encoding = _encoding.lower()
if _encoding in tried_encodings:
continue
try:
return str(response.content, _encoding)
except UnicodeError:
tried_encodings.add(_encoding)
# Fall back:
if encoding:
try:
return str(response.content, encoding, errors='replace')
except TypeError:
pass
return response.text

View file

@@ -0,0 +1,195 @@
"""This module provides functions for dumping information about responses."""
import collections
from requests import compat
__all__ = ('dump_response', 'dump_all')
HTTP_VERSIONS = {
9: b'0.9',
10: b'1.0',
11: b'1.1',
}
_PrefixSettings = collections.namedtuple('PrefixSettings',
['request', 'response'])
class PrefixSettings(_PrefixSettings):
def __new__(cls, request, response):
request = _coerce_to_bytes(request)
response = _coerce_to_bytes(response)
return super(PrefixSettings, cls).__new__(cls, request, response)
def _get_proxy_information(response):
if getattr(response.connection, 'proxy_manager', False):
proxy_info = {}
request_url = response.request.url
if request_url.startswith('https://'):
proxy_info['method'] = 'CONNECT'
proxy_info['request_path'] = request_url
return proxy_info
return None
def _format_header(name, value):
return (_coerce_to_bytes(name) + b': ' + _coerce_to_bytes(value) +
b'\r\n')
def _build_request_path(url, proxy_info):
uri = compat.urlparse(url)
proxy_url = proxy_info.get('request_path')
if proxy_url is not None:
return proxy_url, uri
request_path = _coerce_to_bytes(uri.path)
if uri.query:
request_path += b'?' + _coerce_to_bytes(uri.query)
return request_path, uri
def _dump_request_data(request, prefixes, bytearr, proxy_info=None):
if proxy_info is None:
proxy_info = {}
prefix = prefixes.request
method = _coerce_to_bytes(proxy_info.pop('method', request.method))
request_path, uri = _build_request_path(request.url, proxy_info)
# <prefix><METHOD> <request-path> HTTP/1.1
bytearr.extend(prefix + method + b' ' + request_path + b' HTTP/1.1\r\n')
# <prefix>Host: <request-host> OR host header specified by user
headers = request.headers.copy()
host_header = _coerce_to_bytes(headers.pop('Host', uri.netloc))
bytearr.extend(prefix + b'Host: ' + host_header + b'\r\n')
for name, value in headers.items():
bytearr.extend(prefix + _format_header(name, value))
bytearr.extend(prefix + b'\r\n')
if request.body:
if isinstance(request.body, compat.basestring):
bytearr.extend(prefix + _coerce_to_bytes(request.body))
else:
# In the event that the body is a file-like object, let's not try
# to read everything into memory.
bytearr.extend(b'<< Request body is not a string-like type >>')
bytearr.extend(b'\r\n')
def _dump_response_data(response, prefixes, bytearr):
prefix = prefixes.response
# Let's interact almost entirely with urllib3's response
raw = response.raw
# Let's convert the version int from httplib to bytes
version_str = HTTP_VERSIONS.get(raw.version, b'?')
# <prefix>HTTP/<version_str> <status_code> <reason>
bytearr.extend(prefix + b'HTTP/' + version_str + b' ' +
str(raw.status).encode('ascii') + b' ' +
_coerce_to_bytes(response.reason) + b'\r\n')
headers = raw.headers
for name in headers.keys():
for value in headers.getlist(name):
bytearr.extend(prefix + _format_header(name, value))
bytearr.extend(prefix + b'\r\n')
bytearr.extend(response.content)
def _coerce_to_bytes(data):
if not isinstance(data, bytes) and hasattr(data, 'encode'):
data = data.encode('utf-8')
return data
def dump_response(response, request_prefix=b'< ', response_prefix=b'> ',
data_array=None):
"""Dump a single request-response cycle's information.
This will take a response object and dump only the data that requests can
see for that single request-response cycle.
Example::
import requests
from requests_toolbelt.utils import dump
resp = requests.get('https://api.github.com/users/sigmavirus24')
data = dump.dump_response(resp)
print(data.decode('utf-8'))
:param response:
The response to format
:type response: :class:`requests.Response`
:param request_prefix: (*optional*)
Bytes to prefix each line of the request data
:type request_prefix: :class:`bytes`
:param response_prefix: (*optional*)
Bytes to prefix each line of the response data
:type response_prefix: :class:`bytes`
:param data_array: (*optional*)
Bytearray to which we append the request-response cycle data
:type data_array: :class:`bytearray`
:returns: Formatted bytes of request and response information.
:rtype: :class:`bytearray`
"""
data = data_array if data_array is not None else bytearray()
prefixes = PrefixSettings(request_prefix, response_prefix)
if not hasattr(response, 'request'):
raise ValueError('Response has no associated request')
proxy_info = _get_proxy_information(response)
_dump_request_data(response.request, prefixes, data,
proxy_info=proxy_info)
_dump_response_data(response, prefixes, data)
return data
def dump_all(response, request_prefix=b'< ', response_prefix=b'> '):
"""Dump all requests and responses including redirects.
This takes the response returned by requests and will dump all
request-response pairs in the redirect history in order followed by the
final request-response.
Example::
import requests
from requests_toolbelt.utils import dump
resp = requests.get('https://httpbin.org/redirect/5')
data = dump.dump_all(resp)
print(data.decode('utf-8'))
:param response:
The response to format
:type response: :class:`requests.Response`
:param request_prefix: (*optional*)
Bytes to prefix each line of the request data
:type request_prefix: :class:`bytes`
:param response_prefix: (*optional*)
Bytes to prefix each line of the response data
:type response_prefix: :class:`bytes`
:returns: Formatted bytes of request and response information.
:rtype: :class:`bytearray`
"""
data = bytearray()
history = list(response.history[:])
history.append(response)
for response in history:
dump_response(response, request_prefix, response_prefix, data)
return data
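# A short sketch, assuming a URL that actually redirects: dump every hop in
# the redirect chain to a log file.
#
#     import requests
#     from requests_toolbelt.utils import dump
#
#     resp = requests.get('https://httpbin.org/redirect/3')
#     with open('cycle.log', 'wb') as fd:
#         fd.write(dump.dump_all(resp))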

View file

@@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-
"""Implementation of nested form-data encoding function(s)."""
from .._compat import basestring
from .._compat import urlencode as _urlencode
__all__ = ('urlencode',)
def urlencode(query, *args, **kwargs):
"""Handle nested form-data queries and serialize them appropriately.
There are times when a website expects a nested form-data query to be sent,
but the standard library's urlencode function does not appropriately
handle the nested structures. In that case, you need this function, which
will flatten the structure first and then properly encode it for you.
When using this to send data in the body of a request, make sure you
specify the appropriate Content-Type header for the request.
.. code-block:: python
import requests
from requests_toolbelt.utils import formdata
query = {
'my_dict': {
'foo': 'bar',
'biz': 'baz",
},
'a': 'b',
}
resp = requests.get(url, params=formdata.urlencode(query))
# or
resp = requests.post(
url,
data=formdata.urlencode(query),
headers={
'Content-Type': 'application/x-www-form-urlencoded'
},
)
Similarly, you can specify a list of nested tuples, e.g.,
.. code-block:: python
import requests
from requests_toolbelt.utils import formdata
query = [
('my_list', [
('foo', 'bar'),
('biz', 'baz'),
]),
('a', 'b'),
]
resp = requests.get(url, params=formdata.urlencode(query))
# or
resp = requests.post(
url,
data=formdata.urlencode(query),
headers={
'Content-Type': 'application/x-www-form-urlencoded'
},
)
For additional parameter and return information, see the official
`urlencode`_ documentation.
.. _urlencode:
https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
"""
expand_classes = (dict, list, tuple)
original_query_list = _to_kv_list(query)
if not all(_is_two_tuple(i) for i in original_query_list):
raise ValueError("Expected query to be able to be converted to a "
"list comprised of length 2 tuples.")
query_list = original_query_list
while any(isinstance(v, expand_classes) for _, v in query_list):
query_list = _expand_query_values(query_list)
return _urlencode(query_list, *args, **kwargs)
def _to_kv_list(dict_or_list):
if hasattr(dict_or_list, 'items'):
return list(dict_or_list.items())
return dict_or_list
def _is_two_tuple(item):
return isinstance(item, (list, tuple)) and len(item) == 2
def _expand_query_values(original_query_list):
query_list = []
for key, value in original_query_list:
if isinstance(value, basestring):
query_list.append((key, value))
else:
key_fmt = key + '[%s]'
value_list = _to_kv_list(value)
query_list.extend((key_fmt % k, v) for k, v in value_list)
return query_list
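# A worked example of the flattening performed above: nested keys become
# bracketed keys before the standard urlencode runs (key order follows the
# input dict):
#
#     urlencode({'my_dict': {'foo': 'bar'}, 'a': 'b'})
#     # -> 'my_dict%5Bfoo%5D=bar&a=b'  (i.e. my_dict[foo]=bar&a=b)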

View file

@@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-
import platform
import sys
def user_agent(name, version, extras=None):
"""
Returns an internet-friendly user_agent string.
The majority of this code has been wilfully stolen from the equivalent
function in Requests.
:param name: The intended name of the user-agent, e.g. "python-requests".
:param version: The version of the user-agent, e.g. "0.0.1".
:param extras: List of two-item tuples that are added to the user-agent
string.
:returns: Formatted user-agent string
:rtype: str
"""
try:
p_system = platform.system()
p_release = platform.release()
except IOError:
p_system = 'Unknown'
p_release = 'Unknown'
if extras is None:
extras = []
if any(len(extra) != 2 for extra in extras):
raise ValueError('Extras should be a sequence of two item tuples.')
format_string = '%s/%s'
extra_pieces = [
format_string % (extra_name, extra_version)
for extra_name, extra_version in extras
]
user_agent_pieces = ([format_string % (name, version)] + extra_pieces +
[_implementation_string(),
'%s/%s' % (p_system, p_release)])
return " ".join(user_agent_pieces)
def _implementation_string():
"""
Returns a string that provides both the name and the version of the Python
implementation currently running. For example, on CPython 2.7.5 it will
return "CPython/2.7.5".
This function works best on CPython and PyPy: in particular, it probably
doesn't work for Jython or IronPython. Future investigation should be done
to work out the correct shape of the code for those platforms.
"""
implementation = platform.python_implementation()
if implementation == 'CPython':
implementation_version = platform.python_version()
elif implementation == 'PyPy':
implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major,
sys.pypy_version_info.minor,
sys.pypy_version_info.micro)
if sys.pypy_version_info.releaselevel != 'final':
implementation_version = ''.join([
implementation_version, sys.pypy_version_info.releaselevel
])
elif implementation == 'Jython':
implementation_version = platform.python_version() # Complete Guess
elif implementation == 'IronPython':
implementation_version = platform.python_version() # Complete Guess
else:
implementation_version = 'Unknown'
return "%s/%s" % (implementation, implementation_version)