348 lines
12 KiB
Python
348 lines
12 KiB
Python
# Copyright 2014 Google LLC
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Batch updates / deletes of storage buckets / blobs.
|
|
|
|
See https://cloud.google.com/storage/docs/json_api/v1/how-tos/batch
|
|
"""
|
|
from email.encoders import encode_noop
|
|
from email.generator import Generator
|
|
from email.mime.application import MIMEApplication
|
|
from email.mime.multipart import MIMEMultipart
|
|
from email.parser import Parser
|
|
import io
|
|
import json
|
|
|
|
import requests
|
|
import six
|
|
|
|
from google.cloud import _helpers
|
|
from google.cloud import exceptions
|
|
from google.cloud.storage._http import Connection
|
|
from google.cloud.storage.constants import _DEFAULT_TIMEOUT
|
|
|
|
|
|
class MIMEApplicationHTTP(MIMEApplication):
|
|
"""MIME type for ``application/http``.
|
|
|
|
Constructs payload from headers and body
|
|
|
|
:type method: str
|
|
:param method: HTTP method
|
|
|
|
:type uri: str
|
|
:param uri: URI for HTTP request
|
|
|
|
:type headers: dict
|
|
:param headers: HTTP headers
|
|
|
|
:type body: str
|
|
:param body: (Optional) HTTP payload
|
|
|
|
"""
|
|
|
|
def __init__(self, method, uri, headers, body):
|
|
if isinstance(body, dict):
|
|
body = json.dumps(body)
|
|
headers["Content-Type"] = "application/json"
|
|
headers["Content-Length"] = len(body)
|
|
if body is None:
|
|
body = ""
|
|
lines = ["%s %s HTTP/1.1" % (method, uri)]
|
|
lines.extend(
|
|
["%s: %s" % (key, value) for key, value in sorted(headers.items())]
|
|
)
|
|
lines.append("")
|
|
lines.append(body)
|
|
payload = "\r\n".join(lines)
|
|
if six.PY2:
|
|
# email.message.Message is an old-style class, so we
|
|
# cannot use 'super()'.
|
|
MIMEApplication.__init__(self, payload, "http", encode_noop)
|
|
else: # pragma: NO COVER Python3
|
|
super_init = super(MIMEApplicationHTTP, self).__init__
|
|
super_init(payload, "http", encode_noop)
|
|
|
|
|
|
class _FutureDict(object):
|
|
"""Class to hold a future value for a deferred request.
|
|
|
|
Used by for requests that get sent in a :class:`Batch`.
|
|
"""
|
|
|
|
@staticmethod
|
|
def get(key, default=None):
|
|
"""Stand-in for dict.get.
|
|
|
|
:type key: object
|
|
:param key: Hashable dictionary key.
|
|
|
|
:type default: object
|
|
:param default: Fallback value to dict.get.
|
|
|
|
:raises: :class:`KeyError` always since the future is intended to fail
|
|
as a dictionary.
|
|
"""
|
|
raise KeyError("Cannot get(%r, default=%r) on a future" % (key, default))
|
|
|
|
def __getitem__(self, key):
|
|
"""Stand-in for dict[key].
|
|
|
|
:type key: object
|
|
:param key: Hashable dictionary key.
|
|
|
|
:raises: :class:`KeyError` always since the future is intended to fail
|
|
as a dictionary.
|
|
"""
|
|
raise KeyError("Cannot get item %r from a future" % (key,))
|
|
|
|
def __setitem__(self, key, value):
|
|
"""Stand-in for dict[key] = value.
|
|
|
|
:type key: object
|
|
:param key: Hashable dictionary key.
|
|
|
|
:type value: object
|
|
:param value: Dictionary value.
|
|
|
|
:raises: :class:`KeyError` always since the future is intended to fail
|
|
as a dictionary.
|
|
"""
|
|
raise KeyError("Cannot set %r -> %r on a future" % (key, value))
|
|
|
|
|
|
class _FutureResponse(requests.Response):
|
|
"""Reponse that returns a placeholder dictionary for a batched requests."""
|
|
|
|
def __init__(self, future_dict):
|
|
super(_FutureResponse, self).__init__()
|
|
self._future_dict = future_dict
|
|
self.status_code = 204
|
|
|
|
def json(self):
|
|
return self._future_dict
|
|
|
|
@property
|
|
def content(self):
|
|
return self._future_dict
|
|
|
|
|
|
class Batch(Connection):
|
|
"""Proxy an underlying connection, batching up change operations.
|
|
|
|
:type client: :class:`google.cloud.storage.client.Client`
|
|
:param client: The client to use for making connections.
|
|
"""
|
|
|
|
_MAX_BATCH_SIZE = 1000
|
|
|
|
def __init__(self, client):
|
|
super(Batch, self).__init__(client)
|
|
self._requests = []
|
|
self._target_objects = []
|
|
|
|
def _do_request(
|
|
self, method, url, headers, data, target_object, timeout=_DEFAULT_TIMEOUT
|
|
):
|
|
"""Override Connection: defer actual HTTP request.
|
|
|
|
Only allow up to ``_MAX_BATCH_SIZE`` requests to be deferred.
|
|
|
|
:type method: str
|
|
:param method: The HTTP method to use in the request.
|
|
|
|
:type url: str
|
|
:param url: The URL to send the request to.
|
|
|
|
:type headers: dict
|
|
:param headers: A dictionary of HTTP headers to send with the request.
|
|
|
|
:type data: str
|
|
:param data: The data to send as the body of the request.
|
|
|
|
:type target_object: object
|
|
:param target_object:
|
|
(Optional) This allows us to enable custom behavior in our batch
|
|
connection. Here we defer an HTTP request and complete
|
|
initialization of the object at a later time.
|
|
|
|
:type timeout: float or tuple
|
|
:param timeout: (Optional) The amount of time, in seconds, to wait
|
|
for the server response.
|
|
|
|
Can also be passed as a tuple (connect_timeout, read_timeout).
|
|
See :meth:`requests.Session.request` documentation for details.
|
|
|
|
:rtype: tuple of ``response`` (a dictionary of sorts)
|
|
and ``content`` (a string).
|
|
:returns: The HTTP response object and the content of the response.
|
|
"""
|
|
if len(self._requests) >= self._MAX_BATCH_SIZE:
|
|
raise ValueError(
|
|
"Too many deferred requests (max %d)" % self._MAX_BATCH_SIZE
|
|
)
|
|
self._requests.append((method, url, headers, data, timeout))
|
|
result = _FutureDict()
|
|
self._target_objects.append(target_object)
|
|
if target_object is not None:
|
|
target_object._properties = result
|
|
return _FutureResponse(result)
|
|
|
|
def _prepare_batch_request(self):
|
|
"""Prepares headers and body for a batch request.
|
|
|
|
:rtype: tuple (dict, str)
|
|
:returns: The pair of headers and body of the batch request to be sent.
|
|
:raises: :class:`ValueError` if no requests have been deferred.
|
|
"""
|
|
if len(self._requests) == 0:
|
|
raise ValueError("No deferred requests")
|
|
|
|
multi = MIMEMultipart()
|
|
|
|
# Use timeout of last request, default to _DEFAULT_TIMEOUT
|
|
timeout = _DEFAULT_TIMEOUT
|
|
for method, uri, headers, body, _timeout in self._requests:
|
|
subrequest = MIMEApplicationHTTP(method, uri, headers, body)
|
|
multi.attach(subrequest)
|
|
timeout = _timeout
|
|
|
|
# The `email` package expects to deal with "native" strings
|
|
if six.PY2: # pragma: NO COVER Python3
|
|
buf = io.BytesIO()
|
|
else:
|
|
buf = io.StringIO()
|
|
generator = Generator(buf, False, 0)
|
|
generator.flatten(multi)
|
|
payload = buf.getvalue()
|
|
|
|
# Strip off redundant header text
|
|
_, body = payload.split("\n\n", 1)
|
|
return dict(multi._headers), body, timeout
|
|
|
|
def _finish_futures(self, responses):
|
|
"""Apply all the batch responses to the futures created.
|
|
|
|
:type responses: list of (headers, payload) tuples.
|
|
:param responses: List of headers and payloads from each response in
|
|
the batch.
|
|
|
|
:raises: :class:`ValueError` if no requests have been deferred.
|
|
"""
|
|
# If a bad status occurs, we track it, but don't raise an exception
|
|
# until all futures have been populated.
|
|
exception_args = None
|
|
|
|
if len(self._target_objects) != len(responses): # pragma: NO COVER
|
|
raise ValueError("Expected a response for every request.")
|
|
|
|
for target_object, subresponse in zip(self._target_objects, responses):
|
|
if not 200 <= subresponse.status_code < 300:
|
|
exception_args = exception_args or subresponse
|
|
elif target_object is not None:
|
|
try:
|
|
target_object._properties = subresponse.json()
|
|
except ValueError:
|
|
target_object._properties = subresponse.content
|
|
|
|
if exception_args is not None:
|
|
raise exceptions.from_http_response(exception_args)
|
|
|
|
def finish(self):
|
|
"""Submit a single `multipart/mixed` request with deferred requests.
|
|
|
|
:rtype: list of tuples
|
|
:returns: one ``(headers, payload)`` tuple per deferred request.
|
|
"""
|
|
headers, body, timeout = self._prepare_batch_request()
|
|
|
|
url = "%s/batch/storage/v1" % self.API_BASE_URL
|
|
|
|
# Use the private ``_base_connection`` rather than the property
|
|
# ``_connection``, since the property may be this
|
|
# current batch.
|
|
response = self._client._base_connection._make_request(
|
|
"POST", url, data=body, headers=headers, timeout=timeout
|
|
)
|
|
responses = list(_unpack_batch_response(response))
|
|
self._finish_futures(responses)
|
|
return responses
|
|
|
|
def current(self):
|
|
"""Return the topmost batch, or None."""
|
|
return self._client.current_batch
|
|
|
|
def __enter__(self):
|
|
self._client._push_batch(self)
|
|
return self
|
|
|
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
try:
|
|
if exc_type is None:
|
|
self.finish()
|
|
finally:
|
|
self._client._pop_batch()
|
|
|
|
|
|
def _generate_faux_mime_message(parser, response):
|
|
"""Convert response, content -> (multipart) email.message.
|
|
|
|
Helper for _unpack_batch_response.
|
|
"""
|
|
# We coerce to bytes to get consistent concat across
|
|
# Py2 and Py3. Percent formatting is insufficient since
|
|
# it includes the b in Py3.
|
|
content_type = _helpers._to_bytes(response.headers.get("content-type", ""))
|
|
|
|
faux_message = b"".join(
|
|
[b"Content-Type: ", content_type, b"\nMIME-Version: 1.0\n\n", response.content]
|
|
)
|
|
|
|
if six.PY2:
|
|
return parser.parsestr(faux_message)
|
|
else: # pragma: NO COVER Python3
|
|
return parser.parsestr(faux_message.decode("utf-8"))
|
|
|
|
|
|
def _unpack_batch_response(response):
|
|
"""Convert requests.Response -> [(headers, payload)].
|
|
|
|
Creates a generator of tuples of emulating the responses to
|
|
:meth:`requests.Session.request`.
|
|
|
|
:type response: :class:`requests.Response`
|
|
:param response: HTTP response / headers from a request.
|
|
"""
|
|
parser = Parser()
|
|
message = _generate_faux_mime_message(parser, response)
|
|
|
|
if not isinstance(message._payload, list): # pragma: NO COVER
|
|
raise ValueError("Bad response: not multi-part")
|
|
|
|
for subrequest in message._payload:
|
|
status_line, rest = subrequest._payload.split("\n", 1)
|
|
_, status, _ = status_line.split(" ", 2)
|
|
sub_message = parser.parsestr(rest)
|
|
payload = sub_message._payload
|
|
msg_headers = dict(sub_message._headers)
|
|
content_id = msg_headers.get("Content-ID")
|
|
|
|
subresponse = requests.Response()
|
|
subresponse.request = requests.Request(
|
|
method="BATCH", url="contentid://{}".format(content_id)
|
|
).prepare()
|
|
subresponse.status_code = int(status)
|
|
subresponse.headers.update(msg_headers)
|
|
subresponse._content = payload.encode("utf-8")
|
|
|
|
yield subresponse
|