Added delete option to database storage.
This commit is contained in:
parent
308604a33c
commit
963b5bc68b
1868 changed files with 192402 additions and 13278 deletions
|
@ -0,0 +1,61 @@
|
|||
# Copyright 2017 Google Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Utilities for Google Media Downloads and Resumable Uploads.
|
||||
|
||||
This package has some general purpose modules, e.g.
|
||||
:mod:`~google.resumable_media.common`, but the majority of the
|
||||
public interface will be contained in subpackages.
|
||||
|
||||
===========
|
||||
Subpackages
|
||||
===========
|
||||
|
||||
Each subpackage is tailored to a specific transport library:
|
||||
|
||||
* the :mod:`~google.resumable_media.requests` subpackage uses the ``requests``
|
||||
transport library.
|
||||
|
||||
.. _requests: http://docs.python-requests.org/
|
||||
|
||||
==========
|
||||
Installing
|
||||
==========
|
||||
|
||||
To install with `pip`_:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ pip install --upgrade google-resumable-media
|
||||
|
||||
.. _pip: https://pip.pypa.io/
|
||||
"""
|
||||
|
||||
|
||||
from google.resumable_media.common import DataCorruption
|
||||
from google.resumable_media.common import InvalidResponse
|
||||
from google.resumable_media.common import PERMANENT_REDIRECT
|
||||
from google.resumable_media.common import RetryStrategy
|
||||
from google.resumable_media.common import TOO_MANY_REQUESTS
|
||||
from google.resumable_media.common import UPLOAD_CHUNK_SIZE
|
||||
|
||||
|
||||
# Public API of this package: exactly the names re-exported above from
# ``google.resumable_media.common``.
__all__ = [
    u"DataCorruption",
    u"InvalidResponse",
    u"PERMANENT_REDIRECT",
    u"RetryStrategy",
    u"TOO_MANY_REQUESTS",
    u"UPLOAD_CHUNK_SIZE",
]
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,553 @@
|
|||
# Copyright 2017 Google Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Virtual base classes for downloading media from Google APIs."""
|
||||
|
||||
|
||||
import re
|
||||
|
||||
from six.moves import http_client
|
||||
|
||||
|
||||
from google._async_resumable_media import _helpers
|
||||
from google.resumable_media import common
|
||||
|
||||
|
||||
# Matches a ``Content-Range`` response header of the form
# ``bytes {start}-{end}/{total}`` (case-insensitive) and exposes the three
# numbers as the named groups ``start_byte``, ``end_byte`` and ``total_bytes``.
_CONTENT_RANGE_RE = re.compile(
    r"bytes (?P<start_byte>\d+)-(?P<end_byte>\d+)/(?P<total_bytes>\d+)",
    flags=re.IGNORECASE,
)
# Status codes accepted for a download response: 200 (full body) and
# 206 (partial content for a ranged request).
_ACCEPTABLE_STATUS_CODES = (http_client.OK, http_client.PARTIAL_CONTENT)
# HTTP verb used for every download request.
_GET = u"GET"
# ``Content-Range`` value that ``_check_for_zero_content_range`` treats as the
# marker of a zero-byte resource when it accompanies a 416 response.
_ZERO_CONTENT_RANGE_HEADER = u"bytes */0"
|
||||
|
||||
|
||||
class DownloadBase(object):
    """Shared state and transport hooks for all download helpers.

    Concrete transports are expected to override the three static accessor
    hooks (``_get_status_code``, ``_get_headers`` and ``_get_body``); the
    versions defined here always raise :class:`NotImplementedError`.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded.
        end (int): The last byte in a range to be downloaded.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
    """

    def __init__(self, media_url, stream=None, start=None, end=None, headers=None):
        self.media_url = media_url
        self.start = start
        self.end = end
        self._stream = stream
        # Give every instance its own mapping when the caller supplies none.
        self._headers = {} if headers is None else headers
        self._finished = False
        self._retry_strategy = common.RetryStrategy()

    @property
    def finished(self):
        """bool: Whether this download has already completed."""
        return self._finished

    @staticmethod
    def _get_status_code(response):
        """Extract the status code from an HTTP response (transport hook).

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError(u"This implementation is virtual.")

    @staticmethod
    def _get_headers(response):
        """Extract the headers from an HTTP response (transport hook).

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError(u"This implementation is virtual.")

    @staticmethod
    def _get_body(response):
        """Extract the body from an HTTP response (transport hook).

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError(u"This implementation is virtual.")
|
||||
|
||||
|
||||
class Download(DownloadBase):
    """One-shot helper for downloading a resource from a Google API.

    A "slice" of the resource can be requested by passing ``start`` and / or
    ``end``; in typical usage neither is provided and the whole resource is
    requested.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded. If not
            provided, but ``end`` is provided, will download from the
            beginning to ``end`` of the media.
        end (int): The last byte in a range to be downloaded. If not
            provided, but ``start`` is provided, will download from the
            ``start`` to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
        checksum (Optional[str]): The type of checksum to compute to verify
            the integrity of the object. Supported values are "md5",
            "crc32c" and None. This class only stores the value; the
            verification itself is expected to be carried out by the
            concrete transport implementation.
    """

    def __init__(
        self, media_url, stream=None, start=None, end=None, headers=None, checksum="md5"
    ):
        super(Download, self).__init__(
            media_url, stream=stream, start=start, end=end, headers=headers
        )
        self.checksum = checksum

    def _prepare_request(self):
        """Build the pieces of the HTTP request without doing any I/O.

        This follows the `sans-I/O`_ philosophy: everything that can be
        computed before touching the network happens here. The instance's
        headers mapping is updated in place with the ``range`` header (when
        a range was requested) and that same mapping is returned.

        Returns:
            Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple

              * HTTP verb for the request (always GET)
              * the URL for the request
              * the body of the request (always :data:`None`)
              * headers for the request

        Raises:
            ValueError: If the current :class:`Download` has already
                finished.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        if self.finished:
            raise ValueError(u"A download can only be used once.")

        request_headers = self._headers
        add_bytes_range(self.start, self.end, request_headers)
        return (_GET, self.media_url, None, request_headers)

    def _process_response(self, response):
        """Validate the HTTP response without doing any I/O.

        This follows the `sans-I/O`_ philosophy. The download is marked as
        finished *before* the status code is checked, so a failed response
        also prevents the instance from being re-used.

        Args:
            response (object): The HTTP response object.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        # Tombstone first: success or failure, this instance is spent.
        self._finished = True
        _helpers.require_status_code(
            response, _ACCEPTABLE_STATUS_CODES, self._get_status_code
        )

    def consume(self, transport, timeout=None):
        """Consume the resource to be downloaded.

        If a ``stream`` is attached to this download, then the downloaded
        resource will be written to the stream.

        Args:
            transport (object): An object which can make authenticated
                requests.
            timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.
                Can also be passed as an `aiohttp.ClientTimeout` object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError(u"This implementation is virtual.")
|
||||
|
||||
|
||||
class ChunkedDownload(DownloadBase):
    """Download a resource from a Google API one chunk at a time.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each
            request.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            will be used to concatenate chunks of the resource as they are
            downloaded.
        start (int): The first byte in a range to be downloaded. If not
            provided, defaults to ``0``.
        end (int): The last byte in a range to be downloaded. If not
            provided, will download to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with each request, e.g. headers for data encryption
            key headers.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each request.

    Raises:
        ValueError: If ``start`` is negative.
    """

    def __init__(self, media_url, chunk_size, stream, start=0, end=None, headers=None):
        # Reject a negative start before any state is set up.
        if start < 0:
            raise ValueError(
                u"On a chunked download the starting " u"value cannot be negative."
            )
        super(ChunkedDownload, self).__init__(
            media_url, stream=stream, start=start, end=end, headers=headers
        )
        self.chunk_size = chunk_size
        self._bytes_downloaded = 0
        self._total_bytes = None
        self._invalid = False

    @property
    def bytes_downloaded(self):
        """int: Running count of bytes accounted for so far."""
        return self._bytes_downloaded

    @property
    def total_bytes(self):
        """Optional[int]: Total size of the resource.

        :data:`None` until a response carrying a ``content-range`` header
        has been processed.
        """
        return self._total_bytes

    @property
    def invalid(self):
        """bool: Whether the download is in an invalid state.

        Set once validation of a chunk response fails; an invalid download
        refuses to prepare further requests.
        """
        return self._invalid

    def _get_byte_range(self):
        """Work out the byte range for the next chunk request.

        Returns:
            Tuple[int, int]: The pair of begin and end byte for the next
            chunked request.
        """
        first = self.start + self.bytes_downloaded
        # Candidate upper bounds; the smallest one wins.
        upper_bounds = [first + self.chunk_size - 1]
        if self.end is not None:
            upper_bounds.append(self.end)
        if self.total_bytes is not None:
            upper_bounds.append(self.total_bytes - 1)
        return first, min(upper_bounds)

    def _prepare_request(self):
        """Build the pieces of the next chunk request without doing any I/O.

        This follows the `sans-I/O`_ philosophy.

        .. note::

            This method is used once per chunk, so the instance's headers
            mapping is mutated between requests. No copy is made because
            the same ``range`` key is overwritten every time.

        Returns:
            Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple

              * HTTP verb for the request (always GET)
              * the URL for the request
              * the body of the request (always :data:`None`)
              * headers for the request

        Raises:
            ValueError: If the current download has finished.
            ValueError: If the current download is invalid.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        if self.finished:
            raise ValueError(u"Download has finished.")
        if self.invalid:
            raise ValueError(u"Download is invalid and cannot be re-used.")

        first, last = self._get_byte_range()
        request_headers = self._headers
        add_bytes_range(first, last, request_headers)
        return (_GET, self.media_url, None, request_headers)

    def _make_invalid(self):
        """Mark this instance as invalid.

        Handed to helpers as a callback so they can flag the download
        just before raising an exception.
        """
        self._invalid = True

    async def _process_response(self, response):
        """Validate a chunk response and fold it into the download state.

        This is based on the `sans-I/O`_ philosophy, with one exception:
        the chunk is written to ``stream``, which is I/O (though almost
        certainly not network I/O).

        A 416 response whose ``content-range`` marks a zero-byte resource
        simply finishes the download. Otherwise the status code and the
        ``content-range`` header (``bytes {start}-{end}/{total}``) are
        validated. When no ``transfer-encoding`` header is present, the
        ``content-length`` header must match the body size; with a
        transfer encoding the chunk size is derived from the content range
        instead. ``bytes_downloaded`` is then advanced, ``finished`` is set
        when the end of the requested range or of the resource has been
        reached, and ``total_bytes`` is recorded if not already known (a
        previously stored value is never re-checked against the headers).

        Args:
            response (object): The HTTP response object (need headers).

        Raises:
            ~google.resumable_media.common.InvalidResponse: If the number
                of bytes in the body doesn't match the content length header.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        # Zero-byte resource: nothing to write, the download is complete.
        if _check_for_zero_content_range(
            response, self._get_status_code, self._get_headers
        ):
            self._finished = True
            return

        # Validate before mutating any state on the instance.
        _helpers.require_status_code(
            response,
            _ACCEPTABLE_STATUS_CODES,
            self._get_status_code,
            callback=self._make_invalid,
        )
        headers = self._get_headers(response)
        payload = await self._get_body(response)

        first_byte, last_byte, resource_size = get_range_info(
            response, self._get_headers, callback=self._make_invalid
        )

        if headers.get(u"transfer-encoding") is not None:
            # 'content-length' header not allowed with chunked encoding.
            chunk_len = last_byte - first_byte + 1
        else:
            declared = _helpers.header_required(
                response,
                u"content-length",
                self._get_headers,
                callback=self._make_invalid,
            )
            chunk_len = int(declared)
            if len(payload) != chunk_len:
                self._make_invalid()
                raise common.InvalidResponse(
                    response,
                    u"Response is different size than content-length",
                    u"Expected",
                    chunk_len,
                    u"Received",
                    len(payload),
                )

        self._bytes_downloaded += chunk_len

        # Done once the chunk reaches ``end`` or the last byte of the media.
        reached_end = self.end is not None and last_byte >= self.end
        if reached_end or last_byte >= resource_size - 1:
            self._finished = True

        # NOTE: Only adopt the reported total when none is known yet.
        if self.total_bytes is None:
            self._total_bytes = resource_size

        self._stream.write(payload)

    def consume_next_chunk(self, transport, timeout=None):
        """Consume the next chunk of the resource to be downloaded.

        Args:
            transport (object): An object which can make authenticated
                requests.
            timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.
                Can also be passed as an `aiohttp.ClientTimeout` object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError(u"This implementation is virtual.")
|
||||
|
||||
|
||||
def add_bytes_range(start, end, headers):
    """Write a ``range`` header describing ``start`` / ``end`` into ``headers``.

    Nothing is written when both bounds are :data:`None`. Examples::

      >>> headers = {}
      >>> add_bytes_range(None, None, headers)
      >>> headers
      {}
      >>> add_bytes_range(500, 999, headers)
      >>> headers['range']
      'bytes=500-999'
      >>> add_bytes_range(None, 499, headers)
      >>> headers['range']
      'bytes=0-499'
      >>> add_bytes_range(-500, None, headers)
      >>> headers['range']
      'bytes=-500'
      >>> add_bytes_range(9500, None, headers)
      >>> headers['range']
      'bytes=9500-'

    Args:
        start (Optional[int]): The first byte in a range. Can be zero,
            positive, negative or :data:`None`.
        end (Optional[int]): The last byte in a range. Assumed to be
            positive.
        headers (Mapping[str, str]): A headers mapping which can have the
            bytes range added if at least one of ``start`` or ``end``
            is not :data:`None`.
    """
    if start is None and end is None:
        # No range to add.
        return

    if start is None:
        # NOTE: This assumes ``end`` is non-negative.
        range_spec = u"0-{:d}".format(end)
    elif end is not None:
        # NOTE: This is invalid if ``start < 0``.
        range_spec = u"{:d}-{:d}".format(start, end)
    elif start < 0:
        # Suffix range: the last ``abs(start)`` bytes.
        range_spec = u"{:d}".format(start)
    else:
        # Open-ended range from ``start`` onwards.
        range_spec = u"{:d}-".format(start)

    headers[_helpers.RANGE_HEADER] = u"bytes=" + range_spec
|
||||
|
||||
|
||||
def get_range_info(response, get_headers, callback=_helpers.do_nothing):
    """Parse the ``Content-Range`` header into start, end and total bytes.

    Args:
        response (object): An HTTP response object.
        get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers
            from an HTTP response.
        callback (Optional[Callable]): A callback that takes no arguments,
            to be executed when an exception is being raised.

    Returns:
        Tuple[int, int, int]: The start byte, end byte and total bytes.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If the
            ``Content-Range`` header is not of the form
            ``bytes {start}-{end}/{total}``.
    """
    header_value = _helpers.header_required(
        response, _helpers.CONTENT_RANGE_HEADER, get_headers, callback=callback
    )
    parsed = _CONTENT_RANGE_RE.match(header_value)
    if parsed is not None:
        fields = parsed.group(u"start_byte", u"end_byte", u"total_bytes")
        return tuple(int(field) for field in fields)

    # Malformed header: notify the caller before raising.
    callback()
    raise common.InvalidResponse(
        response,
        u"Unexpected content-range header",
        header_value,
        u'Expected to be of the form "bytes {start}-{end}/{total}"',
    )
|
||||
|
||||
|
||||
def _check_for_zero_content_range(response, get_status_code, get_headers):
|
||||
"""Validate if response status code is 416 and content range is zero.
|
||||
|
||||
This is the special case for handling zero bytes files.
|
||||
|
||||
Args:
|
||||
response (object): An HTTP response object.
|
||||
get_status_code (Callable[Any, int]): Helper to get a status code
|
||||
from a response.
|
||||
get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers
|
||||
from an HTTP response.
|
||||
|
||||
Returns:
|
||||
bool: True if content range total bytes is zero, false otherwise.
|
||||
"""
|
||||
if get_status_code(response) == http_client.REQUESTED_RANGE_NOT_SATISFIABLE:
|
||||
content_range = _helpers.header_required(
|
||||
response,
|
||||
_helpers.CONTENT_RANGE_HEADER,
|
||||
get_headers,
|
||||
callback=_helpers.do_nothing,
|
||||
)
|
||||
if content_range == _ZERO_CONTENT_RANGE_HEADER:
|
||||
return True
|
||||
return False
|
198
venv/Lib/site-packages/google/_async_resumable_media/_helpers.py
Normal file
198
venv/Lib/site-packages/google/_async_resumable_media/_helpers.py
Normal file
|
@ -0,0 +1,198 @@
|
|||
# Copyright 2020 Google Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Shared utilities used by both downloads and uploads."""
|
||||
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
|
||||
|
||||
from six.moves import http_client
|
||||
|
||||
|
||||
from google.resumable_media import common
|
||||
|
||||
|
||||
# Lower-case header names used as keys when building request headers and
# when reading response headers.
RANGE_HEADER = u"range"
CONTENT_RANGE_HEADER = u"content-range"
# Status codes that ``wait_and_retry`` treats as transient: 429 plus the
# 500 / 502 / 503 / 504 server errors.
RETRYABLE = (
    common.TOO_MANY_REQUESTS,
    http_client.INTERNAL_SERVER_ERROR,
    http_client.BAD_GATEWAY,
    http_client.SERVICE_UNAVAILABLE,
    http_client.GATEWAY_TIMEOUT,
)

# NOTE(review): the three checksum-related constants below are not referenced
# in the visible part of this module -- presumably used by code outside it.
_SLOW_CRC32C_WARNING = (
    "Currently using crcmod in pure python form. This is a slow "
    "implementation. Python 3 has a faster implementation, `google-crc32c`, "
    "which will be used if it is installed."
)
_HASH_HEADER = u"x-goog-hash"
# This template mixes one positional field (``{}``) with one named field
# (``{checksum_type}``), so ``.format`` needs one positional argument and a
# ``checksum_type`` keyword argument.
_MISSING_CHECKSUM = u"""\
No {checksum_type} checksum was returned from the service while downloading {}
(which happens for composite objects), so client-side content integrity
checking is not being performed."""
# Module-level logger named after this module.
_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def do_nothing():
    """No-op used as the default ``callback`` for the validation helpers."""
    return None
|
||||
|
||||
|
||||
def header_required(response, name, get_headers, callback=do_nothing):
    """Return the value of a header that must be present on a response.

    Args:
        response (object): An HTTP response object, expected to have a
            ``headers`` attribute that is a ``Mapping[str, str]``.
        name (str): The name of a required header.
        get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers
            from an HTTP response.
        callback (Optional[Callable]): A callback that takes no arguments,
            to be executed when an exception is being raised.

    Returns:
        str: The desired header.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If the header
            is missing.
    """
    response_headers = get_headers(response)
    if name in response_headers:
        return response_headers[name]

    # Header is absent: let the caller react before the error propagates.
    callback()
    raise common.InvalidResponse(
        response, u"Response headers must contain header", name
    )
|
||||
|
||||
|
||||
def require_status_code(response, status_codes, get_status_code, callback=do_nothing):
    """Ensure a response carries one of the acceptable status codes.

    Args:
        response (object): The HTTP response object.
        status_codes (tuple): The acceptable status codes.
        get_status_code (Callable[Any, int]): Helper to get a status code
            from a response.
        callback (Optional[Callable]): A callback that takes no arguments,
            to be executed when an exception is being raised.

    Returns:
        int: The status code.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If the status code
            is not one of the values in ``status_codes``.
    """
    actual = get_status_code(response)
    if actual in status_codes:
        return actual

    # Unacceptable status: let the caller react before the error propagates.
    callback()
    raise common.InvalidResponse(
        response,
        u"Request failed with status code",
        actual,
        u"Expected one of",
        *status_codes
    )
|
||||
|
||||
|
||||
def calculate_retry_wait(base_wait, max_sleep):
    """Compute the next back-off delay for a retry attempt.

    The base wait is doubled on every call and capped at ``max_sleep``.
    A random jitter of 0 to 1000 milliseconds is then added to the returned
    wait time so that different clients do not retry in lock-step.

    Args:
        base_wait (float): The "base" wait time (i.e. without any jitter)
            that will be doubled until it reaches the maximum sleep.
        max_sleep (float): Maximum value that a sleep time is allowed to be.

    Returns:
        Tuple[float, float]: The new base wait time as well as the wait time
        to be applied (with a random amount of jitter between 0 and 1 seconds
        added).
    """
    # Double the base, but never beyond the configured ceiling.
    doubled = min(2.0 * base_wait, max_sleep)
    jitter_seconds = 0.001 * random.randint(0, 1000)
    return doubled, doubled + jitter_seconds
|
||||
|
||||
|
||||
async def wait_and_retry(func, get_status_code, retry_strategy):
    """Attempts to retry a call to ``func`` until success.

    Expects ``func`` to return an awaitable HTTP response and uses
    ``get_status_code`` to check if the response is retry-able.

    Will retry until :meth:`~.RetryStrategy.retry_allowed` (on the current
    ``retry_strategy``) returns :data:`False`. Uses
    :func:`calculate_retry_wait` to double the wait time (with jitter) after
    each attempt.

    The back-off delay is awaited with :func:`asyncio.sleep`, so other tasks
    on the event loop keep running while this coroutine waits.

    Args:
        func (Callable): A callable that takes no arguments and produces
            an awaitable HTTP response which will be checked as retry-able.
        get_status_code (Callable[Any, int]): Helper to get a status code
            from a response.
        retry_strategy (~google.resumable_media.common.RetryStrategy): The
            strategy to use if the request fails and must be retried.

    Returns:
        object: The response from the first non-retry-able attempt, or from
        the last attempt once retries are no longer allowed.
    """
    # Imported here so the fix does not depend on the module's import header.
    import asyncio

    response = await func()
    if get_status_code(response) not in RETRYABLE:
        return response

    total_sleep = 0.0
    num_retries = 0
    base_wait = 0.5  # When doubled will give 1.0
    while retry_strategy.retry_allowed(total_sleep, num_retries):
        base_wait, wait_time = calculate_retry_wait(base_wait, retry_strategy.max_sleep)
        num_retries += 1
        total_sleep += wait_time
        # A blocking ``time.sleep`` here would freeze the whole event loop
        # for the duration of the back-off; yield to the loop instead.
        await asyncio.sleep(wait_time)
        response = await func()
        if get_status_code(response) not in RETRYABLE:
            return response

    # Retries exhausted: hand back the last (still retry-able) response.
    return response
|
||||
|
||||
|
||||
class _DoNothingHash(object):
|
||||
"""Do-nothing hash object.
|
||||
|
||||
Intended as a stand-in for ``hashlib.md5`` or a crc32c checksum
|
||||
implementation in cases where it isn't necessary to compute the hash.
|
||||
"""
|
||||
|
||||
def update(self, unused_chunk):
|
||||
"""Do-nothing ``update`` method.
|
||||
|
||||
Intended to match the interface of ``hashlib.md5`` and other checksums.
|
||||
Args:
|
||||
unused_chunk (bytes): A chunk of data.
|
||||
"""
|
979
venv/Lib/site-packages/google/_async_resumable_media/_upload.py
Normal file
979
venv/Lib/site-packages/google/_async_resumable_media/_upload.py
Normal file
|
@ -0,0 +1,979 @@
|
|||
# Copyright 2017 Google Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Virtual base classes for uploading media via Google APIs.
|
||||
|
||||
Supported here are:
|
||||
|
||||
* simple (media) uploads
|
||||
* multipart uploads that contain both metadata and a small file as payload
|
||||
* resumable uploads (with metadata as well)
|
||||
"""
|
||||
|
||||
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import sys
|
||||
|
||||
import six
|
||||
from six.moves import http_client
|
||||
|
||||
from google import _async_resumable_media
|
||||
from google._async_resumable_media import _helpers
|
||||
from google.resumable_media import _helpers as sync_helpers
|
||||
from google.resumable_media import _upload as sync_upload
|
||||
from google.resumable_media import common
|
||||
|
||||
|
||||
from google.resumable_media._upload import (
|
||||
_CONTENT_TYPE_HEADER,
|
||||
_CONTENT_RANGE_TEMPLATE,
|
||||
_RANGE_UNKNOWN_TEMPLATE,
|
||||
_EMPTY_RANGE_TEMPLATE,
|
||||
_BOUNDARY_FORMAT,
|
||||
_MULTIPART_SEP,
|
||||
_CRLF,
|
||||
_MULTIPART_BEGIN,
|
||||
_RELATED_HEADER,
|
||||
_BYTES_RANGE_RE,
|
||||
_STREAM_ERROR_TEMPLATE,
|
||||
_POST,
|
||||
_PUT,
|
||||
_UPLOAD_CHECKSUM_MISMATCH_MESSAGE,
|
||||
_UPLOAD_METADATA_NO_APPROPRIATE_CHECKSUM_MESSAGE,
|
||||
)
|
||||
|
||||
|
||||
class UploadBase(object):
    """Common foundation shared by every upload helper.

    Holds the state all upload flavours need: the destination URL, the
    extra request headers, a "finished" tombstone and a retry strategy.

    Args:
        upload_url (str): The URL where the content will be uploaded.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
            When omitted, a new empty dictionary is used.

    Attributes:
        upload_url (str): The URL where the content will be uploaded.
    """

    def __init__(self, upload_url, headers=None):
        self.upload_url = upload_url
        # NOTE: A caller-supplied mapping is stored as-is (it is not copied).
        self._headers = {} if headers is None else headers
        self._finished = False
        self._retry_strategy = common.RetryStrategy()

    @property
    def finished(self):
        """bool: Whether this upload has already completed."""
        return self._finished

    def _process_response(self, response):
        """Handle the response received for the upload request.

        Covers the post-request work that needs no network I/O (or other
        I/O), following the `sans-I/O`_ philosophy.

        Args:
            response (object): The HTTP response object.

        Raises:
            ~google.resumable_media.common.InvalidResponse: If the status
                code is not 200.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        # The upload is marked as used *before* the status check, so it is
        # tombstoned on failure as well as on success.
        self._finished = True
        accepted_codes = (http_client.OK,)
        _helpers.require_status_code(response, accepted_codes, self._get_status_code)

    @staticmethod
    def _get_status_code(response):
        """Read the status code of an HTTP response.

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError(u"This implementation is virtual.")

    @staticmethod
    def _get_headers(response):
        """Read the headers of an HTTP response.

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError(u"This implementation is virtual.")

    @staticmethod
    def _get_body(response):
        """Read the body of an HTTP response.

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError(u"This implementation is virtual.")
|
||||
|
||||
|
||||
class SimpleUpload(UploadBase):
    """Upload a resource to a Google API in one request.

    A **simple** media upload carries no metadata; the whole resource is
    sent in a single request.

    Args:
        upload_url (str): The URL where the content will be uploaded.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.

    Attributes:
        upload_url (str): The URL where the content will be uploaded.
    """

    def _prepare_request(self, data, content_type):
        """Build the pieces of the HTTP request for this upload.

        Covers the pre-request work that needs no network I/O (or other
        I/O), following the `sans-I/O`_ philosophy.

        .. note::

            This method is meant to be used only once, so the stored
            ``headers`` mapping is mutated: the content type key is added
            to it.

        Args:
            data (bytes): The resource content to be uploaded.
            content_type (str): The content type for the request.

        Returns:
            Tuple[str, str, bytes, Mapping[str, str]]: The quadruple

              * HTTP verb for the request (always POST)
              * the URL for the request
              * the body of the request
              * headers for the request

        Raises:
            ValueError: If the current upload has already finished.
            TypeError: If ``data`` isn't bytes.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        if self.finished:
            raise ValueError(u"An upload can only be used once.")

        payload_is_bytes = isinstance(data, six.binary_type)
        if not payload_is_bytes:
            raise TypeError(u"`data` must be bytes, received", type(data))

        request_headers = self._headers
        request_headers[_CONTENT_TYPE_HEADER] = content_type
        return _POST, self.upload_url, data, request_headers

    def transmit(self, transport, data, content_type, timeout=None):
        """Send the resource to be uploaded.

        Args:
            transport (object): An object which can make authenticated
                requests.
            data (bytes): The resource content to be uploaded.
            content_type (str): The content type of the resource, e.g. a JPEG
                image has content type ``image/jpeg``.
            timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.
                Can also be passed as an `aiohttp.ClientTimeout` object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError(u"This implementation is virtual.")
|
||||
|
||||
|
||||
class MultipartUpload(UploadBase):
    """Upload a resource together with its metadata to a Google API.

    A **multipart** upload packs the metadata and the resource into one
    (multipart) request.

    Args:
        upload_url (str): The URL where the content will be uploaded.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
        checksum (Optional[str]): The type of checksum to compute to verify
            the integrity of the object. The request metadata will be amended
            to include the computed value. Using this option will override a
            manually-set checksum value. Supported values are "md5", "crc32c"
            and None. The default is None.

    Attributes:
        upload_url (str): The URL where the content will be uploaded.
    """

    def __init__(self, upload_url, headers=None, checksum=None):
        super(MultipartUpload, self).__init__(upload_url, headers=headers)
        self._checksum_type = checksum

    def _prepare_request(self, data, metadata, content_type):
        """Build the pieces of the HTTP request for this upload.

        Covers the pre-request work that needs no network I/O (or other
        I/O), following the `sans-I/O`_ philosophy.

        .. note::

            This method is meant to be used only once, so the stored
            ``headers`` mapping is mutated: the content type key is added
            to it. When a checksum type is configured, ``metadata`` is
            mutated as well (the checksum entry is written into it).

        Args:
            data (bytes): The resource content to be uploaded.
            metadata (Mapping[str, str]): The resource metadata, such as an
                ACL list.
            content_type (str): The content type of the resource, e.g. a JPEG
                image has content type ``image/jpeg``.

        Returns:
            Tuple[str, str, bytes, Mapping[str, str]]: The quadruple

              * HTTP verb for the request (always POST)
              * the URL for the request
              * the body of the request
              * headers for the request

        Raises:
            ValueError: If the current upload has already finished.
            TypeError: If ``data`` isn't bytes.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        if self.finished:
            raise ValueError(u"An upload can only be used once.")

        payload_is_bytes = isinstance(data, six.binary_type)
        if not payload_is_bytes:
            raise TypeError(u"`data` must be bytes, received", type(data))

        hasher = sync_helpers._get_checksum_object(self._checksum_type)
        if hasher:
            hasher.update(data)
            digest = sync_helpers.prepare_checksum_digest(hasher.digest())
            # Overrides any checksum value the caller placed in ``metadata``.
            metadata[sync_helpers._get_metadata_key(self._checksum_type)] = digest

        body, boundary = construct_multipart_request(data, metadata, content_type)

        request_headers = self._headers
        request_headers[_CONTENT_TYPE_HEADER] = _RELATED_HEADER + boundary + b'"'
        return _POST, self.upload_url, body, request_headers

    def transmit(self, transport, data, metadata, content_type, timeout=None):
        """Send the resource to be uploaded.

        Args:
            transport (object): An object which can make authenticated
                requests.
            data (bytes): The resource content to be uploaded.
            metadata (Mapping[str, str]): The resource metadata, such as an
                ACL list.
            content_type (str): The content type of the resource, e.g. a JPEG
                image has content type ``image/jpeg``.
            timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.
                Can also be passed as an `aiohttp.ClientTimeout` object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError(u"This implementation is virtual.")
|
||||
|
||||
|
||||
class ResumableUpload(UploadBase, sync_upload.ResumableUpload):
    """Initiate and fulfill a resumable upload to a Google API.

    A **resumable** upload sends an initial request with the resource metadata
    and then gets assigned an upload ID / upload URL to send bytes to.
    Using the upload URL, the upload is then done in chunks (determined by
    the user) until all bytes have been uploaded.

    Args:
        upload_url (str): The URL where the resumable upload will be initiated.
        chunk_size (int): The size of each chunk used to upload the resource.
        checksum (Optional[str]): The type of checksum to compute to verify
            the integrity of the object. After the upload is complete, the
            server-computed checksum of the resulting object will be read
            and google.resumable_media.common.DataCorruption will be raised on
            a mismatch. The corrupted file will not be deleted from the remote
            host automatically. Supported values are "md5", "crc32c" and None.
            The default is None.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the :meth:`initiate` request, e.g. headers for
            encrypted data. These **will not** be sent with
            :meth:`transmit_next_chunk` or :meth:`recover` requests.

    Attributes:
        upload_url (str): The URL where the content will be uploaded.

    Raises:
        ValueError: If ``chunk_size`` is not a multiple of
            :data:`.UPLOAD_CHUNK_SIZE`.
    """

    def __init__(self, upload_url, chunk_size, checksum=None, headers=None):
        super(ResumableUpload, self).__init__(upload_url, headers=headers)
        if chunk_size % _async_resumable_media.UPLOAD_CHUNK_SIZE != 0:
            raise ValueError(
                u"{} KB must divide chunk size".format(
                    # Floor division keeps the size an integer in the message;
                    # true division renders it as a float on Python 3.
                    _async_resumable_media.UPLOAD_CHUNK_SIZE // 1024
                )
            )
        self._chunk_size = chunk_size
        self._stream = None
        self._content_type = None
        self._bytes_uploaded = 0
        self._bytes_checksummed = 0
        self._checksum_type = checksum
        self._checksum_object = None
        self._total_bytes = None
        self._resumable_url = None
        self._invalid = False

    @property
    def invalid(self):
        """bool: Indicates if the upload is in an invalid state.

        This will occur if a call to :meth:`transmit_next_chunk` fails.
        To recover from such a failure, call :meth:`recover`.
        """
        return self._invalid

    @property
    def chunk_size(self):
        """int: The size of each chunk used to upload the resource."""
        return self._chunk_size

    @property
    def resumable_url(self):
        """Optional[str]: The URL of the in-progress resumable upload."""
        return self._resumable_url

    @property
    def bytes_uploaded(self):
        """int: Number of bytes that have been uploaded."""
        return self._bytes_uploaded

    @property
    def total_bytes(self):
        """Optional[int]: The total number of bytes to be uploaded.

        If this upload is initiated (via :meth:`initiate`) with
        ``stream_final=True``, this value will be populated based on the size
        of the ``stream`` being uploaded. (By default ``stream_final=True``.)

        If this upload is initiated with ``stream_final=False``,
        :attr:`total_bytes` will be :data:`None` since it cannot be
        determined from the stream.
        """
        return self._total_bytes

    def _prepare_initiate_request(
        self, stream, metadata, content_type, total_bytes=None, stream_final=True
    ):
        """Prepare the contents of HTTP request to initiate upload.

        This is everything that must be done before a request that doesn't
        require network I/O (or other I/O). This is based on the `sans-I/O`_
        philosophy.

        Args:
            stream (IO[bytes]): The stream (i.e. file-like object) that will
                be uploaded. The stream **must** be at the beginning (i.e.
                ``stream.tell() == 0``).
            metadata (Mapping[str, str]): The resource metadata, such as an
                ACL list.
            content_type (str): The content type of the resource, e.g. a JPEG
                image has content type ``image/jpeg``.
            total_bytes (Optional[int]): The total number of bytes to be
                uploaded. If specified, the upload size **will not** be
                determined from the stream (even if ``stream_final=True``).
            stream_final (Optional[bool]): Indicates if the ``stream`` is
                "final" (i.e. no more bytes will be added to it). In this case
                we determine the upload size from the size of the stream. If
                ``total_bytes`` is passed, this argument will be ignored.

        Returns:
            Tuple[str, str, bytes, Mapping[str, str]]: The quadruple

              * HTTP verb for the request (always POST)
              * the URL for the request
              * the body of the request
              * headers for the request

        Raises:
            ValueError: If the current upload has already been initiated.
            ValueError: If ``stream`` is not at the beginning.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        if self.resumable_url is not None:
            raise ValueError(u"This upload has already been initiated.")
        if stream.tell() != 0:
            raise ValueError(u"Stream must be at beginning.")

        self._stream = stream
        self._content_type = content_type
        headers = {
            _CONTENT_TYPE_HEADER: u"application/json; charset=UTF-8",
            u"x-upload-content-type": content_type,
        }
        # Set the total bytes if possible.
        if total_bytes is not None:
            self._total_bytes = total_bytes
        elif stream_final:
            self._total_bytes = get_total_bytes(stream)
        # Add the total bytes to the headers if set.
        if self._total_bytes is not None:
            content_length = u"{:d}".format(self._total_bytes)
            headers[u"x-upload-content-length"] = content_length

        # Instance headers are applied last, so they win over the defaults
        # built above when keys collide.
        headers.update(self._headers)
        payload = json.dumps(metadata).encode(u"utf-8")
        return _POST, self.upload_url, payload, headers

    def _process_initiate_response(self, response):
        """Process the response from an HTTP request that initiated upload.

        This is everything that must be done after a request that doesn't
        require network I/O (or other I/O). This is based on the `sans-I/O`_
        philosophy.

        This method takes the URL from the ``Location`` header and stores it
        for future use. Within that URL, we assume the ``upload_id`` query
        parameter has been included, but we do not check.

        Args:
            response (object): The HTTP response object (need headers).

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        _helpers.require_status_code(
            response,
            (http_client.OK,),
            self._get_status_code,
            callback=self._make_invalid,
        )
        self._resumable_url = _helpers.header_required(
            response, u"location", self._get_headers
        )

    def initiate(
        self,
        transport,
        stream,
        metadata,
        content_type,
        total_bytes=None,
        stream_final=True,
        timeout=None,
    ):
        """Initiate a resumable upload.

        By default, this method assumes your ``stream`` is in a "final"
        state ready to transmit. However, ``stream_final=False`` can be used
        to indicate that the size of the resource is not known. This can happen
        if bytes are being dynamically fed into ``stream``, e.g. if the stream
        is attached to application logs.

        If ``stream_final=False`` is used, :attr:`chunk_size` bytes will be
        read from the stream every time :meth:`transmit_next_chunk` is called.
        If one of those reads produces strictly fewer bytes than the chunk
        size, the upload will be concluded.

        Args:
            transport (object): An object which can make authenticated
                requests.
            stream (IO[bytes]): The stream (i.e. file-like object) that will
                be uploaded. The stream **must** be at the beginning (i.e.
                ``stream.tell() == 0``).
            metadata (Mapping[str, str]): The resource metadata, such as an
                ACL list.
            content_type (str): The content type of the resource, e.g. a JPEG
                image has content type ``image/jpeg``.
            total_bytes (Optional[int]): The total number of bytes to be
                uploaded. If specified, the upload size **will not** be
                determined from the stream (even if ``stream_final=True``).
            stream_final (Optional[bool]): Indicates if the ``stream`` is
                "final" (i.e. no more bytes will be added to it). In this case
                we determine the upload size from the size of the stream. If
                ``total_bytes`` is passed, this argument will be ignored.
            timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.
                Can also be passed as an `aiohttp.ClientTimeout` object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError(u"This implementation is virtual.")

    def _prepare_request(self):
        """Prepare the contents of HTTP request to upload a chunk.

        This is everything that must be done before a request that doesn't
        require network I/O. This is based on the `sans-I/O`_ philosophy.

        For the time being, this **does require** some form of I/O to read
        a chunk from ``stream`` (via :func:`get_next_chunk`). However, this
        will (almost) certainly not be network I/O.

        Returns:
            Tuple[str, str, bytes, Mapping[str, str]]: The quadruple

              * HTTP verb for the request (always PUT)
              * the URL for the request
              * the body of the request
              * headers for the request

            The headers **do not** incorporate the ``_headers`` on the
            current instance.

        Raises:
            ValueError: If the current upload has finished.
            ValueError: If the current upload is in an invalid state.
            ValueError: If the current upload has not been initiated.
            ValueError: If the location in the stream (i.e. ``stream.tell()``)
                does not agree with ``bytes_uploaded``.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        if self.finished:
            raise ValueError(u"Upload has finished.")
        if self.invalid:
            raise ValueError(
                u"Upload is in an invalid state. To recover call `recover()`."
            )
        if self.resumable_url is None:
            raise ValueError(
                u"This upload has not been initiated. Please call "
                u"initiate() before beginning to transmit chunks."
            )

        start_byte, payload, content_range = get_next_chunk(
            self._stream, self._chunk_size, self._total_bytes
        )
        if start_byte != self.bytes_uploaded:
            msg = _STREAM_ERROR_TEMPLATE.format(start_byte, self.bytes_uploaded)
            raise ValueError(msg)

        # NOTE(review): ``_update_checksum`` is not defined in this class;
        # presumably inherited from ``sync_upload.ResumableUpload`` - confirm.
        self._update_checksum(start_byte, payload)

        headers = {
            _CONTENT_TYPE_HEADER: self._content_type,
            _helpers.CONTENT_RANGE_HEADER: content_range,
        }
        return _PUT, self.resumable_url, payload, headers

    def _make_invalid(self):
        """Simple setter for ``invalid``.

        This is intended to be passed along as a callback to helpers that
        raise an exception so they can mark this instance as invalid before
        raising.
        """
        self._invalid = True

    async def _process_response(self, response, bytes_sent):
        """Process the response from an HTTP request.

        This is everything that must be done after a request that doesn't
        require network I/O (or other I/O). This is based on the `sans-I/O`_
        philosophy.

        Args:
            response (object): The HTTP response object.
            bytes_sent (int): The number of bytes sent in the request that
                ``response`` was returned for.

        Raises:
            ~google.resumable_media.common.InvalidResponse: If the status
                code is 308 and the ``range`` header is not of the form
                ``bytes 0-{end}``.
            ~google.resumable_media.common.InvalidResponse: If the status
                code is not 200 or 308.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        status_code = _helpers.require_status_code(
            response,
            (http_client.OK, _async_resumable_media.PERMANENT_REDIRECT),
            self._get_status_code,
            callback=self._make_invalid,
        )
        if status_code == http_client.OK:
            # NOTE: We use the "local" information of ``bytes_sent`` to update
            #       ``bytes_uploaded``, but do not verify this against other
            #       state. However, there may be some other information:
            #
            #       * a ``size`` key in JSON response body
            #       * the ``total_bytes`` attribute (if set)
            #       * ``stream.tell()`` (relying on fact that ``initiate()``
            #         requires stream to be at the beginning)
            self._bytes_uploaded = self._bytes_uploaded + bytes_sent
            # Tombstone the current upload so it cannot be used again.
            self._finished = True
            # Validate the checksum. This can raise an exception on failure.
            await self._validate_checksum(response)
        else:
            bytes_range = _helpers.header_required(
                response,
                _helpers.RANGE_HEADER,
                self._get_headers,
                callback=self._make_invalid,
            )
            match = _BYTES_RANGE_RE.match(bytes_range)
            if match is None:
                self._make_invalid()
                raise common.InvalidResponse(
                    response,
                    u'Unexpected "range" header',
                    bytes_range,
                    u'Expected to be of the form "bytes=0-{end}"',
                )
            # The header reports the last byte received (inclusive), hence +1.
            self._bytes_uploaded = int(match.group(u"end_byte")) + 1

    async def _validate_checksum(self, response):
        """Check the computed checksum, if any, against the response body.

        The remote checksum is read from the JSON document returned by
        ``response.json()``. Nothing is checked when no checksum type was
        requested.

        Args:
            response (object): The HTTP response object.

        Raises:
            ~google.resumable_media.common.InvalidResponse: If the response
                metadata has no entry for the requested checksum type.
            ~google.resumable_media.common.DataCorruption: If the checksum
                computed locally and the checksum reported by the remote host
                do not match.
        """
        if self._checksum_type is None:
            return
        metadata_key = sync_helpers._get_metadata_key(self._checksum_type)
        metadata = await response.json()
        remote_checksum = metadata.get(metadata_key)
        if remote_checksum is None:
            raise common.InvalidResponse(
                response,
                _UPLOAD_METADATA_NO_APPROPRIATE_CHECKSUM_MESSAGE.format(metadata_key),
                self._get_headers(response),
            )
        local_checksum = sync_helpers.prepare_checksum_digest(
            self._checksum_object.digest()
        )
        if local_checksum != remote_checksum:
            raise common.DataCorruption(
                response,
                _UPLOAD_CHECKSUM_MISMATCH_MESSAGE.format(
                    self._checksum_type.upper(), local_checksum, remote_checksum
                ),
            )

    def transmit_next_chunk(self, transport, timeout=None):
        """Transmit the next chunk of the resource to be uploaded.

        If the current upload was initiated with ``stream_final=False``,
        this method will dynamically determine if the upload has completed.
        The upload will be considered complete if the stream produces
        fewer than :attr:`chunk_size` bytes when a chunk is read from it.

        Args:
            transport (object): An object which can make authenticated
                requests.
            timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.
                Can also be passed as an `aiohttp.ClientTimeout` object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError(u"This implementation is virtual.")

    def _prepare_recover_request(self):
        """Prepare the contents of HTTP request to recover from failure.

        This is everything that must be done before a request that doesn't
        require network I/O. This is based on the `sans-I/O`_ philosophy.

        We assume that the :attr:`resumable_url` is set (i.e. the only way
        the upload can end up :attr:`invalid` is if it has been initiated).

        Returns:
            Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple

              * HTTP verb for the request (always PUT)
              * the URL for the request
              * the body of the request (always :data:`None`)
              * headers for the request

            The headers **do not** incorporate the ``_headers`` on the
            current instance.

        Raises:
            ValueError: If the current upload is not in an invalid state.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        if not self.invalid:
            raise ValueError(u"Upload is not in invalid state, no need to recover.")

        headers = {_helpers.CONTENT_RANGE_HEADER: u"bytes */*"}
        return _PUT, self.resumable_url, None, headers

    def _process_recover_response(self, response):
        """Process the response from an HTTP request to recover from failure.

        This is everything that must be done after a request that doesn't
        require network I/O (or other I/O). This is based on the `sans-I/O`_
        philosophy.

        Args:
            response (object): The HTTP response object.

        Raises:
            ~google.resumable_media.common.InvalidResponse: If the status
                code is not 308.
            ~google.resumable_media.common.InvalidResponse: If the status
                code is 308 and the ``range`` header is not of the form
                ``bytes 0-{end}``.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        _helpers.require_status_code(
            response,
            (_async_resumable_media.PERMANENT_REDIRECT,),
            self._get_status_code,
        )
        headers = self._get_headers(response)
        if _helpers.RANGE_HEADER in headers:
            bytes_range = headers[_helpers.RANGE_HEADER]
            match = _BYTES_RANGE_RE.match(bytes_range)
            if match is None:
                raise common.InvalidResponse(
                    response,
                    u'Unexpected "range" header',
                    bytes_range,
                    u'Expected to be of the form "bytes=0-{end}"',
                )
            self._bytes_uploaded = int(match.group(u"end_byte")) + 1
        else:
            # In this case, the upload has not "begun".
            self._bytes_uploaded = 0

        # Re-align the local stream with what the server reports it has
        # before clearing the invalid flag.
        self._stream.seek(self._bytes_uploaded)
        self._invalid = False

    def recover(self, transport):
        """Recover from a failure.

        This method should be used when a :class:`ResumableUpload` is in an
        :attr:`~ResumableUpload.invalid` state due to a request failure.

        This will verify the progress with the server and make sure the
        current upload is in a valid state before :meth:`transmit_next_chunk`
        can be used again.

        Args:
            transport (object): An object which can make authenticated
                requests.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError(u"This implementation is virtual.")
|
||||
|
||||
|
||||
def get_boundary():
    """Generate a random boundary marker for a multipart request.

    Returns:
        bytes: The boundary used to separate parts of a multipart request.
    """
    # NOTE: The template is a unicode string that is encoded afterwards,
    #       because byte strings support neither % formatting nor
    #       ``.format()`` in Python 3.4.
    token = random.randrange(sys.maxsize)
    return _BOUNDARY_FORMAT.format(token).encode(u"utf-8")
|
||||
|
||||
|
||||
def construct_multipart_request(data, metadata, content_type):
    """Assemble the body of a multipart upload request.

    The body has two parts, each introduced by the boundary separator: the
    JSON-encoded ``metadata`` first, then the raw ``data`` preceded by its
    ``content-type`` header. A closing boundary ends the body.

    Args:
        data (bytes): The resource content (UTF-8 encoded as bytes)
            to be uploaded.
        metadata (Mapping[str, str]): The resource metadata, such as an
            ACL list.
        content_type (str): The content type of the resource, e.g. a JPEG
            image has content type ``image/jpeg``.

    Returns:
        Tuple[bytes, bytes]: The multipart request body and the boundary used
        between each part.
    """
    boundary = get_boundary()
    metadata_json = json.dumps(metadata).encode(u"utf-8")
    media_type = content_type.encode(u"utf-8")

    # NOTE: A bytes template would be nicer, but Python 3.4 rules it out, so
    #       the payload is built by concatenation.
    separator = _MULTIPART_SEP + boundary
    metadata_part = separator + _MULTIPART_BEGIN + metadata_json + _CRLF
    # Two CRLFs: one ends the header line, one is the empty line between
    # headers and body.
    media_part = (
        separator + _CRLF + b"content-type: " + media_type + _CRLF + _CRLF + data + _CRLF
    )
    closing = separator + _MULTIPART_SEP

    return metadata_part + media_part + closing, boundary
|
||||
|
||||
|
||||
def get_total_bytes(stream):
    """Measure the size of a stream without disturbing its position.

    The stream is moved to its end to read the size, then moved back to
    where it was when this function was called.

    Args:
        stream (IO[bytes]): The stream (i.e. file-like object).

    Returns:
        int: The number of bytes.
    """
    saved_offset = stream.tell()
    # NOTE: The return value of ``.seek()`` is deliberately ignored in favour
    #       of ``.tell()``: they **should** agree, but Python 2 ``file``
    #       objects do not return the position from ``.seek()``.
    stream.seek(0, os.SEEK_END)
    size = stream.tell()
    # Restore the caller's position.
    stream.seek(saved_offset)
    return size
|
||||
|
||||
|
||||
def get_next_chunk(stream, chunk_size, total_bytes):
    """Read the next chunk from an I/O stream and describe its byte range.

    The ``stream`` may have fewer bytes remaining than ``chunk_size``
    so it may not always be the case that
    ``end_byte == start_byte + chunk_size - 1``.

    Args:
        stream (IO[bytes]): The stream (i.e. file-like object).
        chunk_size (int): The size of the chunk to be read from the ``stream``.
        total_bytes (Optional[int]): The (expected) total number of bytes
            in the ``stream``.

    Returns:
        Tuple[int, bytes, str]: Triple of:

        * the start byte index
        * the content in between the start and end bytes (inclusive)
        * content range header for the chunk (slice) that has been read

    Raises:
        ValueError: If ``total_bytes == 0`` but ``stream.read()`` yields
            non-empty content.
        ValueError: If there is no data left to consume. This corresponds
            exactly to the case ``end_byte < start_byte``, which can only
            occur if ``end_byte == start_byte - 1``.
    """
    first_byte = stream.tell()
    size_known = total_bytes is not None
    # When this chunk would reach (or pass) the known end of a non-empty
    # stream, read exactly what remains rather than a full chunk.
    if size_known and first_byte + chunk_size >= total_bytes > 0:
        read_size = total_bytes - first_byte
    else:
        read_size = chunk_size
    payload = stream.read(read_size)
    last_byte = stream.tell() - 1
    bytes_read = len(payload)

    if not size_known:
        if bytes_read < chunk_size:
            # A short read means we now **KNOW** the total number of bytes.
            total_bytes = last_byte + 1
    elif total_bytes == 0 and bytes_read != 0:
        # NOTE: We also expect ``first_byte == 0`` here but don't check
        #       because ``_prepare_initiate_request()`` requires the
        #       stream to be at the beginning.
        raise ValueError(
            u"Stream specified as empty, but produced non-empty content."
        )
    elif total_bytes != 0 and bytes_read == 0:
        raise ValueError(
            u"Stream is already exhausted. There is no content remaining."
        )

    return (
        first_byte,
        payload,
        get_content_range(first_byte, last_byte, total_bytes),
    )
|
||||
|
||||
|
||||
def get_content_range(start_byte, end_byte, total_bytes):
    """Build the content range header for a byte range.

    If ``total_bytes`` is not known, uses "bytes {start}-{end}/*".
    If we are dealing with an empty range (i.e. ``end_byte < start_byte``)
    then "bytes */{total}" is used.

    This function **ASSUMES** that if the size is not known, the caller will
    not also pass an empty range.

    Args:
        start_byte (int): The start (inclusive) of the byte range.
        end_byte (int): The end (inclusive) of the byte range.
        total_bytes (Optional[int]): The number of bytes in the byte
            range (if known).

    Returns:
        str: The content range header.
    """
    # Unknown size takes precedence over the empty-range check.
    if total_bytes is None:
        return _RANGE_UNKNOWN_TEMPLATE.format(start_byte, end_byte)

    # Empty range: only the total can be reported.
    if end_byte < start_byte:
        return _EMPTY_RANGE_TEMPLATE.format(total_bytes)

    return _CONTENT_RANGE_TEMPLATE.format(start_byte, end_byte, total_bytes)
|
|
@ -0,0 +1,678 @@
|
|||
# Copyright 2017 Google Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""``requests`` utilities for Google Media Downloads and Resumable Uploads.
|
||||
|
||||
This sub-package assumes callers will use the `requests`_ library
|
||||
as transport and `google-auth`_ for sending authenticated HTTP traffic
|
||||
with ``requests``.
|
||||
|
||||
.. _requests: http://docs.python-requests.org/
|
||||
.. _google-auth: https://google-auth.readthedocs.io/
|
||||
|
||||
====================
|
||||
Authorized Transport
|
||||
====================
|
||||
|
||||
To use ``google-auth`` and ``requests`` to create an authorized transport
|
||||
that has read-only access to Google Cloud Storage (GCS):
|
||||
|
||||
.. testsetup:: get-credentials
|
||||
|
||||
import google.auth
|
||||
import google.auth.credentials as creds_mod
|
||||
import mock
|
||||
|
||||
def mock_default(scopes=None):
|
||||
credentials = mock.Mock(spec=creds_mod.Credentials)
|
||||
return credentials, u'mock-project'
|
||||
|
||||
# Patch the ``default`` function on the module.
|
||||
original_default = google.auth.default
|
||||
google.auth.default = mock_default
|
||||
|
||||
.. doctest:: get-credentials
|
||||
|
||||
>>> import google.auth
|
||||
>>> import google.auth.transport.requests as tr_requests
|
||||
>>>
|
||||
>>> ro_scope = u'https://www.googleapis.com/auth/devstorage.read_only'
|
||||
>>> credentials, _ = google.auth.default(scopes=(ro_scope,))
|
||||
>>> transport = tr_requests.AuthorizedSession(credentials)
|
||||
>>> transport
|
||||
<google.auth.transport.requests.AuthorizedSession object at 0x...>
|
||||
|
||||
.. testcleanup:: get-credentials
|
||||
|
||||
# Put back the correct ``default`` function on the module.
|
||||
google.auth.default = original_default
|
||||
|
||||
================
|
||||
Simple Downloads
|
||||
================
|
||||
|
||||
To download an object from Google Cloud Storage, construct the media URL
|
||||
for the GCS object and download it with an authorized transport that has
|
||||
access to the resource:
|
||||
|
||||
.. testsetup:: basic-download
|
||||
|
||||
import mock
|
||||
import requests
|
||||
from six.moves import http_client
|
||||
|
||||
bucket = u'bucket-foo'
|
||||
blob_name = u'file.txt'
|
||||
|
||||
fake_response = requests.Response()
|
||||
fake_response.status_code = int(http_client.OK)
|
||||
fake_response.headers[u'Content-Length'] = u'1364156'
|
||||
fake_content = mock.MagicMock(spec=['__len__'])
|
||||
fake_content.__len__.return_value = 1364156
|
||||
fake_response._content = fake_content
|
||||
|
||||
get_method = mock.Mock(return_value=fake_response, spec=[])
|
||||
transport = mock.Mock(request=get_method, spec=['request'])
|
||||
|
||||
.. doctest:: basic-download
|
||||
|
||||
>>> from google.resumable_media.requests import Download
|
||||
>>>
|
||||
>>> url_template = (
|
||||
... u'https://www.googleapis.com/download/storage/v1/b/'
|
||||
... u'{bucket}/o/{blob_name}?alt=media')
|
||||
>>> media_url = url_template.format(
|
||||
... bucket=bucket, blob_name=blob_name)
|
||||
>>>
|
||||
>>> download = Download(media_url)
|
||||
>>> response = download.consume(transport)
|
||||
>>> download.finished
|
||||
True
|
||||
>>> response
|
||||
<Response [200]>
|
||||
>>> response.headers[u'Content-Length']
|
||||
'1364156'
|
||||
>>> len(response.content)
|
||||
1364156
|
||||
|
||||
To download only a portion of the bytes in the object,
|
||||
specify ``start`` and ``end`` byte positions (both optional):
|
||||
|
||||
.. testsetup:: basic-download-with-slice
|
||||
|
||||
import mock
|
||||
import requests
|
||||
from six.moves import http_client
|
||||
|
||||
from google.resumable_media.requests import Download
|
||||
|
||||
media_url = u'http://test.invalid'
|
||||
start = 4096
|
||||
end = 8191
|
||||
slice_size = end - start + 1
|
||||
|
||||
fake_response = requests.Response()
|
||||
fake_response.status_code = int(http_client.PARTIAL_CONTENT)
|
||||
fake_response.headers[u'Content-Length'] = u'{:d}'.format(slice_size)
|
||||
content_range = u'bytes {:d}-{:d}/1364156'.format(start, end)
|
||||
fake_response.headers[u'Content-Range'] = content_range
|
||||
fake_content = mock.MagicMock(spec=['__len__'])
|
||||
fake_content.__len__.return_value = slice_size
|
||||
fake_response._content = fake_content
|
||||
|
||||
get_method = mock.Mock(return_value=fake_response, spec=[])
|
||||
transport = mock.Mock(request=get_method, spec=['request'])
|
||||
|
||||
.. doctest:: basic-download-with-slice
|
||||
|
||||
>>> download = Download(media_url, start=4096, end=8191)
|
||||
>>> response = download.consume(transport)
|
||||
>>> download.finished
|
||||
True
|
||||
>>> response
|
||||
<Response [206]>
|
||||
>>> response.headers[u'Content-Length']
|
||||
'4096'
|
||||
>>> response.headers[u'Content-Range']
|
||||
'bytes 4096-8191/1364156'
|
||||
>>> len(response.content)
|
||||
4096
|
||||
|
||||
=================
|
||||
Chunked Downloads
|
||||
=================
|
||||
|
||||
For very large objects or objects of unknown size, it may make more sense
|
||||
to download the object in chunks rather than all at once. This can be done
|
||||
to avoid dropped connections with a poor internet connection or can allow
|
||||
multiple chunks to be downloaded in parallel to speed up the total
|
||||
download.
|
||||
|
||||
A :class:`.ChunkedDownload` uses the same media URL and authorized
|
||||
transport that a basic :class:`.Download` would use, but also
|
||||
requires a chunk size and a write-able byte ``stream``. The chunk size is used
|
||||
to determine how much of the resource to consume with each request and the
|
||||
stream is to allow the resource to be written out (e.g. to disk) without
|
||||
having to fit in memory all at once.
|
||||
|
||||
.. testsetup:: chunked-download
|
||||
|
||||
import io
|
||||
|
||||
import mock
|
||||
import requests
|
||||
from six.moves import http_client
|
||||
|
||||
media_url = u'http://test.invalid'
|
||||
|
||||
fifty_mb = 50 * 1024 * 1024
|
||||
one_gb = 1024 * 1024 * 1024
|
||||
fake_response = requests.Response()
|
||||
fake_response.status_code = int(http_client.PARTIAL_CONTENT)
|
||||
fake_response.headers[u'Content-Length'] = u'{:d}'.format(fifty_mb)
|
||||
content_range = u'bytes 0-{:d}/{:d}'.format(fifty_mb - 1, one_gb)
|
||||
fake_response.headers[u'Content-Range'] = content_range
|
||||
fake_content_begin = b'The beginning of the chunk...'
|
||||
fake_content = fake_content_begin + b'1' * (fifty_mb - 29)
|
||||
fake_response._content = fake_content
|
||||
|
||||
get_method = mock.Mock(return_value=fake_response, spec=[])
|
||||
transport = mock.Mock(request=get_method, spec=['request'])
|
||||
|
||||
.. doctest:: chunked-download
|
||||
|
||||
>>> from google.resumable_media.requests import ChunkedDownload
|
||||
>>>
|
||||
>>> chunk_size = 50 * 1024 * 1024 # 50MB
|
||||
>>> stream = io.BytesIO()
|
||||
>>> download = ChunkedDownload(
|
||||
... media_url, chunk_size, stream)
|
||||
>>> # Check the state of the download before starting.
|
||||
>>> download.bytes_downloaded
|
||||
0
|
||||
>>> download.total_bytes is None
|
||||
True
|
||||
>>> response = download.consume_next_chunk(transport)
|
||||
>>> # Check the state of the download after consuming one chunk.
|
||||
>>> download.finished
|
||||
False
|
||||
>>> download.bytes_downloaded # chunk_size
|
||||
52428800
|
||||
>>> download.total_bytes # 1GB
|
||||
1073741824
|
||||
>>> response
|
||||
<Response [206]>
|
||||
>>> response.headers[u'Content-Length']
|
||||
'52428800'
|
||||
>>> response.headers[u'Content-Range']
|
||||
'bytes 0-52428799/1073741824'
|
||||
>>> len(response.content) == chunk_size
|
||||
True
|
||||
>>> stream.seek(0)
|
||||
0
|
||||
>>> stream.read(29)
|
||||
b'The beginning of the chunk...'
|
||||
|
||||
The download will change its ``finished`` status to :data:`True`
|
||||
once the final chunk is consumed. In some cases, the final chunk may
|
||||
not be the same size as the other chunks:
|
||||
|
||||
.. testsetup:: chunked-download-end
|
||||
|
||||
import mock
|
||||
import requests
|
||||
from six.moves import http_client
|
||||
|
||||
from google.resumable_media.requests import ChunkedDownload
|
||||
|
||||
media_url = u'http://test.invalid'
|
||||
|
||||
fifty_mb = 50 * 1024 * 1024
|
||||
one_gb = 1024 * 1024 * 1024
|
||||
stream = mock.Mock(spec=['write'])
|
||||
download = ChunkedDownload(media_url, fifty_mb, stream)
|
||||
download._bytes_downloaded = 20 * fifty_mb
|
||||
download._total_bytes = one_gb
|
||||
|
||||
fake_response = requests.Response()
|
||||
fake_response.status_code = int(http_client.PARTIAL_CONTENT)
|
||||
slice_size = one_gb - 20 * fifty_mb
|
||||
fake_response.headers[u'Content-Length'] = u'{:d}'.format(slice_size)
|
||||
content_range = u'bytes {:d}-{:d}/{:d}'.format(
|
||||
20 * fifty_mb, one_gb - 1, one_gb)
|
||||
fake_response.headers[u'Content-Range'] = content_range
|
||||
fake_content = mock.MagicMock(spec=['__len__'])
|
||||
fake_content.__len__.return_value = slice_size
|
||||
fake_response._content = fake_content
|
||||
|
||||
get_method = mock.Mock(return_value=fake_response, spec=[])
|
||||
transport = mock.Mock(request=get_method, spec=['request'])
|
||||
|
||||
.. doctest:: chunked-download-end
|
||||
|
||||
>>> # The state of the download in progress.
|
||||
>>> download.finished
|
||||
False
|
||||
>>> download.bytes_downloaded # 20 chunks at 50MB
|
||||
1048576000
|
||||
>>> download.total_bytes # 1GB
|
||||
1073741824
|
||||
>>> response = download.consume_next_chunk(transport)
|
||||
>>> # The state of the download after consuming the final chunk.
|
||||
>>> download.finished
|
||||
True
|
||||
>>> download.bytes_downloaded == download.total_bytes
|
||||
True
|
||||
>>> response
|
||||
<Response [206]>
|
||||
>>> response.headers[u'Content-Length']
|
||||
'25165824'
|
||||
>>> response.headers[u'Content-Range']
|
||||
'bytes 1048576000-1073741823/1073741824'
|
||||
>>> len(response.content) < download.chunk_size
|
||||
True
|
||||
|
||||
In addition, a :class:`.ChunkedDownload` can also take optional
|
||||
``start`` and ``end`` byte positions.
|
||||
|
||||
==============
|
||||
Simple Uploads
|
||||
==============
|
||||
|
||||
Among the three supported upload classes, the simplest is
|
||||
:class:`.SimpleUpload`. A simple upload should be used when the resource
|
||||
being uploaded is small and when there is no metadata (other than the name)
|
||||
associated with the resource.
|
||||
|
||||
.. testsetup:: simple-upload
|
||||
|
||||
import json
|
||||
|
||||
import mock
|
||||
import requests
|
||||
from six.moves import http_client
|
||||
|
||||
bucket = u'some-bucket'
|
||||
blob_name = u'file.txt'
|
||||
|
||||
fake_response = requests.Response()
|
||||
fake_response.status_code = int(http_client.OK)
|
||||
payload = {
|
||||
u'bucket': bucket,
|
||||
u'contentType': u'text/plain',
|
||||
u'md5Hash': u'M0XLEsX9/sMdiI+4pB4CAQ==',
|
||||
u'name': blob_name,
|
||||
u'size': u'27',
|
||||
}
|
||||
fake_response._content = json.dumps(payload).encode(u'utf-8')
|
||||
|
||||
post_method = mock.Mock(return_value=fake_response, spec=[])
|
||||
transport = mock.Mock(request=post_method, spec=['request'])
|
||||
|
||||
.. doctest:: simple-upload
|
||||
:options: +NORMALIZE_WHITESPACE
|
||||
|
||||
>>> from google.resumable_media.requests import SimpleUpload
|
||||
>>>
|
||||
>>> url_template = (
|
||||
... u'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
|
||||
... u'uploadType=media&'
|
||||
... u'name={blob_name}')
|
||||
>>> upload_url = url_template.format(
|
||||
... bucket=bucket, blob_name=blob_name)
|
||||
>>>
|
||||
>>> upload = SimpleUpload(upload_url)
|
||||
>>> data = b'Some not too large content.'
|
||||
>>> content_type = u'text/plain'
|
||||
>>> response = upload.transmit(transport, data, content_type)
|
||||
>>> upload.finished
|
||||
True
|
||||
>>> response
|
||||
<Response [200]>
|
||||
>>> json_response = response.json()
|
||||
>>> json_response[u'bucket'] == bucket
|
||||
True
|
||||
>>> json_response[u'name'] == blob_name
|
||||
True
|
||||
>>> json_response[u'contentType'] == content_type
|
||||
True
|
||||
>>> json_response[u'md5Hash']
|
||||
'M0XLEsX9/sMdiI+4pB4CAQ=='
|
||||
>>> int(json_response[u'size']) == len(data)
|
||||
True
|
||||
|
||||
In the rare case that an upload fails, an :exc:`.InvalidResponse`
|
||||
will be raised:
|
||||
|
||||
.. testsetup:: simple-upload-fail
|
||||
|
||||
import time
|
||||
|
||||
import mock
|
||||
import requests
|
||||
from six.moves import http_client
|
||||
|
||||
from google import resumable_media
|
||||
from google.resumable_media import _helpers
|
||||
from google.resumable_media.requests import SimpleUpload as constructor
|
||||
|
||||
upload_url = u'http://test.invalid'
|
||||
data = b'Some not too large content.'
|
||||
content_type = u'text/plain'
|
||||
|
||||
fake_response = requests.Response()
|
||||
fake_response.status_code = int(http_client.SERVICE_UNAVAILABLE)
|
||||
|
||||
post_method = mock.Mock(return_value=fake_response, spec=[])
|
||||
transport = mock.Mock(request=post_method, spec=['request'])
|
||||
|
||||
time_sleep = time.sleep
|
||||
def dont_sleep(seconds):
|
||||
raise RuntimeError(u'No sleep', seconds)
|
||||
|
||||
def SimpleUpload(*args, **kwargs):
|
||||
upload = constructor(*args, **kwargs)
|
||||
# Mock the cumulative sleep to avoid retries (and `time.sleep()`).
|
||||
upload._retry_strategy = resumable_media.RetryStrategy(
|
||||
max_cumulative_retry=-1.0)
|
||||
return upload
|
||||
|
||||
time.sleep = dont_sleep
|
||||
|
||||
.. doctest:: simple-upload-fail
|
||||
:options: +NORMALIZE_WHITESPACE
|
||||
|
||||
>>> upload = SimpleUpload(upload_url)
|
||||
>>> error = None
|
||||
>>> try:
|
||||
... upload.transmit(transport, data, content_type)
|
||||
... except resumable_media.InvalidResponse as caught_exc:
|
||||
... error = caught_exc
|
||||
...
|
||||
>>> error
|
||||
InvalidResponse('Request failed with status code', 503,
|
||||
'Expected one of', <HTTPStatus.OK: 200>)
|
||||
>>> error.response
|
||||
<Response [503]>
|
||||
>>>
|
||||
>>> upload.finished
|
||||
True
|
||||
|
||||
.. testcleanup:: simple-upload-fail
|
||||
|
||||
# Put back the correct ``sleep`` function on the ``time`` module.
|
||||
time.sleep = time_sleep
|
||||
|
||||
Even in the case of failure, we see that the upload is
|
||||
:attr:`~.SimpleUpload.finished`, i.e. it cannot be re-used.
|
||||
|
||||
=================
|
||||
Multipart Uploads
|
||||
=================
|
||||
|
||||
After the simple upload, the :class:`.MultipartUpload` can be used to
|
||||
achieve essentially the same task. However, a multipart upload allows some
|
||||
metadata about the resource to be sent along as well. (This is the "multi":
|
||||
we send a first part with the metadata and a second part with the actual
|
||||
bytes in the resource.)
|
||||
|
||||
Usage is similar to the simple upload, but :meth:`~.MultipartUpload.transmit`
|
||||
accepts an extra required argument: ``metadata``.
|
||||
|
||||
.. testsetup:: multipart-upload
|
||||
|
||||
import json
|
||||
|
||||
import mock
|
||||
import requests
|
||||
from six.moves import http_client
|
||||
|
||||
bucket = u'some-bucket'
|
||||
blob_name = u'file.txt'
|
||||
data = b'Some not too large content.'
|
||||
content_type = u'text/plain'
|
||||
|
||||
fake_response = requests.Response()
|
||||
fake_response.status_code = int(http_client.OK)
|
||||
payload = {
|
||||
u'bucket': bucket,
|
||||
u'name': blob_name,
|
||||
u'metadata': {u'color': u'grurple'},
|
||||
}
|
||||
fake_response._content = json.dumps(payload).encode(u'utf-8')
|
||||
|
||||
post_method = mock.Mock(return_value=fake_response, spec=[])
|
||||
transport = mock.Mock(request=post_method, spec=['request'])
|
||||
|
||||
.. doctest:: multipart-upload
|
||||
|
||||
>>> from google.resumable_media.requests import MultipartUpload
|
||||
>>>
|
||||
>>> url_template = (
|
||||
... u'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
|
||||
... u'uploadType=multipart')
|
||||
>>> upload_url = url_template.format(bucket=bucket)
|
||||
>>>
|
||||
>>> upload = MultipartUpload(upload_url)
|
||||
>>> metadata = {
|
||||
... u'name': blob_name,
|
||||
... u'metadata': {
|
||||
... u'color': u'grurple',
|
||||
... },
|
||||
... }
|
||||
>>> response = upload.transmit(transport, data, metadata, content_type)
|
||||
>>> upload.finished
|
||||
True
|
||||
>>> response
|
||||
<Response [200]>
|
||||
>>> json_response = response.json()
|
||||
>>> json_response[u'bucket'] == bucket
|
||||
True
|
||||
>>> json_response[u'name'] == blob_name
|
||||
True
|
||||
>>> json_response[u'metadata'] == metadata[u'metadata']
|
||||
True
|
||||
|
||||
As with the simple upload, in the case of failure an :exc:`.InvalidResponse`
|
||||
is raised, enclosing the :attr:`~.InvalidResponse.response` that caused
|
||||
the failure and the ``upload`` object cannot be re-used after a failure.
|
||||
|
||||
=================
|
||||
Resumable Uploads
|
||||
=================
|
||||
|
||||
A :class:`.ResumableUpload` deviates from the other two upload classes:
|
||||
it transmits a resource over the course of multiple requests. This
|
||||
is intended to be used in cases where:
|
||||
|
||||
* the size of the resource is not known (i.e. it is generated on the fly)
|
||||
* requests must be short-lived
|
||||
* the client has request **size** limitations
|
||||
* the resource is too large to fit into memory
|
||||
|
||||
In general, a resource should be sent in a **single** request to avoid
|
||||
latency and reduce QPS. See `GCS best practices`_ for more things to
|
||||
consider when using a resumable upload.
|
||||
|
||||
.. _GCS best practices: https://cloud.google.com/storage/docs/\
|
||||
best-practices#uploading
|
||||
|
||||
After creating a :class:`.ResumableUpload` instance, a
|
||||
**resumable upload session** must be initiated to let the server know that
|
||||
a series of chunked upload requests will be coming and to obtain an
|
||||
``upload_id`` for the session. In contrast to the other two upload classes,
|
||||
:meth:`~.ResumableUpload.initiate` takes a byte ``stream`` as input rather
|
||||
than raw bytes as ``data``. This can be a file object, a :class:`~io.BytesIO`
|
||||
object or any other stream implementing the same interface.
|
||||
|
||||
.. testsetup:: resumable-initiate
|
||||
|
||||
import io
|
||||
|
||||
import mock
|
||||
import requests
|
||||
from six.moves import http_client
|
||||
|
||||
bucket = u'some-bucket'
|
||||
blob_name = u'file.txt'
|
||||
data = b'Some resumable bytes.'
|
||||
content_type = u'text/plain'
|
||||
|
||||
fake_response = requests.Response()
|
||||
fake_response.status_code = int(http_client.OK)
|
||||
fake_response._content = b''
|
||||
upload_id = u'ABCdef189XY_super_serious'
|
||||
resumable_url_template = (
|
||||
u'https://www.googleapis.com/upload/storage/v1/b/{bucket}'
|
||||
u'/o?uploadType=resumable&upload_id={upload_id}')
|
||||
resumable_url = resumable_url_template.format(
|
||||
bucket=bucket, upload_id=upload_id)
|
||||
fake_response.headers[u'location'] = resumable_url
|
||||
fake_response.headers[u'x-guploader-uploadid'] = upload_id
|
||||
|
||||
post_method = mock.Mock(return_value=fake_response, spec=[])
|
||||
transport = mock.Mock(request=post_method, spec=['request'])
|
||||
|
||||
.. doctest:: resumable-initiate
|
||||
|
||||
>>> from google.resumable_media.requests import ResumableUpload
|
||||
>>>
|
||||
>>> url_template = (
|
||||
... u'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
|
||||
... u'uploadType=resumable')
|
||||
>>> upload_url = url_template.format(bucket=bucket)
|
||||
>>>
|
||||
>>> chunk_size = 1024 * 1024 # 1MB
|
||||
>>> upload = ResumableUpload(upload_url, chunk_size)
|
||||
>>> stream = io.BytesIO(data)
|
||||
>>> # The upload doesn't know how "big" it is until seeing a stream.
|
||||
>>> upload.total_bytes is None
|
||||
True
|
||||
>>> metadata = {u'name': blob_name}
|
||||
>>> response = upload.initiate(transport, stream, metadata, content_type)
|
||||
>>> response
|
||||
<Response [200]>
|
||||
>>> upload.resumable_url == response.headers[u'Location']
|
||||
True
|
||||
>>> upload.total_bytes == len(data)
|
||||
True
|
||||
>>> upload_id = response.headers[u'X-GUploader-UploadID']
|
||||
>>> upload_id
|
||||
'ABCdef189XY_super_serious'
|
||||
>>> upload.resumable_url == upload_url + u'&upload_id=' + upload_id
|
||||
True
|
||||
|
||||
Once a :class:`.ResumableUpload` has been initiated, the resource is
|
||||
transmitted in chunks until completion:
|
||||
|
||||
.. testsetup:: resumable-transmit
|
||||
|
||||
import io
|
||||
import json
|
||||
|
||||
import mock
|
||||
import requests
|
||||
from six.moves import http_client
|
||||
|
||||
from google import resumable_media
|
||||
import google.resumable_media.requests.upload as upload_mod
|
||||
|
||||
data = b'01234567891'
|
||||
stream = io.BytesIO(data)
|
||||
# Create an "already initiated" upload.
|
||||
upload_url = u'http://test.invalid'
|
||||
chunk_size = 256 * 1024 # 256KB
|
||||
upload = upload_mod.ResumableUpload(upload_url, chunk_size)
|
||||
upload._resumable_url = u'http://test.invalid?upload_id=mocked'
|
||||
upload._stream = stream
|
||||
upload._content_type = u'text/plain'
|
||||
upload._total_bytes = len(data)
|
||||
|
||||
# After-the-fact update the chunk size so that len(data)
|
||||
# is split into three.
|
||||
upload._chunk_size = 4
|
||||
# Make three fake responses.
|
||||
fake_response0 = requests.Response()
|
||||
fake_response0.status_code = resumable_media.PERMANENT_REDIRECT
|
||||
fake_response0.headers[u'range'] = u'bytes=0-3'
|
||||
|
||||
fake_response1 = requests.Response()
|
||||
fake_response1.status_code = resumable_media.PERMANENT_REDIRECT
|
||||
fake_response1.headers[u'range'] = u'bytes=0-7'
|
||||
|
||||
fake_response2 = requests.Response()
|
||||
fake_response2.status_code = int(http_client.OK)
|
||||
bucket = u'some-bucket'
|
||||
blob_name = u'file.txt'
|
||||
payload = {
|
||||
u'bucket': bucket,
|
||||
u'name': blob_name,
|
||||
u'size': u'{:d}'.format(len(data)),
|
||||
}
|
||||
fake_response2._content = json.dumps(payload).encode(u'utf-8')
|
||||
|
||||
# Use the fake responses to mock a transport.
|
||||
responses = [fake_response0, fake_response1, fake_response2]
|
||||
put_method = mock.Mock(side_effect=responses, spec=[])
|
||||
transport = mock.Mock(request=put_method, spec=['request'])
|
||||
|
||||
.. doctest:: resumable-transmit
|
||||
|
||||
>>> response0 = upload.transmit_next_chunk(transport)
|
||||
>>> response0
|
||||
<Response [308]>
|
||||
>>> upload.finished
|
||||
False
|
||||
>>> upload.bytes_uploaded == upload.chunk_size
|
||||
True
|
||||
>>>
|
||||
>>> response1 = upload.transmit_next_chunk(transport)
|
||||
>>> response1
|
||||
<Response [308]>
|
||||
>>> upload.finished
|
||||
False
|
||||
>>> upload.bytes_uploaded == 2 * upload.chunk_size
|
||||
True
|
||||
>>>
|
||||
>>> response2 = upload.transmit_next_chunk(transport)
|
||||
>>> response2
|
||||
<Response [200]>
|
||||
>>> upload.finished
|
||||
True
|
||||
>>> upload.bytes_uploaded == upload.total_bytes
|
||||
True
|
||||
>>> json_response = response2.json()
|
||||
>>> json_response[u'bucket'] == bucket
|
||||
True
|
||||
>>> json_response[u'name'] == blob_name
|
||||
True
|
||||
"""
|
||||
from google._async_resumable_media.requests.download import ChunkedDownload
|
||||
from google._async_resumable_media.requests.download import Download
|
||||
from google._async_resumable_media.requests.upload import MultipartUpload
|
||||
from google._async_resumable_media.requests.download import RawChunkedDownload
|
||||
from google._async_resumable_media.requests.download import RawDownload
|
||||
from google._async_resumable_media.requests.upload import ResumableUpload
|
||||
from google._async_resumable_media.requests.upload import SimpleUpload
|
||||
|
||||
|
||||
__all__ = [
|
||||
u"ChunkedDownload",
|
||||
u"Download",
|
||||
u"MultipartUpload",
|
||||
u"RawChunkedDownload",
|
||||
u"RawDownload",
|
||||
u"ResumableUpload",
|
||||
u"SimpleUpload",
|
||||
]
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,155 @@
|
|||
# Copyright 2017 Google Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Shared utilities used by both downloads and uploads.
|
||||
|
||||
These utilities are explicitly catered to ``requests``-like transports.
|
||||
"""
|
||||
|
||||
|
||||
import functools
|
||||
|
||||
from google._async_resumable_media import _helpers
|
||||
from google.resumable_media import common
|
||||
|
||||
import google.auth.transport._aiohttp_requests as aiohttp_requests
|
||||
import aiohttp
|
||||
|
||||
_DEFAULT_RETRY_STRATEGY = common.RetryStrategy()
|
||||
_SINGLE_GET_CHUNK_SIZE = 8192
|
||||
|
||||
|
||||
# The number of seconds to wait to establish a connection
|
||||
# (connect() call on socket). Avoid setting this to a multiple of 3 to not
|
||||
# align with TCP retransmission timing (typically 2.5-3s).
|
||||
_DEFAULT_CONNECT_TIMEOUT = 61
|
||||
# The number of seconds to wait between bytes sent from the server.
|
||||
_DEFAULT_READ_TIMEOUT = 60
|
||||
_DEFAULT_TIMEOUT = aiohttp.ClientTimeout(
|
||||
connect=_DEFAULT_CONNECT_TIMEOUT, sock_read=_DEFAULT_READ_TIMEOUT
|
||||
)
|
||||
|
||||
|
||||
class RequestsMixin:
    """Mix-in providing transport-specific accessors for HTTP responses.

    Each accessor is a static method that pulls one piece of information
    (status code, headers, body) out of a response object, so that more
    general-purpose code does not need to know the response type.
    """

    @staticmethod
    def _get_status_code(response):
        """Return the status code of an HTTP response.

        Args:
            response: The HTTP response object; its ``status`` attribute
                is read.

        Returns:
            int: The status code.
        """
        return response.status

    @staticmethod
    def _get_headers(response):
        """Return the header mapping of an HTTP response.

        Args:
            response: The HTTP response object; its ``_headers`` attribute
                is read.

        Returns:
            The header mapping stored on ``response._headers``.
        """
        # Async tests modify ``_headers`` rather than ``headers``, so the
        # internal field is the one that is read here.
        return response._headers

    @staticmethod
    async def _get_body(response):
        """Read and return the body of an HTTP response.

        The response is wrapped in ``aiohttp_requests._CombinedResponse``
        and the wrapper's ``data`` stream is read.

        Args:
            response: The HTTP response object.

        Returns:
            bytes: The body of the ``response``.
        """
        combined = aiohttp_requests._CombinedResponse(response)
        return await combined.data.read()
|
||||
|
||||
|
||||
class RawRequestsMixin(RequestsMixin):
    """Variant of ``RequestsMixin`` whose body accessor uses raw content."""

    @staticmethod
    async def _get_body(response):
        """Read and return the raw body of an HTTP response.

        The response is wrapped in ``aiohttp_requests._CombinedResponse``
        and the wrapper's ``raw_content()`` coroutine is awaited.

        Args:
            response: The HTTP response object.

        Returns:
            bytes: The body of the ``response``.
        """
        combined = aiohttp_requests._CombinedResponse(response)
        return await combined.raw_content()
|
||||
|
||||
|
||||
async def http_request(
    transport,
    method,
    url,
    data=None,
    headers=None,
    retry_strategy=_DEFAULT_RETRY_STRATEGY,
    **transport_kwargs
):
    """Issue an HTTP request, retrying according to ``retry_strategy``.

    Args:
        transport (object): A transport object which can make authenticated
            requests via a ``request()`` method. That method must accept an
            HTTP method, a URL, a ``data`` keyword argument and a
            ``headers`` keyword argument.
        method (str): The HTTP method for the request.
        url (str): The URL for the request.
        data (Optional[bytes]): The body of the request.
        headers (Mapping[str, str]): The headers for the request
            (``transport`` may also add additional headers).
        retry_strategy (~google.resumable_media.common.RetryStrategy): The
            strategy to use if the request fails and must be retried.
        transport_kwargs (Dict[str, str]): Extra keyword arguments to be
            passed along to ``transport.request``. When no ``timeout`` is
            given, the module-level ``_DEFAULT_TIMEOUT`` is used.

    Returns:
        object: The return value of ``transport.request()``.
    """
    # NOTE(asyncio/aiohttp): the sync implementation uses a
    # (connect, read) tuple as its default timeout; this async path falls
    # back to the module-level ``_DEFAULT_TIMEOUT`` object instead, so the
    # two implementations diverge here.
    transport_kwargs.setdefault("timeout", _DEFAULT_TIMEOUT)

    # Bind every argument up front so the retry helper can simply re-invoke
    # the same call on each attempt.
    send = functools.partial(
        transport.request, method, url, data=data, headers=headers, **transport_kwargs
    )

    return await _helpers.wait_and_retry(
        send, RequestsMixin._get_status_code, retry_strategy
    )
|
|
@ -0,0 +1,461 @@
|
|||
# Copyright 2017 Google Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Support for downloading media from Google APIs."""
|
||||
|
||||
import urllib3.response
|
||||
|
||||
from google._async_resumable_media import _download
|
||||
from google._async_resumable_media import _helpers
|
||||
from google._async_resumable_media.requests import _request_helpers
|
||||
from google.resumable_media import common
|
||||
from google.resumable_media import _helpers as sync_helpers
|
||||
from google.resumable_media.requests import download
|
||||
|
||||
# Reuse the checksum-mismatch message template defined by the
# ``google.resumable_media.requests.download`` module imported above.
_CHECKSUM_MISMATCH = download._CHECKSUM_MISMATCH
|
||||
|
||||
|
||||
class Download(_request_helpers.RequestsMixin, _download.Download):
    """Manage the download of a resource from a Google API.

    A "slice" of the resource can be fetched by giving ``start`` and / or
    ``end``. In typical usage neither of them is provided.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded. If not
            provided, but ``end`` is provided, will download from the
            beginning to ``end`` of the media.
        end (int): The last byte in a range to be downloaded. If not
            provided, but ``start`` is provided, will download from the
            ``start`` to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
        checksum (Optional[str]): The type of checksum to compute to verify
            the integrity of the object. The response headers must contain
            a checksum of the requested type. If the headers lack an
            appropriate checksum (for instance in the case of transcoded or
            ranged downloads where the remote service does not know the
            correct checksum) an INFO-level log will be emitted. Supported
            values are "md5", "crc32c" and None. The default is "md5".

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
    """

    async def _write_to_stream(self, response):
        """Copy the response body into the attached stream, verifying it.

        .. note::

            This method assumes that the ``_stream`` attribute is set on the
            current download.

        Args:
            response (object): The HTTP response object.

        Raises:
            ~google.resumable_media.common.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
        """
        # The expected checksum may be None even when one was requested; the
        # helper then emits an info log (_MISSING_CHECKSUM). An invalid
        # checksum type makes the helper raise ValueError.
        expected_checksum, checksum_object = sync_helpers._get_expected_checksum(
            response, self._get_headers, self.media_url, checksum_type=self.checksum
        )

        # For gzip-encoded responses ``_add_decoder`` hands back a no-op hash
        # and lets the patched decoder feed ``checksum_object`` instead.
        hasher = _add_decoder(response, checksum_object)

        chunks = response.content.iter_chunked(_request_helpers._SINGLE_GET_CHUNK_SIZE)
        async for piece in chunks:
            self._stream.write(piece)
            hasher.update(piece)

        # Nothing to compare against: the download is accepted as-is.
        if expected_checksum is None:
            return

        actual_checksum = sync_helpers.prepare_checksum_digest(
            checksum_object.digest()
        )
        if actual_checksum == expected_checksum:
            return

        msg = _CHECKSUM_MISMATCH.format(
            self.media_url,
            expected_checksum,
            actual_checksum,
            checksum_type=self.checksum.upper(),
        )
        raise common.DataCorruption(response, msg)

    async def consume(self, transport, timeout=_request_helpers._DEFAULT_TIMEOUT):
        """Consume the resource to be downloaded.

        If a ``stream`` is attached to this download, then the downloaded
        resource will be written to the stream.

        Args:
            transport (object): A transport object which can make
                authenticated requests.
            timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.
                Can also be passed as an `aiohttp.ClientTimeout` object.

        Returns:
            object: The HTTP response returned by ``transport``.

        Raises:
            ~google.resumable_media.common.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
            ValueError: If the current :class:`Download` has already
                finished.
        """
        method, url, payload, headers = self._prepare_request()

        # Only ask for a streamed response when there is somewhere to write it.
        streaming = {u"stream": True} if self._stream is not None else {}

        # NOTE: We assume "payload is None" but pass it along anyway.
        result = await _request_helpers.http_request(
            transport,
            method,
            url,
            data=payload,
            headers=headers,
            retry_strategy=self._retry_strategy,
            timeout=timeout,
            **streaming
        )

        self._process_response(result)

        if self._stream is not None:
            await self._write_to_stream(result)

        return result
|
||||
|
||||
|
||||
class RawDownload(_request_helpers.RawRequestsMixin, _download.Download):
    """Helper to manage downloading a raw resource from a Google API.

    "Slices" of the resource can be retrieved by specifying a range
    with ``start`` and / or ``end``. However, in typical usage, neither
    ``start`` nor ``end`` is expected to be provided.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded. If not
            provided, but ``end`` is provided, will download from the
            beginning to ``end`` of the media.
        end (int): The last byte in a range to be downloaded. If not
            provided, but ``start`` is provided, will download from the
            ``start`` to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
        checksum (Optional[str]): The type of checksum to compute to verify
            the integrity of the object. The response headers must contain
            a checksum of the requested type. If the headers lack an
            appropriate checksum (for instance in the case of transcoded or
            ranged downloads where the remote service does not know the
            correct checksum) an INFO-level log will be emitted. Supported
            values are "md5", "crc32c" and None. The default is "md5".

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
    """

    async def _write_to_stream(self, response):
        """Write response body to a write-able stream.

        .. note::

            This method assumes that the ``_stream`` attribute is set on the
            current download.

        Args:
            response (object): The HTTP response object.

        Raises:
            ~google.resumable_media.common.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
        """
        # `_get_expected_checksum()` may return None even if a checksum was
        # requested, in which case it will emit an info log _MISSING_CHECKSUM.
        # If an invalid checksum type is specified, this will raise ValueError.
        expected_checksum, checksum_object = sync_helpers._get_expected_checksum(
            response, self._get_headers, self.media_url, checksum_type=self.checksum
        )

        # The chunks are hashed exactly as they are written; no decoder is
        # installed on this code path.
        async for chunk in response.content.iter_chunked(
            _request_helpers._SINGLE_GET_CHUNK_SIZE
        ):
            self._stream.write(chunk)
            checksum_object.update(chunk)

        # Without an expected checksum there is nothing to verify.
        if expected_checksum is None:
            return

        actual_checksum = sync_helpers.prepare_checksum_digest(
            checksum_object.digest()
        )

        if actual_checksum != expected_checksum:
            msg = _CHECKSUM_MISMATCH.format(
                self.media_url,
                expected_checksum,
                actual_checksum,
                checksum_type=self.checksum.upper(),
            )
            raise common.DataCorruption(response, msg)

    async def consume(self, transport, timeout=_request_helpers._DEFAULT_TIMEOUT):
        """Consume the resource to be downloaded.

        If a ``stream`` is attached to this download, then the downloaded
        resource will be written to the stream.

        Args:
            transport (object): A transport object which can make
                authenticated requests.
            timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.
                Can also be passed as an `aiohttp.ClientTimeout` object.

        Returns:
            object: The HTTP response returned by ``transport``.

        Raises:
            ~google.resumable_media.common.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
            ValueError: If the current :class:`Download` has already
                finished.
        """
        method, url, payload, headers = self._prepare_request()
        # NOTE: We assume "payload is None" but pass it along anyway.
        result = await _request_helpers.http_request(
            transport,
            method,
            url,
            data=payload,
            headers=headers,
            retry_strategy=self._retry_strategy,
            # Forward the caller's timeout; it was previously accepted but
            # dropped, so the module default was always used.
            timeout=timeout,
        )

        self._process_response(result)

        if self._stream is not None:
            await self._write_to_stream(result)

        return result
|
||||
|
||||
|
||||
class ChunkedDownload(_request_helpers.RequestsMixin, _download.ChunkedDownload):
    """Fetch a resource from a Google API one chunk at a time.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each
            request.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            will be used to concatenate chunks of the resource as they are
            downloaded.
        start (int): The first byte in a range to be downloaded. If not
            provided, defaults to ``0``.
        end (int): The last byte in a range to be downloaded. If not
            provided, will download to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with each request, e.g. headers for data encryption
            key headers.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each request.

    Raises:
        ValueError: If ``start`` is negative.
    """

    async def consume_next_chunk(
        self, transport, timeout=_request_helpers._DEFAULT_TIMEOUT
    ):
        """Request and process the next chunk of the resource.

        Args:
            transport (object): A transport object which can make
                authenticated requests.
            timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.
                Can also be passed as an `aiohttp.ClientTimeout` object.

        Returns:
            object: The HTTP response returned by ``transport``.

        Raises:
            ValueError: If the current download has finished.
        """
        method, url, payload, headers = self._prepare_request()

        # NOTE: We assume "payload is None" but pass it along anyway.
        request_kwargs = {
            u"data": payload,
            u"headers": headers,
            u"retry_strategy": self._retry_strategy,
            u"timeout": timeout,
        }
        result = await _request_helpers.http_request(
            transport, method, url, **request_kwargs
        )

        # Processing is awaited here, unlike in the single-shot downloads.
        await self._process_response(result)
        return result
|
||||
|
||||
|
||||
class RawChunkedDownload(_request_helpers.RawRequestsMixin, _download.ChunkedDownload):
    """Fetch a raw resource from a Google API one chunk at a time.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each
            request.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            will be used to concatenate chunks of the resource as they are
            downloaded.
        start (int): The first byte in a range to be downloaded. If not
            provided, defaults to ``0``.
        end (int): The last byte in a range to be downloaded. If not
            provided, will download to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with each request, e.g. headers for data encryption
            key headers.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each request.

    Raises:
        ValueError: If ``start`` is negative.
    """

    async def consume_next_chunk(
        self, transport, timeout=_request_helpers._DEFAULT_TIMEOUT
    ):
        """Request and process the next chunk of the raw resource.

        Args:
            transport (object): A transport object which can make
                authenticated requests.
            timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.
                Can also be passed as an `aiohttp.ClientTimeout` object.

        Returns:
            object: The HTTP response returned by ``transport``.

        Raises:
            ValueError: If the current download has finished.
        """
        prepared = self._prepare_request()
        method, url, payload, headers = prepared

        # NOTE: We assume "payload is None" but pass it along anyway.
        options = dict(
            data=payload,
            headers=headers,
            retry_strategy=self._retry_strategy,
            timeout=timeout,
        )
        result = await _request_helpers.http_request(transport, method, url, **options)

        await self._process_response(result)
        return result
|
||||
|
||||
|
||||
def _add_decoder(response_raw, checksum):
|
||||
"""Patch the ``_decoder`` on a ``urllib3`` response.
|
||||
|
||||
This is so that we can intercept the compressed bytes before they are
|
||||
decoded.
|
||||
|
||||
Only patches if the content encoding is ``gzip``.
|
||||
|
||||
Args:
|
||||
response_raw (urllib3.response.HTTPResponse): The raw response for
|
||||
an HTTP request.
|
||||
checksum (object):
|
||||
A checksum which will be updated with compressed bytes.
|
||||
|
||||
Returns:
|
||||
object: Either the original ``checksum`` if ``_decoder`` is not
|
||||
patched, or a ``_DoNothingHash`` if the decoder is patched, since the
|
||||
caller will no longer need to hash to decoded bytes.
|
||||
"""
|
||||
|
||||
encoding = response_raw.headers.get(u"content-encoding", u"").lower()
|
||||
if encoding != u"gzip":
|
||||
return checksum
|
||||
|
||||
response_raw._decoder = _GzipDecoder(checksum)
|
||||
return _helpers._DoNothingHash()
|
||||
|
||||
|
||||
class _GzipDecoder(urllib3.response.GzipDecoder):
    """``urllib3`` gzip decoder that also feeds a checksum.

    Every compressed chunk is passed to the checksum before it is decoded,
    so the checksum of the compressed value can be computed.

    Args:
        checksum (object):
            A checksum which will be updated with compressed bytes.
    """

    def __init__(self, checksum):
        super().__init__()
        self._checksum = checksum

    def decompress(self, data):
        """Hash the compressed bytes, then decompress them.

        Args:
            data (bytes): The compressed bytes to be decompressed.

        Returns:
            bytes: The decompressed bytes from ``data``.
        """
        # Update the checksum first so it always reflects the bytes as they
        # arrived, before the parent decoder transforms them.
        self._checksum.update(data)
        return super().decompress(data)
|
|
@ -0,0 +1,515 @@
|
|||
# Copyright 2017 Google Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Support for resumable uploads.
|
||||
|
||||
Also supported here are simple (media) uploads and multipart
|
||||
uploads that contain both metadata and a small file as payload.
|
||||
"""
|
||||
|
||||
|
||||
from google._async_resumable_media import _upload
|
||||
from google._async_resumable_media.requests import _request_helpers
|
||||
|
||||
|
||||
class SimpleUpload(_request_helpers.RequestsMixin, _upload.SimpleUpload):
    """Upload a resource to a Google API in one request.

    A **simple** media upload carries no metadata and finishes the upload
    with a single request.

    Args:
        upload_url (str): The URL where the content will be uploaded.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.

    Attributes:
        upload_url (str): The URL where the content will be uploaded.
    """

    async def transmit(
        self,
        transport,
        data,
        content_type,
        timeout=_request_helpers._DEFAULT_TIMEOUT,
    ):
        """Send the resource to be uploaded.

        Args:
            transport (object): A transport object which can make
                authenticated requests.
            data (bytes): The resource content to be uploaded.
            content_type (str): The content type of the resource, e.g. a JPEG
                image has content type ``image/jpeg``.
            timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.
                Can also be passed as an `aiohttp.ClientTimeout` object.

        Returns:
            object: The HTTP response returned by ``transport``.
        """
        method, url, payload, headers = self._prepare_request(data, content_type)

        request_kwargs = dict(
            data=payload,
            headers=headers,
            retry_strategy=self._retry_strategy,
            timeout=timeout,
        )
        response = await _request_helpers.http_request(
            transport, method, url, **request_kwargs
        )

        self._process_response(response)
        return response
|
||||
|
||||
|
||||
class MultipartUpload(_request_helpers.RequestsMixin, _upload.MultipartUpload):
    """Upload a resource together with its metadata to a Google API.

    A **multipart** upload carries both the metadata and the resource in one
    (multipart) request.

    Args:
        upload_url (str): The URL where the content will be uploaded.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
        checksum (Optional[str]): The type of checksum to compute to verify
            the integrity of the object. The request metadata will be amended
            to include the computed value. Using this option will override a
            manually-set checksum value. Supported values are "md5",
            "crc32c" and None. The default is None.

    Attributes:
        upload_url (str): The URL where the content will be uploaded.
    """

    async def transmit(
        self,
        transport,
        data,
        metadata,
        content_type,
        timeout=_request_helpers._DEFAULT_TIMEOUT,
    ):
        """Send the resource and its metadata to be uploaded.

        Args:
            transport (object): A transport object which can make
                authenticated requests.
            data (bytes): The resource content to be uploaded.
            metadata (Mapping[str, str]): The resource metadata, such as an
                ACL list.
            content_type (str): The content type of the resource, e.g. a JPEG
                image has content type ``image/jpeg``.
            timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.
                Can also be passed as an `aiohttp.ClientTimeout` object.

        Returns:
            object: The HTTP response returned by ``transport``.
        """
        prepared = self._prepare_request(data, metadata, content_type)
        method, url, payload, headers = prepared

        options = {
            "data": payload,
            "headers": headers,
            "retry_strategy": self._retry_strategy,
            "timeout": timeout,
        }
        response = await _request_helpers.http_request(
            transport, method, url, **options
        )

        self._process_response(response)
        return response
|
||||
|
||||
|
||||
class ResumableUpload(_request_helpers.RequestsMixin, _upload.ResumableUpload):
|
||||
"""Initiate and fulfill a resumable upload to a Google API.
|
||||
|
||||
A **resumable** upload sends an initial request with the resource metadata
|
||||
and then gets assigned an upload ID / upload URL to send bytes to.
|
||||
Using the upload URL, the upload is then done in chunks (determined by
|
||||
the user) until all bytes have been uploaded.
|
||||
|
||||
When constructing a resumable upload, only the resumable upload URL and
|
||||
the chunk size are required:
|
||||
|
||||
.. testsetup:: resumable-constructor
|
||||
|
||||
bucket = u'bucket-foo'
|
||||
|
||||
.. doctest:: resumable-constructor
|
||||
|
||||
>>> from google.resumable_media.requests import ResumableUpload
|
||||
>>>
|
||||
>>> url_template = (
|
||||
... u'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
|
||||
... u'uploadType=resumable')
|
||||
>>> upload_url = url_template.format(bucket=bucket)
|
||||
>>>
|
||||
>>> chunk_size = 3 * 1024 * 1024 # 3MB
|
||||
>>> upload = ResumableUpload(upload_url, chunk_size)
|
||||
|
||||
When initiating an upload (via :meth:`initiate`), the caller is expected
|
||||
to pass the resource being uploaded as a file-like ``stream``. If the size
|
||||
of the resource is explicitly known, it can be passed in directly:
|
||||
|
||||
.. testsetup:: resumable-explicit-size
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
import mock
|
||||
import requests
|
||||
from six.moves import http_client
|
||||
|
||||
from google.resumable_media.requests import ResumableUpload
|
||||
|
||||
upload_url = u'http://test.invalid'
|
||||
chunk_size = 3 * 1024 * 1024 # 3MB
|
||||
upload = ResumableUpload(upload_url, chunk_size)
|
||||
|
||||
file_desc, filename = tempfile.mkstemp()
|
||||
os.close(file_desc)
|
||||
|
||||
data = b'some bytes!'
|
||||
with open(filename, u'wb') as file_obj:
|
||||
file_obj.write(data)
|
||||
|
||||
fake_response = requests.Response()
|
||||
fake_response.status_code = int(http_client.OK)
|
||||
fake_response._content = b''
|
||||
resumable_url = u'http://test.invalid?upload_id=7up'
|
||||
fake_response.headers[u'location'] = resumable_url
|
||||
|
||||
post_method = mock.Mock(return_value=fake_response, spec=[])
|
||||
transport = mock.Mock(request=post_method, spec=['request'])
|
||||
|
||||
.. doctest:: resumable-explicit-size
|
||||
|
||||
>>> import os
|
||||
>>>
|
||||
>>> upload.total_bytes is None
|
||||
True
|
||||
>>>
|
||||
>>> stream = open(filename, u'rb')
|
||||
>>> total_bytes = os.path.getsize(filename)
|
||||
>>> metadata = {u'name': filename}
|
||||
>>> response = upload.initiate(
|
||||
... transport, stream, metadata, u'text/plain',
|
||||
... total_bytes=total_bytes)
|
||||
>>> response
|
||||
<Response [200]>
|
||||
>>>
|
||||
>>> upload.total_bytes == total_bytes
|
||||
True
|
||||
|
||||
.. testcleanup:: resumable-explicit-size
|
||||
|
||||
os.remove(filename)
|
||||
|
||||
If the stream is in a "final" state (i.e. it won't have any more bytes
|
||||
written to it), the total number of bytes can be determined implicitly
|
||||
from the ``stream`` itself:
|
||||
|
||||
.. testsetup:: resumable-implicit-size
|
||||
|
||||
import io
|
||||
|
||||
import mock
|
||||
import requests
|
||||
from six.moves import http_client
|
||||
|
||||
from google.resumable_media.requests import ResumableUpload
|
||||
|
||||
upload_url = u'http://test.invalid'
|
||||
chunk_size = 3 * 1024 * 1024 # 3MB
|
||||
upload = ResumableUpload(upload_url, chunk_size)
|
||||
|
||||
fake_response = requests.Response()
|
||||
fake_response.status_code = int(http_client.OK)
|
||||
fake_response._content = b''
|
||||
resumable_url = u'http://test.invalid?upload_id=7up'
|
||||
fake_response.headers[u'location'] = resumable_url
|
||||
|
||||
post_method = mock.Mock(return_value=fake_response, spec=[])
|
||||
transport = mock.Mock(request=post_method, spec=['request'])
|
||||
|
||||
data = b'some MOAR bytes!'
|
||||
metadata = {u'name': u'some-file.jpg'}
|
||||
content_type = u'image/jpeg'
|
||||
|
||||
.. doctest:: resumable-implicit-size
|
||||
|
||||
>>> stream = io.BytesIO(data)
|
||||
>>> response = upload.initiate(
|
||||
... transport, stream, metadata, content_type)
|
||||
>>>
|
||||
>>> upload.total_bytes == len(data)
|
||||
True
|
||||
|
||||
If the size of the resource is **unknown** when the upload is initiated,
|
||||
the ``stream_final`` argument can be used. This might occur if the
|
||||
resource is being dynamically created on the client (e.g. application
|
||||
logs). To use this argument:
|
||||
|
||||
.. testsetup:: resumable-unknown-size
|
||||
|
||||
import io
|
||||
|
||||
import mock
|
||||
import requests
|
||||
from six.moves import http_client
|
||||
|
||||
from google.resumable_media.requests import ResumableUpload
|
||||
|
||||
upload_url = u'http://test.invalid'
|
||||
chunk_size = 3 * 1024 * 1024 # 3MB
|
||||
upload = ResumableUpload(upload_url, chunk_size)
|
||||
|
||||
fake_response = requests.Response()
|
||||
fake_response.status_code = int(http_client.OK)
|
||||
fake_response._content = b''
|
||||
resumable_url = u'http://test.invalid?upload_id=7up'
|
||||
fake_response.headers[u'location'] = resumable_url
|
||||
|
||||
post_method = mock.Mock(return_value=fake_response, spec=[])
|
||||
transport = mock.Mock(request=post_method, spec=['request'])
|
||||
|
||||
metadata = {u'name': u'some-file.jpg'}
|
||||
content_type = u'application/octet-stream'
|
||||
|
||||
stream = io.BytesIO(b'data')
|
||||
|
||||
.. doctest:: resumable-unknown-size
|
||||
|
||||
>>> response = upload.initiate(
|
||||
... transport, stream, metadata, content_type,
|
||||
... stream_final=False)
|
||||
>>>
|
||||
>>> upload.total_bytes is None
|
||||
True
|
||||
|
||||
Args:
|
||||
upload_url (str): The URL where the resumable upload will be initiated.
|
||||
chunk_size (int): The size of each chunk used to upload the resource.
|
||||
headers (Optional[Mapping[str, str]]): Extra headers that should
|
||||
be sent with the :meth:`initiate` request, e.g. headers for
|
||||
encrypted data. These **will not** be sent with
|
||||
:meth:`transmit_next_chunk` or :meth:`recover` requests.
|
||||
checksum Optional([str]): The type of checksum to compute to verify
|
||||
the integrity of the object. After the upload is complete, the
|
||||
server-computed checksum of the resulting object will be checked
|
||||
and google.resumable_media.common.DataCorruption will be raised on
|
||||
a mismatch. The corrupted file will not be deleted from the remote
|
||||
host automatically. Supported values are "md5", "crc32c" and None.
|
||||
The default is None.
|
||||
|
||||
Attributes:
|
||||
upload_url (str): The URL where the content will be uploaded.
|
||||
|
||||
Raises:
|
||||
ValueError: If ``chunk_size`` is not a multiple of
|
||||
:data:`.UPLOAD_CHUNK_SIZE`.
|
||||
"""
|
||||
|
||||
async def initiate(
    self,
    transport,
    stream,
    metadata,
    content_type,
    total_bytes=None,
    stream_final=True,
    timeout=_request_helpers._DEFAULT_TIMEOUT,
):
    """Start a resumable upload session.

    Unless told otherwise, ``stream`` is treated as being in a "final"
    state, i.e. ready to transmit. Pass ``stream_final=False`` when the
    size of the resource is not known up front, for example when bytes
    are still being fed into ``stream`` dynamically (such as a stream
    attached to application logs).

    With ``stream_final=False``, every call to :meth:`transmit_next_chunk`
    reads :attr:`chunk_size` bytes from the stream. As soon as one of
    those reads yields strictly fewer bytes than the chunk size, the
    upload is concluded.

    Args:
        transport (~requests.Session): A ``requests`` object which can
            make authenticated requests.
        stream (IO[bytes]): The stream (i.e. file-like object) to be
            uploaded. It **must** be positioned at the beginning (i.e.
            ``stream.tell() == 0``).
        metadata (Mapping[str, str]): The resource metadata, such as an
            ACL list.
        content_type (str): The content type of the resource, e.g.
            ``image/jpeg`` for a JPEG image.
        total_bytes (Optional[int]): The total number of bytes to be
            uploaded. When given, the upload size **will not** be
            determined from the stream (even if ``stream_final=True``).
        stream_final (Optional[bool]): Whether ``stream`` is "final"
            (i.e. no more bytes will be added to it). If so, the upload
            size is determined from the size of the stream. Ignored when
            ``total_bytes`` is passed.
        timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
            The number of seconds to wait for the server response.
            Depending on the retry strategy, a request may be repeated
            several times using the same timeout each time.
            Can also be passed as an `aiohttp.ClientTimeout` object.

    Returns:
        ~requests.Response: The HTTP response returned by ``transport``.
    """
    prepared = self._prepare_initiate_request(
        stream,
        metadata,
        content_type,
        total_bytes=total_bytes,
        stream_final=stream_final,
    )
    verb, target_url, body, request_headers = prepared

    # NOTE(review): the documented transport/response types mention
    # ``requests`` while ``timeout`` accepts an ``aiohttp.ClientTimeout``;
    # confirm which transport type this coroutine actually expects.
    result = await _request_helpers.http_request(
        transport,
        verb,
        target_url,
        data=body,
        headers=request_headers,
        retry_strategy=self._retry_strategy,
        timeout=timeout,
    )

    # Hand the response to the upload's own bookkeeping before returning
    # the raw response to the caller.
    self._process_initiate_response(result)
    return result
|
||||
|
||||
async def transmit_next_chunk(
    self, transport, timeout=_request_helpers._DEFAULT_TIMEOUT
):
    """Send the next chunk of the resource being uploaded.

    When the upload was initiated with ``stream_final=False``, this
    method works out dynamically whether the upload has completed: the
    upload is considered complete once a read from the stream produces
    fewer than :attr:`chunk_size` bytes.

    On failure, the exception that is thrown keeps a reference to the
    failed response:

    .. testsetup:: bad-response

       import io

       import mock
       import requests
       from six.moves import http_client

       from google import resumable_media
       import google.resumable_media.requests.upload as upload_mod

       transport = mock.Mock(spec=['request'])
       fake_response = requests.Response()
       fake_response.status_code = int(http_client.BAD_REQUEST)
       transport.request.return_value = fake_response

       upload_url = u'http://test.invalid'
       upload = upload_mod.ResumableUpload(
           upload_url, resumable_media.UPLOAD_CHUNK_SIZE)
       # Fake that the upload has been initiate()-d
       data = b'data is here'
       upload._stream = io.BytesIO(data)
       upload._total_bytes = len(data)
       upload._resumable_url = u'http://test.invalid?upload_id=nope'

    .. doctest:: bad-response
       :options: +NORMALIZE_WHITESPACE

       >>> error = None
       >>> try:
       ...     upload.transmit_next_chunk(transport)
       ... except resumable_media.InvalidResponse as caught_exc:
       ...     error = caught_exc
       ...
       >>> error
       InvalidResponse('Request failed with status code', 400,
                       'Expected one of', <HTTPStatus.OK: 200>, 308)
       >>> error.response
       <Response [400]>

    Args:
        transport (~requests.Session): A ``requests`` object which can
            make authenticated requests.
        timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
            The number of seconds to wait for the server response.
            Depending on the retry strategy, a request may be repeated
            several times using the same timeout each time.
            Can also be passed as an `aiohttp.ClientTimeout` object.

    Returns:
        ~requests.Response: The HTTP response returned by ``transport``.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If the status
            code is not 200 or 308.
        ~google.resumable_media.common.DataCorruption: If this is the final
            chunk, a checksum validation was requested, and the checksum
            does not match or is not available.
    """
    verb, target_url, chunk, request_headers = self._prepare_request()

    # NOTE(review): the documented transport/response types mention
    # ``requests`` while ``timeout`` accepts an ``aiohttp.ClientTimeout``;
    # confirm which transport type this coroutine actually expects.
    result = await _request_helpers.http_request(
        transport,
        verb,
        target_url,
        data=chunk,
        headers=request_headers,
        retry_strategy=self._retry_strategy,
        timeout=timeout,
    )

    # The size of the chunk that was just sent is reported together with
    # the response so the upload can process the outcome.
    await self._process_response(result, len(chunk))
    return result
|
||||
|
||||
async def recover(self, transport, timeout=None):
    """Recover from a failure.

    This method should be used when a :class:`ResumableUpload` is in an
    :attr:`~ResumableUpload.invalid` state due to a request failure.

    This will verify the progress with the server and make sure the
    current upload is in a valid state before :meth:`transmit_next_chunk`
    can be used again.

    Args:
        transport (~requests.Session): A ``requests`` object which can
            make authenticated requests.
        timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
            The number of seconds to wait for the server response, in
            the same form accepted by :meth:`initiate` and
            :meth:`transmit_next_chunk`. When given (not ``None``) it is
            forwarded as the ``timeout`` keyword of the request helper.
            When omitted, no ``timeout`` keyword is passed at all, so the
            request is issued exactly as it was before this parameter
            existed.

    Returns:
        ~requests.Response: The HTTP response returned by ``transport``.
    """
    method, url, payload, headers = self._prepare_recover_request()

    # Forward a timeout only when the caller supplied one; with the
    # default of ``None`` the helper is called with the same arguments as
    # before, keeping existing callers' behavior unchanged.
    timeout_kwargs = {} if timeout is None else {"timeout": timeout}

    # NOTE: We assume "payload is None" but pass it along anyway.
    response = await _request_helpers.http_request(
        transport,
        method,
        url,
        data=payload,
        headers=headers,
        retry_strategy=self._retry_strategy,
        **timeout_kwargs
    )

    # Let the upload update its own state from the recovery response
    # before handing the raw response back.
    self._process_recover_response(response)
    return response
|
Loading…
Add table
Add a link
Reference in a new issue