Added delete option to database storage.

This commit is contained in:
Batuhan Berk Başoğlu 2020-10-12 12:10:01 -04:00
parent 308604a33c
commit 963b5bc68b
1868 changed files with 192402 additions and 13278 deletions


@@ -0,0 +1,678 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""``requests`` utilities for Google Media Downloads and Resumable Uploads.
This sub-package assumes callers will use the `requests`_ library
as transport and `google-auth`_ for sending authenticated HTTP traffic
with ``requests``.
.. _requests: http://docs.python-requests.org/
.. _google-auth: https://google-auth.readthedocs.io/
====================
Authorized Transport
====================
To use ``google-auth`` and ``requests`` to create an authorized transport
that has read-only access to Google Cloud Storage (GCS):
.. testsetup:: get-credentials
import google.auth
import google.auth.credentials as creds_mod
import mock
def mock_default(scopes=None):
credentials = mock.Mock(spec=creds_mod.Credentials)
return credentials, u'mock-project'
# Patch the ``default`` function on the module.
original_default = google.auth.default
google.auth.default = mock_default
.. doctest:: get-credentials
>>> import google.auth
>>> import google.auth.transport.requests as tr_requests
>>>
>>> ro_scope = u'https://www.googleapis.com/auth/devstorage.read_only'
>>> credentials, _ = google.auth.default(scopes=(ro_scope,))
>>> transport = tr_requests.AuthorizedSession(credentials)
>>> transport
<google.auth.transport.requests.AuthorizedSession object at 0x...>
.. testcleanup:: get-credentials
# Put back the correct ``default`` function on the module.
google.auth.default = original_default
================
Simple Downloads
================
To download an object from Google Cloud Storage, construct the media URL
for the GCS object and download it with an authorized transport that has
access to the resource:
.. testsetup:: basic-download
import mock
import requests
from six.moves import http_client
bucket = u'bucket-foo'
blob_name = u'file.txt'
fake_response = requests.Response()
fake_response.status_code = int(http_client.OK)
fake_response.headers[u'Content-Length'] = u'1364156'
fake_content = mock.MagicMock(spec=['__len__'])
fake_content.__len__.return_value = 1364156
fake_response._content = fake_content
get_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=get_method, spec=['request'])
.. doctest:: basic-download
>>> from google.resumable_media.requests import Download
>>>
>>> url_template = (
... u'https://www.googleapis.com/download/storage/v1/b/'
... u'{bucket}/o/{blob_name}?alt=media')
>>> media_url = url_template.format(
... bucket=bucket, blob_name=blob_name)
>>>
>>> download = Download(media_url)
>>> response = download.consume(transport)
>>> download.finished
True
>>> response
<Response [200]>
>>> response.headers[u'Content-Length']
'1364156'
>>> len(response.content)
1364156
To download only a portion of the bytes in the object,
specify ``start`` and ``end`` byte positions (both optional):
.. testsetup:: basic-download-with-slice
import mock
import requests
from six.moves import http_client
from google.resumable_media.requests import Download
media_url = u'http://test.invalid'
start = 4096
end = 8191
slice_size = end - start + 1
fake_response = requests.Response()
fake_response.status_code = int(http_client.PARTIAL_CONTENT)
fake_response.headers[u'Content-Length'] = u'{:d}'.format(slice_size)
content_range = u'bytes {:d}-{:d}/1364156'.format(start, end)
fake_response.headers[u'Content-Range'] = content_range
fake_content = mock.MagicMock(spec=['__len__'])
fake_content.__len__.return_value = slice_size
fake_response._content = fake_content
get_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=get_method, spec=['request'])
.. doctest:: basic-download-with-slice
>>> download = Download(media_url, start=4096, end=8191)
>>> response = download.consume(transport)
>>> download.finished
True
>>> response
<Response [206]>
>>> response.headers[u'Content-Length']
'4096'
>>> response.headers[u'Content-Range']
'bytes 4096-8191/1364156'
>>> len(response.content)
4096
=================
Chunked Downloads
=================
For very large objects or objects of unknown size, it may make more sense
to download the object in chunks rather than all at once. This can help
avoid dropped connections on an unreliable network, and it allows
multiple chunks to be downloaded in parallel to speed up the overall
download.
A :class:`.ChunkedDownload` uses the same media URL and authorized
transport that a basic :class:`.Download` would use, but also
requires a chunk size and a write-able byte ``stream``. The chunk size is
used to determine how much of the resource to consume with each request,
and the stream allows the resource to be written out (e.g. to disk)
without having to fit in memory all at once.
.. testsetup:: chunked-download
import io
import mock
import requests
from six.moves import http_client
media_url = u'http://test.invalid'
fifty_mb = 50 * 1024 * 1024
one_gb = 1024 * 1024 * 1024
fake_response = requests.Response()
fake_response.status_code = int(http_client.PARTIAL_CONTENT)
fake_response.headers[u'Content-Length'] = u'{:d}'.format(fifty_mb)
content_range = u'bytes 0-{:d}/{:d}'.format(fifty_mb - 1, one_gb)
fake_response.headers[u'Content-Range'] = content_range
fake_content_begin = b'The beginning of the chunk...'
fake_content = fake_content_begin + b'1' * (fifty_mb - 29)
fake_response._content = fake_content
get_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=get_method, spec=['request'])
.. doctest:: chunked-download
>>> from google.resumable_media.requests import ChunkedDownload
>>>
>>> chunk_size = 50 * 1024 * 1024 # 50MB
>>> stream = io.BytesIO()
>>> download = ChunkedDownload(
... media_url, chunk_size, stream)
>>> # Check the state of the download before starting.
>>> download.bytes_downloaded
0
>>> download.total_bytes is None
True
>>> response = download.consume_next_chunk(transport)
>>> # Check the state of the download after consuming one chunk.
>>> download.finished
False
>>> download.bytes_downloaded # chunk_size
52428800
>>> download.total_bytes # 1GB
1073741824
>>> response
<Response [206]>
>>> response.headers[u'Content-Length']
'52428800'
>>> response.headers[u'Content-Range']
'bytes 0-52428799/1073741824'
>>> len(response.content) == chunk_size
True
>>> stream.seek(0)
0
>>> stream.read(29)
b'The beginning of the chunk...'
The download will change its ``finished`` status to :data:`True`
once the final chunk is consumed. In some cases, the final chunk may
not be the same size as the other chunks:
.. testsetup:: chunked-download-end
import mock
import requests
from six.moves import http_client
from google.resumable_media.requests import ChunkedDownload
media_url = u'http://test.invalid'
fifty_mb = 50 * 1024 * 1024
one_gb = 1024 * 1024 * 1024
stream = mock.Mock(spec=['write'])
download = ChunkedDownload(media_url, fifty_mb, stream)
download._bytes_downloaded = 20 * fifty_mb
download._total_bytes = one_gb
fake_response = requests.Response()
fake_response.status_code = int(http_client.PARTIAL_CONTENT)
slice_size = one_gb - 20 * fifty_mb
fake_response.headers[u'Content-Length'] = u'{:d}'.format(slice_size)
content_range = u'bytes {:d}-{:d}/{:d}'.format(
20 * fifty_mb, one_gb - 1, one_gb)
fake_response.headers[u'Content-Range'] = content_range
fake_content = mock.MagicMock(spec=['__len__'])
fake_content.__len__.return_value = slice_size
fake_response._content = fake_content
get_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=get_method, spec=['request'])
.. doctest:: chunked-download-end
>>> # The state of the download in progress.
>>> download.finished
False
>>> download.bytes_downloaded # 20 chunks at 50MB
1048576000
>>> download.total_bytes # 1GB
1073741824
>>> response = download.consume_next_chunk(transport)
>>> # The state of the download after consuming the final chunk.
>>> download.finished
True
>>> download.bytes_downloaded == download.total_bytes
True
>>> response
<Response [206]>
>>> response.headers[u'Content-Length']
'25165824'
>>> response.headers[u'Content-Range']
'bytes 1048576000-1073741823/1073741824'
>>> len(response.content) < download.chunk_size
True
In addition, a :class:`.ChunkedDownload` can also take optional
``start`` and ``end`` byte positions.
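Putting the pieces together, an entire resource can be streamed to disk by
consuming chunks until the download reports itself ``finished``. This is a
minimal sketch rather than a runnable doctest; it assumes the ``media_url``
and authorized ``transport`` from above, plus an illustrative local path:

.. code-block:: python

    chunk_size = 50 * 1024 * 1024  # 50MB
    with open(u'downloaded-file.bin', u'wb') as stream:
        download = ChunkedDownload(media_url, chunk_size, stream)
        while not download.finished:
            download.consume_next_chunk(transport)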
==============
Simple Uploads
==============
Among the three supported upload classes, the simplest is
:class:`.SimpleUpload`. A simple upload should be used when the resource
being uploaded is small and when there is no metadata (other than the name)
associated with the resource.
.. testsetup:: simple-upload
import json
import mock
import requests
from six.moves import http_client
bucket = u'some-bucket'
blob_name = u'file.txt'
fake_response = requests.Response()
fake_response.status_code = int(http_client.OK)
payload = {
u'bucket': bucket,
u'contentType': u'text/plain',
u'md5Hash': u'M0XLEsX9/sMdiI+4pB4CAQ==',
u'name': blob_name,
u'size': u'27',
}
fake_response._content = json.dumps(payload).encode(u'utf-8')
post_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=post_method, spec=['request'])
.. doctest:: simple-upload
:options: +NORMALIZE_WHITESPACE
>>> from google.resumable_media.requests import SimpleUpload
>>>
>>> url_template = (
... u'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
... u'uploadType=media&'
... u'name={blob_name}')
>>> upload_url = url_template.format(
... bucket=bucket, blob_name=blob_name)
>>>
>>> upload = SimpleUpload(upload_url)
>>> data = b'Some not too large content.'
>>> content_type = u'text/plain'
>>> response = upload.transmit(transport, data, content_type)
>>> upload.finished
True
>>> response
<Response [200]>
>>> json_response = response.json()
>>> json_response[u'bucket'] == bucket
True
>>> json_response[u'name'] == blob_name
True
>>> json_response[u'contentType'] == content_type
True
>>> json_response[u'md5Hash']
'M0XLEsX9/sMdiI+4pB4CAQ=='
>>> int(json_response[u'size']) == len(data)
True
In the rare case that an upload fails, an :exc:`.InvalidResponse`
will be raised:
.. testsetup:: simple-upload-fail
import time
import mock
import requests
from six.moves import http_client
from google import resumable_media
from google.resumable_media import _helpers
from google.resumable_media.requests import SimpleUpload as constructor
upload_url = u'http://test.invalid'
data = b'Some not too large content.'
content_type = u'text/plain'
fake_response = requests.Response()
fake_response.status_code = int(http_client.SERVICE_UNAVAILABLE)
post_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=post_method, spec=['request'])
time_sleep = time.sleep
def dont_sleep(seconds):
raise RuntimeError(u'No sleep', seconds)
def SimpleUpload(*args, **kwargs):
upload = constructor(*args, **kwargs)
# Mock the cumulative sleep to avoid retries (and `time.sleep()`).
upload._retry_strategy = resumable_media.RetryStrategy(
max_cumulative_retry=-1.0)
return upload
time.sleep = dont_sleep
.. doctest:: simple-upload-fail
:options: +NORMALIZE_WHITESPACE
>>> upload = SimpleUpload(upload_url)
>>> error = None
>>> try:
... upload.transmit(transport, data, content_type)
... except resumable_media.InvalidResponse as caught_exc:
... error = caught_exc
...
>>> error
InvalidResponse('Request failed with status code', 503,
'Expected one of', <HTTPStatus.OK: 200>)
>>> error.response
<Response [503]>
>>>
>>> upload.finished
True
.. testcleanup:: simple-upload-fail
# Put back the correct ``sleep`` function on the ``time`` module.
time.sleep = time_sleep
Even in the case of failure, we see that the upload is
:attr:`~.SimpleUpload.finished`, i.e. it cannot be re-used.
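Because a failed upload object cannot be re-used, retrying means
constructing a fresh :class:`.SimpleUpload`. A minimal sketch, assuming the
``upload_url``, ``transport``, ``data`` and ``content_type`` from above:

.. code-block:: python

    from google import resumable_media

    try:
        response = upload.transmit(transport, data, content_type)
    except resumable_media.InvalidResponse:
        # The failed upload is spent; build a new one to try again.
        upload = SimpleUpload(upload_url)
        response = upload.transmit(transport, data, content_type)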
=================
Multipart Uploads
=================
Like the simple upload, the :class:`.MultipartUpload` can be used to
achieve essentially the same task. However, a multipart upload allows some
metadata about the resource to be sent along as well. (This is the "multi":
we send a first part with the metadata and a second part with the actual
bytes in the resource.)
Usage is similar to the simple upload, but :meth:`~.MultipartUpload.transmit`
accepts an extra required argument: ``metadata``.
.. testsetup:: multipart-upload
import json
import mock
import requests
from six.moves import http_client
bucket = u'some-bucket'
blob_name = u'file.txt'
data = b'Some not too large content.'
content_type = u'text/plain'
fake_response = requests.Response()
fake_response.status_code = int(http_client.OK)
payload = {
u'bucket': bucket,
u'name': blob_name,
u'metadata': {u'color': u'grurple'},
}
fake_response._content = json.dumps(payload).encode(u'utf-8')
post_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=post_method, spec=['request'])
.. doctest:: multipart-upload
>>> from google.resumable_media.requests import MultipartUpload
>>>
>>> url_template = (
... u'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
... u'uploadType=multipart')
>>> upload_url = url_template.format(bucket=bucket)
>>>
>>> upload = MultipartUpload(upload_url)
>>> metadata = {
... u'name': blob_name,
... u'metadata': {
... u'color': u'grurple',
... },
... }
>>> response = upload.transmit(transport, data, metadata, content_type)
>>> upload.finished
True
>>> response
<Response [200]>
>>> json_response = response.json()
>>> json_response[u'bucket'] == bucket
True
>>> json_response[u'name'] == blob_name
True
>>> json_response[u'metadata'] == metadata[u'metadata']
True
As with the simple upload, in the case of failure an :exc:`.InvalidResponse`
is raised, enclosing the :attr:`~.InvalidResponse.response` that caused
the failure and the ``upload`` object cannot be re-used after a failure.
=================
Resumable Uploads
=================
A :class:`.ResumableUpload` deviates from the other two upload classes:
it transmits a resource over the course of multiple requests. This
is intended to be used in cases where:
* the size of the resource is not known (i.e. it is generated on the fly)
* requests must be short-lived
* the client has request **size** limitations
* the resource is too large to fit into memory
In general, a resource should be sent in a **single** request to avoid
latency and reduce QPS. See `GCS best practices`_ for more things to
consider when using a resumable upload.
.. _GCS best practices: https://cloud.google.com/storage/docs/\
best-practices#uploading
After creating a :class:`.ResumableUpload` instance, a
**resumable upload session** must be initiated to let the server know that
a series of chunked upload requests will be coming and to obtain an
``upload_id`` for the session. In contrast to the other two upload classes,
:meth:`~.ResumableUpload.initiate` takes a byte ``stream`` as input rather
than raw bytes as ``data``. This can be a file object, a :class:`~io.BytesIO`
object or any other stream implementing the same interface.
.. testsetup:: resumable-initiate
import io
import mock
import requests
from six.moves import http_client
bucket = u'some-bucket'
blob_name = u'file.txt'
data = b'Some resumable bytes.'
content_type = u'text/plain'
fake_response = requests.Response()
fake_response.status_code = int(http_client.OK)
fake_response._content = b''
upload_id = u'ABCdef189XY_super_serious'
resumable_url_template = (
u'https://www.googleapis.com/upload/storage/v1/b/{bucket}'
u'/o?uploadType=resumable&upload_id={upload_id}')
resumable_url = resumable_url_template.format(
bucket=bucket, upload_id=upload_id)
fake_response.headers[u'location'] = resumable_url
fake_response.headers[u'x-guploader-uploadid'] = upload_id
post_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=post_method, spec=['request'])
.. doctest:: resumable-initiate
>>> from google.resumable_media.requests import ResumableUpload
>>>
>>> url_template = (
... u'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
... u'uploadType=resumable')
>>> upload_url = url_template.format(bucket=bucket)
>>>
>>> chunk_size = 1024 * 1024 # 1MB
>>> upload = ResumableUpload(upload_url, chunk_size)
>>> stream = io.BytesIO(data)
>>> # The upload doesn't know how "big" it is until seeing a stream.
>>> upload.total_bytes is None
True
>>> metadata = {u'name': blob_name}
>>> response = upload.initiate(transport, stream, metadata, content_type)
>>> response
<Response [200]>
>>> upload.resumable_url == response.headers[u'Location']
True
>>> upload.total_bytes == len(data)
True
>>> upload_id = response.headers[u'X-GUploader-UploadID']
>>> upload_id
'ABCdef189XY_super_serious'
>>> upload.resumable_url == upload_url + u'&upload_id=' + upload_id
True
Once a :class:`.ResumableUpload` has been initiated, the resource is
transmitted in chunks until completion:
.. testsetup:: resumable-transmit
import io
import json
import mock
import requests
from six.moves import http_client
from google import resumable_media
import google.resumable_media.requests.upload as upload_mod
data = b'01234567891'
stream = io.BytesIO(data)
# Create an "already initiated" upload.
upload_url = u'http://test.invalid'
chunk_size = 256 * 1024 # 256KB
upload = upload_mod.ResumableUpload(upload_url, chunk_size)
upload._resumable_url = u'http://test.invalid?upload_id=mocked'
upload._stream = stream
upload._content_type = u'text/plain'
upload._total_bytes = len(data)
# After-the-fact update the chunk size so that len(data)
# is split into three.
upload._chunk_size = 4
# Make three fake responses.
fake_response0 = requests.Response()
fake_response0.status_code = resumable_media.PERMANENT_REDIRECT
fake_response0.headers[u'range'] = u'bytes=0-3'
fake_response1 = requests.Response()
fake_response1.status_code = resumable_media.PERMANENT_REDIRECT
fake_response1.headers[u'range'] = u'bytes=0-7'
fake_response2 = requests.Response()
fake_response2.status_code = int(http_client.OK)
bucket = u'some-bucket'
blob_name = u'file.txt'
payload = {
u'bucket': bucket,
u'name': blob_name,
u'size': u'{:d}'.format(len(data)),
}
fake_response2._content = json.dumps(payload).encode(u'utf-8')
# Use the fake responses to mock a transport.
responses = [fake_response0, fake_response1, fake_response2]
put_method = mock.Mock(side_effect=responses, spec=[])
transport = mock.Mock(request=put_method, spec=['request'])
.. doctest:: resumable-transmit
>>> response0 = upload.transmit_next_chunk(transport)
>>> response0
<Response [308]>
>>> upload.finished
False
>>> upload.bytes_uploaded == upload.chunk_size
True
>>>
>>> response1 = upload.transmit_next_chunk(transport)
>>> response1
<Response [308]>
>>> upload.finished
False
>>> upload.bytes_uploaded == 2 * upload.chunk_size
True
>>>
>>> response2 = upload.transmit_next_chunk(transport)
>>> response2
<Response [200]>
>>> upload.finished
True
>>> upload.bytes_uploaded == upload.total_bytes
True
>>> json_response = response2.json()
>>> json_response[u'bucket'] == bucket
True
>>> json_response[u'name'] == blob_name
True
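In practice, the number of chunks is rarely known up front, so the loop is
driven by :attr:`~.ResumableUpload.finished` rather than a fixed number of
calls. A minimal sketch, assuming an already-initiated ``upload`` and a
``transport``:

.. code-block:: python

    while not upload.finished:
        response = upload.transmit_next_chunk(transport)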
"""
from google._async_resumable_media.requests.download import ChunkedDownload
from google._async_resumable_media.requests.download import Download
from google._async_resumable_media.requests.upload import MultipartUpload
from google._async_resumable_media.requests.download import RawChunkedDownload
from google._async_resumable_media.requests.download import RawDownload
from google._async_resumable_media.requests.upload import ResumableUpload
from google._async_resumable_media.requests.upload import SimpleUpload
__all__ = [
u"ChunkedDownload",
u"Download",
u"MultipartUpload",
u"RawChunkedDownload",
u"RawDownload",
u"ResumableUpload",
u"SimpleUpload",
]


@@ -0,0 +1,155 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Shared utilities used by both downloads and uploads.
These utilities are explicitly catered to ``requests``-like transports.
"""
import functools
from google._async_resumable_media import _helpers
from google.resumable_media import common
import google.auth.transport._aiohttp_requests as aiohttp_requests
import aiohttp
_DEFAULT_RETRY_STRATEGY = common.RetryStrategy()
_SINGLE_GET_CHUNK_SIZE = 8192
# The number of seconds to wait to establish a connection
# (the ``connect()`` call on the socket). Avoid setting this to a multiple
# of 3, so it does not align with TCP retransmission timing (typically
# 2.5-3s).
_DEFAULT_CONNECT_TIMEOUT = 61
# The number of seconds to wait between bytes sent from the server.
_DEFAULT_READ_TIMEOUT = 60
_DEFAULT_TIMEOUT = aiohttp.ClientTimeout(
connect=_DEFAULT_CONNECT_TIMEOUT, sock_read=_DEFAULT_READ_TIMEOUT
)
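# Usage sketch (illustrative, not part of this module): callers may override
# the default by passing their own ``aiohttp.ClientTimeout`` to
# ``http_request`` below, e.g.
#
#     custom_timeout = aiohttp.ClientTimeout(connect=30, sock_read=120)
#     response = await http_request(
#         transport, u'GET', url, timeout=custom_timeout)
#
# ``custom_timeout``, ``transport`` and ``url`` are assumed to be defined by
# the caller.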
class RequestsMixin(object):
"""Mix-in class implementing ``requests``-specific behavior.
These are methods that are more general purpose, with implementations
specific to the types defined in ``requests``.
"""
@staticmethod
def _get_status_code(response):
"""Access the status code from an HTTP response.
Args:
response (~requests.Response): The HTTP response object.
Returns:
int: The status code.
"""
return response.status
@staticmethod
def _get_headers(response):
"""Access the headers from an HTTP response.
Args:
response (~requests.Response): The HTTP response object.
Returns:
~requests.structures.CaseInsensitiveDict: The header mapping (keys
are case-insensitive).
"""
# For async testing, ``_headers`` is modified instead of ``headers``,
# so access the mapping via the internal field.
return response._headers
@staticmethod
async def _get_body(response):
"""Access the response body from an HTTP response.
Args:
response (~requests.Response): The HTTP response object.
Returns:
bytes: The body of the ``response``.
"""
wrapped_response = aiohttp_requests._CombinedResponse(response)
content = await wrapped_response.data.read()
return content
class RawRequestsMixin(RequestsMixin):
@staticmethod
async def _get_body(response):
"""Access the response body from an HTTP response.
Args:
response (~requests.Response): The HTTP response object.
Returns:
bytes: The body of the ``response``.
"""
wrapped_response = aiohttp_requests._CombinedResponse(response)
content = await wrapped_response.raw_content()
return content
async def http_request(
transport,
method,
url,
data=None,
headers=None,
retry_strategy=_DEFAULT_RETRY_STRATEGY,
**transport_kwargs
):
"""Make an HTTP request.
Args:
transport (~requests.Session): A ``requests`` object which can make
authenticated requests via a ``request()`` method. This method
must accept an HTTP method, an upload URL, a ``data`` keyword
argument and a ``headers`` keyword argument.
method (str): The HTTP method for the request.
url (str): The URL for the request.
data (Optional[bytes]): The body of the request.
headers (Mapping[str, str]): The headers for the request (``transport``
may also add additional headers).
retry_strategy (~google.resumable_media.common.RetryStrategy): The
strategy to use if the request fails and must be retried.
transport_kwargs (Dict[str, str]): Extra keyword arguments to be
passed along to ``transport.request``.
Returns:
~requests.Response: The return value of ``transport.request()``.
"""
# NOTE(asyncio/aiohttp): Sync versions use a tuple for two timeouts,
# default connect timeout and read timeout. Since async requests only
# accepts a single value, this is using the connect timeout. This logic
# diverges from the sync implementation.
if "timeout" not in transport_kwargs:
timeout = _DEFAULT_TIMEOUT
transport_kwargs["timeout"] = timeout
func = functools.partial(
transport.request, method, url, data=data, headers=headers, **transport_kwargs
)
resp = await _helpers.wait_and_retry(
func, RequestsMixin._get_status_code, retry_strategy
)
return resp
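# Usage sketch (illustrative): retry behavior can be tuned by passing a
# custom strategy, e.g.
#
#     strategy = common.RetryStrategy(max_cumulative_retry=60.0)
#     response = await http_request(
#         transport, u'POST', url, data=payload, headers=headers,
#         retry_strategy=strategy)
#
# ``transport``, ``url``, ``payload`` and ``headers`` are assumed to be
# defined by the caller.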


@@ -0,0 +1,461 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Support for downloading media from Google APIs."""
import urllib3.response
from google._async_resumable_media import _download
from google._async_resumable_media import _helpers
from google._async_resumable_media.requests import _request_helpers
from google.resumable_media import common
from google.resumable_media import _helpers as sync_helpers
from google.resumable_media.requests import download
_CHECKSUM_MISMATCH = download._CHECKSUM_MISMATCH
class Download(_request_helpers.RequestsMixin, _download.Download):
"""Helper to manage downloading a resource from a Google API.
"Slices" of the resource can be retrieved by specifying a range
with ``start`` and / or ``end``. However, in typical usage, neither
``start`` nor ``end`` is expected to be provided.
Args:
media_url (str): The URL containing the media to be downloaded.
stream (IO[bytes]): A write-able stream (i.e. file-like object) that
the downloaded resource can be written to.
start (int): The first byte in a range to be downloaded. If not
provided, but ``end`` is provided, will download from the
beginning to ``end`` of the media.
end (int): The last byte in a range to be downloaded. If not
provided, but ``start`` is provided, will download from the
``start`` to the end of the media.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the request, e.g. headers for encrypted data.
checksum (Optional[str]): The type of checksum to compute to verify
the integrity of the object. The response headers must contain
a checksum of the requested type. If the headers lack an
appropriate checksum (for instance in the case of transcoded or
ranged downloads where the remote service does not know the
correct checksum) an INFO-level log will be emitted. Supported
values are "md5", "crc32c" and None. The default is "md5".
Attributes:
media_url (str): The URL containing the media to be downloaded.
start (Optional[int]): The first byte in a range to be downloaded.
end (Optional[int]): The last byte in a range to be downloaded.
"""
async def _write_to_stream(self, response):
"""Write response body to a write-able stream.
.. note::
This method assumes that the ``_stream`` attribute is set on the
current download.
Args:
response (~requests.Response): The HTTP response object.
Raises:
~google.resumable_media.common.DataCorruption: If the download's
checksum doesn't agree with server-computed checksum.
"""
# `_get_expected_checksum()` may return None even if a checksum was
# requested, in which case it will emit an info log _MISSING_CHECKSUM.
# If an invalid checksum type is specified, this will raise ValueError.
expected_checksum, checksum_object = sync_helpers._get_expected_checksum(
response, self._get_headers, self.media_url, checksum_type=self.checksum
)
local_checksum_object = _add_decoder(response, checksum_object)
async for chunk in response.content.iter_chunked(
_request_helpers._SINGLE_GET_CHUNK_SIZE
):
self._stream.write(chunk)
local_checksum_object.update(chunk)
if expected_checksum is None:
return
else:
actual_checksum = sync_helpers.prepare_checksum_digest(
checksum_object.digest()
)
if actual_checksum != expected_checksum:
msg = _CHECKSUM_MISMATCH.format(
self.media_url,
expected_checksum,
actual_checksum,
checksum_type=self.checksum.upper(),
)
raise common.DataCorruption(response, msg)
async def consume(self, transport, timeout=_request_helpers._DEFAULT_TIMEOUT):
"""Consume the resource to be downloaded.
If a ``stream`` is attached to this download, then the downloaded
resource will be written to the stream.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as an `aiohttp.ClientTimeout` object.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
Raises:
~google.resumable_media.common.DataCorruption: If the download's
checksum doesn't agree with server-computed checksum.
ValueError: If the current :class:`Download` has already
finished.
"""
method, url, payload, headers = self._prepare_request()
# NOTE: We assume "payload is None" but pass it along anyway.
request_kwargs = {
u"data": payload,
u"headers": headers,
u"retry_strategy": self._retry_strategy,
u"timeout": timeout,
}
if self._stream is not None:
request_kwargs[u"stream"] = True
result = await _request_helpers.http_request(
transport, method, url, **request_kwargs
)
self._process_response(result)
if self._stream is not None:
await self._write_to_stream(result)
return result
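# Usage sketch (illustrative): downloading with checksum verification into a
# file-like stream. ``media_url`` and ``transport`` are assumed to exist.
#
#     stream = io.BytesIO()
#     download = Download(media_url, stream=stream, checksum=u'crc32c')
#     response = await download.consume(transport)
#
# If the local digest disagrees with the server-reported checksum, ``consume``
# raises ``common.DataCorruption``.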
class RawDownload(_request_helpers.RawRequestsMixin, _download.Download):
"""Helper to manage downloading a raw resource from a Google API.
"Slices" of the resource can be retrieved by specifying a range
with ``start`` and / or ``end``. However, in typical usage, neither
``start`` nor ``end`` is expected to be provided.
Args:
media_url (str): The URL containing the media to be downloaded.
stream (IO[bytes]): A write-able stream (i.e. file-like object) that
the downloaded resource can be written to.
start (int): The first byte in a range to be downloaded. If not
provided, but ``end`` is provided, will download from the
beginning to ``end`` of the media.
end (int): The last byte in a range to be downloaded. If not
provided, but ``start`` is provided, will download from the
``start`` to the end of the media.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the request, e.g. headers for encrypted data.
checksum (Optional[str]): The type of checksum to compute to verify
the integrity of the object. The response headers must contain
a checksum of the requested type. If the headers lack an
appropriate checksum (for instance in the case of transcoded or
ranged downloads where the remote service does not know the
correct checksum) an INFO-level log will be emitted. Supported
values are "md5", "crc32c" and None. The default is "md5".
Attributes:
media_url (str): The URL containing the media to be downloaded.
start (Optional[int]): The first byte in a range to be downloaded.
end (Optional[int]): The last byte in a range to be downloaded.
"""
async def _write_to_stream(self, response):
"""Write response body to a write-able stream.
.. note::
This method assumes that the ``_stream`` attribute is set on the
current download.
Args:
response (~requests.Response): The HTTP response object.
Raises:
~google.resumable_media.common.DataCorruption: If the download's
checksum doesn't agree with server-computed checksum.
"""
# `_get_expected_checksum()` may return None even if a checksum was
# requested, in which case it will emit an info log _MISSING_CHECKSUM.
# If an invalid checksum type is specified, this will raise ValueError.
expected_checksum, checksum_object = sync_helpers._get_expected_checksum(
response, self._get_headers, self.media_url, checksum_type=self.checksum
)
async for chunk in response.content.iter_chunked(
_request_helpers._SINGLE_GET_CHUNK_SIZE
):
self._stream.write(chunk)
checksum_object.update(chunk)
if expected_checksum is None:
return
else:
actual_checksum = sync_helpers.prepare_checksum_digest(
checksum_object.digest()
)
if actual_checksum != expected_checksum:
msg = _CHECKSUM_MISMATCH.format(
self.media_url,
expected_checksum,
actual_checksum,
checksum_type=self.checksum.upper(),
)
raise common.DataCorruption(response, msg)
async def consume(self, transport, timeout=_request_helpers._DEFAULT_TIMEOUT):
"""Consume the resource to be downloaded.
If a ``stream`` is attached to this download, then the downloaded
resource will be written to the stream.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as an `aiohttp.ClientTimeout` object.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
Raises:
~google.resumable_media.common.DataCorruption: If the download's
checksum doesn't agree with server-computed checksum.
ValueError: If the current :class:`Download` has already
finished.
"""
method, url, payload, headers = self._prepare_request()
# NOTE: We assume "payload is None" but pass it along anyway.
result = await _request_helpers.http_request(
transport,
method,
url,
data=payload,
headers=headers,
retry_strategy=self._retry_strategy,
timeout=timeout,
)
self._process_response(result)
if self._stream is not None:
await self._write_to_stream(result)
return result
class ChunkedDownload(_request_helpers.RequestsMixin, _download.ChunkedDownload):
"""Download a resource in chunks from a Google API.
Args:
media_url (str): The URL containing the media to be downloaded.
chunk_size (int): The number of bytes to be retrieved in each
request.
stream (IO[bytes]): A write-able stream (i.e. file-like object) that
will be used to concatenate chunks of the resource as they are
downloaded.
start (int): The first byte in a range to be downloaded. If not
provided, defaults to ``0``.
end (int): The last byte in a range to be downloaded. If not
provided, will download to the end of the media.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with each request, e.g. headers for a data encryption
key.
Attributes:
media_url (str): The URL containing the media to be downloaded.
start (Optional[int]): The first byte in a range to be downloaded.
end (Optional[int]): The last byte in a range to be downloaded.
chunk_size (int): The number of bytes to be retrieved in each request.
Raises:
ValueError: If ``start`` is negative.
"""
async def consume_next_chunk(
self, transport, timeout=_request_helpers._DEFAULT_TIMEOUT
):
"""
Consume the next chunk of the resource to be downloaded.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as an `aiohttp.ClientTimeout` object.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
Raises:
ValueError: If the current download has finished.
"""
method, url, payload, headers = self._prepare_request()
# NOTE: We assume "payload is None" but pass it along anyway.
result = await _request_helpers.http_request(
transport,
method,
url,
data=payload,
headers=headers,
retry_strategy=self._retry_strategy,
timeout=timeout,
)
await self._process_response(result)
return result
class RawChunkedDownload(_request_helpers.RawRequestsMixin, _download.ChunkedDownload):
"""Download a raw resource in chunks from a Google API.
Args:
media_url (str): The URL containing the media to be downloaded.
chunk_size (int): The number of bytes to be retrieved in each
request.
stream (IO[bytes]): A write-able stream (i.e. file-like object) that
will be used to concatenate chunks of the resource as they are
downloaded.
start (int): The first byte in a range to be downloaded. If not
provided, defaults to ``0``.
end (int): The last byte in a range to be downloaded. If not
provided, will download to the end of the media.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with each request, e.g. headers for a data encryption
key.
Attributes:
media_url (str): The URL containing the media to be downloaded.
start (Optional[int]): The first byte in a range to be downloaded.
end (Optional[int]): The last byte in a range to be downloaded.
chunk_size (int): The number of bytes to be retrieved in each request.
Raises:
ValueError: If ``start`` is negative.
"""
async def consume_next_chunk(
self, transport, timeout=_request_helpers._DEFAULT_TIMEOUT
):
"""Consume the next chunk of the resource to be downloaded.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as an `aiohttp.ClientTimeout` object.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
Raises:
ValueError: If the current download has finished.
"""
method, url, payload, headers = self._prepare_request()
# NOTE: We assume "payload is None" but pass it along anyway.
result = await _request_helpers.http_request(
transport,
method,
url,
data=payload,
headers=headers,
retry_strategy=self._retry_strategy,
timeout=timeout,
)
await self._process_response(result)
return result
def _add_decoder(response_raw, checksum):
"""Patch the ``_decoder`` on a ``urllib3`` response.
This is so that we can intercept the compressed bytes before they are
decoded.
Only patches if the content encoding is ``gzip``.
Args:
response_raw (urllib3.response.HTTPResponse): The raw response for
an HTTP request.
checksum (object):
A checksum which will be updated with compressed bytes.
Returns:
object: Either the original ``checksum`` if ``_decoder`` is not
patched, or a ``_DoNothingHash`` if the decoder is patched, since the
caller will no longer need to hash the decoded bytes.
"""
encoding = response_raw.headers.get(u"content-encoding", u"").lower()
if encoding != u"gzip":
return checksum
response_raw._decoder = _GzipDecoder(checksum)
return _helpers._DoNothingHash()
class _GzipDecoder(urllib3.response.GzipDecoder):
"""Custom subclass of ``urllib3`` decoder for ``gzip``-ed bytes.
Allows a checksum function to see the compressed bytes before they are
decoded. This way the checksum of the compressed value can be computed.
Args:
checksum (object):
A checksum which will be updated with compressed bytes.
"""
def __init__(self, checksum):
super(_GzipDecoder, self).__init__()
self._checksum = checksum
def decompress(self, data):
"""Decompress the bytes.
Args:
data (bytes): The compressed bytes to be decompressed.
Returns:
bytes: The decompressed bytes from ``data``.
"""
self._checksum.update(data)
return super(_GzipDecoder, self).decompress(data)


@@ -0,0 +1,515 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Support for resumable uploads.
Also supported here are simple (media) uploads and multipart
uploads that contain both metadata and a small file as payload.
"""
from google._async_resumable_media import _upload
from google._async_resumable_media.requests import _request_helpers
class SimpleUpload(_request_helpers.RequestsMixin, _upload.SimpleUpload):
"""Upload a resource to a Google API.
A **simple** media upload sends no metadata and completes the upload
in a single request.
Args:
upload_url (str): The URL where the content will be uploaded.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the request, e.g. headers for encrypted data.
Attributes:
upload_url (str): The URL where the content will be uploaded.
"""
async def transmit(
self,
transport,
data,
content_type,
timeout=_request_helpers._DEFAULT_TIMEOUT,
):
"""Transmit the resource to be uploaded.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
data (bytes): The resource content to be uploaded.
content_type (str): The content type of the resource, e.g. a JPEG
image has content type ``image/jpeg``.
timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as an `aiohttp.ClientTimeout` object.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
"""
method, url, payload, headers = self._prepare_request(data, content_type)
response = await _request_helpers.http_request(
transport,
method,
url,
data=payload,
headers=headers,
retry_strategy=self._retry_strategy,
timeout=timeout,
)
self._process_response(response)
return response
class MultipartUpload(_request_helpers.RequestsMixin, _upload.MultipartUpload):
"""Upload a resource with metadata to a Google API.
A **multipart** upload sends both metadata and the resource in a single
(multipart) request.
Args:
upload_url (str): The URL where the content will be uploaded.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the request, e.g. headers for encrypted data.
checksum (Optional[str]): The type of checksum to compute to verify
the integrity of the object. The request metadata will be amended
to include the computed value. Using this option will override a
manually-set checksum value. Supported values are "md5",
"crc32c" and None. The default is None.
Attributes:
upload_url (str): The URL where the content will be uploaded.
"""
async def transmit(
self,
transport,
data,
metadata,
content_type,
timeout=_request_helpers._DEFAULT_TIMEOUT,
):
"""Transmit the resource to be uploaded.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
data (bytes): The resource content to be uploaded.
metadata (Mapping[str, str]): The resource metadata, such as an
ACL list.
content_type (str): The content type of the resource, e.g. a JPEG
image has content type ``image/jpeg``.
timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as an `aiohttp.ClientTimeout` object.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
"""
method, url, payload, headers = self._prepare_request(
data, metadata, content_type
)
response = await _request_helpers.http_request(
transport,
method,
url,
data=payload,
headers=headers,
retry_strategy=self._retry_strategy,
timeout=timeout,
)
self._process_response(response)
return response
class ResumableUpload(_request_helpers.RequestsMixin, _upload.ResumableUpload):
"""Initiate and fulfill a resumable upload to a Google API.
A **resumable** upload sends an initial request with the resource metadata
and then gets assigned an upload ID / upload URL to send bytes to.
Using the upload URL, the upload is then done in chunks (determined by
the user) until all bytes have been uploaded.
When constructing a resumable upload, only the resumable upload URL and
the chunk size are required:
.. testsetup:: resumable-constructor
bucket = u'bucket-foo'
.. doctest:: resumable-constructor
>>> from google.resumable_media.requests import ResumableUpload
>>>
>>> url_template = (
... u'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
... u'uploadType=resumable')
>>> upload_url = url_template.format(bucket=bucket)
>>>
>>> chunk_size = 3 * 1024 * 1024 # 3MB
>>> upload = ResumableUpload(upload_url, chunk_size)
When initiating an upload (via :meth:`initiate`), the caller is expected
to pass the resource being uploaded as a file-like ``stream``. If the size
of the resource is explicitly known, it can be passed in directly:
.. testsetup:: resumable-explicit-size
import os
import tempfile
import mock
import requests
from six.moves import http_client
from google.resumable_media.requests import ResumableUpload
upload_url = u'http://test.invalid'
chunk_size = 3 * 1024 * 1024 # 3MB
upload = ResumableUpload(upload_url, chunk_size)
file_desc, filename = tempfile.mkstemp()
os.close(file_desc)
data = b'some bytes!'
with open(filename, u'wb') as file_obj:
file_obj.write(data)
fake_response = requests.Response()
fake_response.status_code = int(http_client.OK)
fake_response._content = b''
resumable_url = u'http://test.invalid?upload_id=7up'
fake_response.headers[u'location'] = resumable_url
post_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=post_method, spec=['request'])
.. doctest:: resumable-explicit-size
>>> import os
>>>
>>> upload.total_bytes is None
True
>>>
>>> stream = open(filename, u'rb')
>>> total_bytes = os.path.getsize(filename)
>>> metadata = {u'name': filename}
>>> response = upload.initiate(
... transport, stream, metadata, u'text/plain',
... total_bytes=total_bytes)
>>> response
<Response [200]>
>>>
>>> upload.total_bytes == total_bytes
True
.. testcleanup:: resumable-explicit-size
os.remove(filename)
If the stream is in a "final" state (i.e. it won't have any more bytes
written to it), the total number of bytes can be determined implicitly
from the ``stream`` itself:
.. testsetup:: resumable-implicit-size
import io
import mock
import requests
from six.moves import http_client
from google.resumable_media.requests import ResumableUpload
upload_url = u'http://test.invalid'
chunk_size = 3 * 1024 * 1024 # 3MB
upload = ResumableUpload(upload_url, chunk_size)
fake_response = requests.Response()
fake_response.status_code = int(http_client.OK)
fake_response._content = b''
resumable_url = u'http://test.invalid?upload_id=7up'
fake_response.headers[u'location'] = resumable_url
post_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=post_method, spec=['request'])
data = b'some MOAR bytes!'
metadata = {u'name': u'some-file.jpg'}
content_type = u'image/jpeg'
.. doctest:: resumable-implicit-size
>>> stream = io.BytesIO(data)
>>> response = upload.initiate(
... transport, stream, metadata, content_type)
>>>
>>> upload.total_bytes == len(data)
True
If the size of the resource is **unknown** when the upload is initiated,
the ``stream_final`` argument can be used. This might occur if the
resource is being dynamically created on the client (e.g. application
logs). To use this argument:
.. testsetup:: resumable-unknown-size
import io
import mock
import requests
from six.moves import http_client
from google.resumable_media.requests import ResumableUpload
upload_url = u'http://test.invalid'
chunk_size = 3 * 1024 * 1024 # 3MB
upload = ResumableUpload(upload_url, chunk_size)
fake_response = requests.Response()
fake_response.status_code = int(http_client.OK)
fake_response._content = b''
resumable_url = u'http://test.invalid?upload_id=7up'
fake_response.headers[u'location'] = resumable_url
post_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=post_method, spec=['request'])
metadata = {u'name': u'some-file.jpg'}
content_type = u'application/octet-stream'
stream = io.BytesIO(b'data')
.. doctest:: resumable-unknown-size
>>> response = upload.initiate(
... transport, stream, metadata, content_type,
... stream_final=False)
>>>
>>> upload.total_bytes is None
True
Args:
upload_url (str): The URL where the resumable upload will be initiated.
chunk_size (int): The size of each chunk used to upload the resource.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the :meth:`initiate` request, e.g. headers for
encrypted data. These **will not** be sent with
:meth:`transmit_next_chunk` or :meth:`recover` requests.
checksum (Optional[str]): The type of checksum to compute to verify
the integrity of the object. After the upload is complete, the
server-computed checksum of the resulting object will be checked
and google.resumable_media.common.DataCorruption will be raised on
a mismatch. The corrupted file will not be deleted from the remote
host automatically. Supported values are "md5", "crc32c" and None.
The default is None.
Attributes:
upload_url (str): The URL where the content will be uploaded.
Raises:
ValueError: If ``chunk_size`` is not a multiple of
:data:`.UPLOAD_CHUNK_SIZE`.
"""
async def initiate(
self,
transport,
stream,
metadata,
content_type,
total_bytes=None,
stream_final=True,
timeout=_request_helpers._DEFAULT_TIMEOUT,
):
"""Initiate a resumable upload.
By default, this method assumes your ``stream`` is in a "final"
state ready to transmit. However, ``stream_final=False`` can be used
to indicate that the size of the resource is not known. This can happen
if bytes are being dynamically fed into ``stream``, e.g. if the stream
is attached to application logs.
If ``stream_final=False`` is used, :attr:`chunk_size` bytes will be
read from the stream every time :meth:`transmit_next_chunk` is called.
If one of those reads produces strictly fewer bytes than the chunk
size, the upload will be concluded.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
stream (IO[bytes]): The stream (i.e. file-like object) that will
be uploaded. The stream **must** be at the beginning (i.e.
``stream.tell() == 0``).
metadata (Mapping[str, str]): The resource metadata, such as an
ACL list.
content_type (str): The content type of the resource, e.g. a JPEG
image has content type ``image/jpeg``.
total_bytes (Optional[int]): The total number of bytes to be
uploaded. If specified, the upload size **will not** be
determined from the stream (even if ``stream_final=True``).
stream_final (Optional[bool]): Indicates if the ``stream`` is
"final" (i.e. no more bytes will be added to it). In this case
we determine the upload size from the size of the stream. If
``total_bytes`` is passed, this argument will be ignored.
timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as an `aiohttp.ClientTimeout` object.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
"""
method, url, payload, headers = self._prepare_initiate_request(
stream,
metadata,
content_type,
total_bytes=total_bytes,
stream_final=stream_final,
)
response = await _request_helpers.http_request(
transport,
method,
url,
data=payload,
headers=headers,
retry_strategy=self._retry_strategy,
timeout=timeout,
)
self._process_initiate_response(response)
return response
async def transmit_next_chunk(
self, transport, timeout=_request_helpers._DEFAULT_TIMEOUT
):
"""Transmit the next chunk of the resource to be uploaded.
If the current upload was initiated with ``stream_final=False``,
this method will dynamically determine if the upload has completed.
The upload will be considered complete if the stream produces
fewer than :attr:`chunk_size` bytes when a chunk is read from it.
In the case of failure, an exception is raised that preserves the
failed response:
.. testsetup:: bad-response
import io
import mock
import requests
from six.moves import http_client
from google import resumable_media
import google.resumable_media.requests.upload as upload_mod
transport = mock.Mock(spec=['request'])
fake_response = requests.Response()
fake_response.status_code = int(http_client.BAD_REQUEST)
transport.request.return_value = fake_response
upload_url = u'http://test.invalid'
upload = upload_mod.ResumableUpload(
upload_url, resumable_media.UPLOAD_CHUNK_SIZE)
# Fake that the upload has been initiate()-d
data = b'data is here'
upload._stream = io.BytesIO(data)
upload._total_bytes = len(data)
upload._resumable_url = u'http://test.invalid?upload_id=nope'
.. doctest:: bad-response
:options: +NORMALIZE_WHITESPACE
>>> error = None
>>> try:
... upload.transmit_next_chunk(transport)
... except resumable_media.InvalidResponse as caught_exc:
... error = caught_exc
...
>>> error
InvalidResponse('Request failed with status code', 400,
'Expected one of', <HTTPStatus.OK: 200>, 308)
>>> error.response
<Response [400]>
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
timeout (Optional[Union[float, aiohttp.ClientTimeout]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as an `aiohttp.ClientTimeout` object.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
Raises:
~google.resumable_media.common.InvalidResponse: If the status
code is not 200 or 308.
~google.resumable_media.common.DataCorruption: If this is the final
chunk, a checksum validation was requested, and the checksum
does not match or is not available.
"""
method, url, payload, headers = self._prepare_request()
response = await _request_helpers.http_request(
transport,
method,
url,
data=payload,
headers=headers,
retry_strategy=self._retry_strategy,
timeout=timeout,
)
await self._process_response(response, len(payload))
return response
async def recover(self, transport):
"""Recover from a failure.
This method should be used when a :class:`ResumableUpload` is in an
:attr:`~ResumableUpload.invalid` state due to a request failure.
This will verify the progress with the server and make sure the
current upload is in a valid state before :meth:`transmit_next_chunk`
can be used again.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
"""
method, url, payload, headers = self._prepare_recover_request()
# NOTE: We assume "payload is None" but pass it along anyway.
response = await _request_helpers.http_request(
transport,
method,
url,
data=payload,
headers=headers,
retry_strategy=self._retry_strategy,
)
self._process_recover_response(response)
return response
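# Usage sketch (illustrative): recovering after a failed chunk. ``upload``
# and ``transport`` are assumed to exist.
#
#     from google import resumable_media
#
#     try:
#         await upload.transmit_next_chunk(transport)
#     except resumable_media.InvalidResponse:
#         if upload.invalid:
#             # Synchronize state with the server before resuming.
#             await upload.recover(transport)
#
# After a successful ``recover()``, ``transmit_next_chunk()`` may be called
# again.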