Vehicle-Anti-Theft-Face-Rec.../venv/Lib/site-packages/google/resumable_media/_download.py

557 lines
20 KiB
Python
Raw Permalink Normal View History

# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Virtual bases classes for downloading media from Google APIs."""
import re
from six.moves import http_client
from google.resumable_media import _helpers
from google.resumable_media import common
_CONTENT_RANGE_RE = re.compile(
r"bytes (?P<start_byte>\d+)-(?P<end_byte>\d+)/(?P<total_bytes>\d+)",
flags=re.IGNORECASE,
)
_ACCEPTABLE_STATUS_CODES = (http_client.OK, http_client.PARTIAL_CONTENT)
_GET = u"GET"
_ZERO_CONTENT_RANGE_HEADER = u"bytes */0"
class DownloadBase(object):
"""Base class for download helpers.
Defines core shared behavior across different download types.
Args:
media_url (str): The URL containing the media to be downloaded.
stream (IO[bytes]): A write-able stream (i.e. file-like object) that
the downloaded resource can be written to.
start (int): The first byte in a range to be downloaded.
end (int): The last byte in a range to be downloaded.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the request, e.g. headers for encrypted data.
Attributes:
media_url (str): The URL containing the media to be downloaded.
start (Optional[int]): The first byte in a range to be downloaded.
end (Optional[int]): The last byte in a range to be downloaded.
"""
def __init__(self, media_url, stream=None, start=None, end=None, headers=None):
self.media_url = media_url
self._stream = stream
self.start = start
self.end = end
if headers is None:
headers = {}
self._headers = headers
self._finished = False
self._retry_strategy = common.RetryStrategy()
@property
def finished(self):
"""bool: Flag indicating if the download has completed."""
return self._finished
@staticmethod
def _get_status_code(response):
"""Access the status code from an HTTP response.
Args:
response (object): The HTTP response object.
Raises:
NotImplementedError: Always, since virtual.
"""
raise NotImplementedError(u"This implementation is virtual.")
@staticmethod
def _get_headers(response):
"""Access the headers from an HTTP response.
Args:
response (object): The HTTP response object.
Raises:
NotImplementedError: Always, since virtual.
"""
raise NotImplementedError(u"This implementation is virtual.")
@staticmethod
def _get_body(response):
"""Access the response body from an HTTP response.
Args:
response (object): The HTTP response object.
Raises:
NotImplementedError: Always, since virtual.
"""
raise NotImplementedError(u"This implementation is virtual.")
class Download(DownloadBase):
"""Helper to manage downloading a resource from a Google API.
"Slices" of the resource can be retrieved by specifying a range
with ``start`` and / or ``end``. However, in typical usage, neither
``start`` nor ``end`` is expected to be provided.
Args:
media_url (str): The URL containing the media to be downloaded.
stream (IO[bytes]): A write-able stream (i.e. file-like object) that
the downloaded resource can be written to.
start (int): The first byte in a range to be downloaded. If not
provided, but ``end`` is provided, will download from the
beginning to ``end`` of the media.
end (int): The last byte in a range to be downloaded. If not
provided, but ``start`` is provided, will download from the
``start`` to the end of the media.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the request, e.g. headers for encrypted data.
checksum Optional([str]): The type of checksum to compute to verify
the integrity of the object. The response headers must contain
a checksum of the requested type. If the headers lack an
appropriate checksum (for instance in the case of transcoded or
ranged downloads where the remote service does not know the
correct checksum) an INFO-level log will be emitted. Supported
values are "md5", "crc32c" and None.
"""
def __init__(
self, media_url, stream=None, start=None, end=None, headers=None, checksum="md5"
):
super(Download, self).__init__(
media_url, stream=stream, start=start, end=end, headers=headers
)
self.checksum = checksum
def _prepare_request(self):
"""Prepare the contents of an HTTP request.
This is everything that must be done before a request that doesn't
require network I/O (or other I/O). This is based on the `sans-I/O`_
philosophy.
Returns:
Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple
* HTTP verb for the request (always GET)
* the URL for the request
* the body of the request (always :data:`None`)
* headers for the request
Raises:
ValueError: If the current :class:`Download` has already
finished.
.. _sans-I/O: https://sans-io.readthedocs.io/
"""
if self.finished:
raise ValueError(u"A download can only be used once.")
add_bytes_range(self.start, self.end, self._headers)
return _GET, self.media_url, None, self._headers
def _process_response(self, response):
"""Process the response from an HTTP request.
This is everything that must be done after a request that doesn't
require network I/O (or other I/O). This is based on the `sans-I/O`_
philosophy.
Args:
response (object): The HTTP response object.
.. _sans-I/O: https://sans-io.readthedocs.io/
"""
# Tombstone the current Download so it cannot be used again.
self._finished = True
_helpers.require_status_code(
response, _ACCEPTABLE_STATUS_CODES, self._get_status_code
)
def consume(self, transport, timeout=None):
"""Consume the resource to be downloaded.
If a ``stream`` is attached to this download, then the downloaded
resource will be written to the stream.
Args:
transport (object): An object which can make authenticated
requests.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Raises:
NotImplementedError: Always, since virtual.
"""
raise NotImplementedError(u"This implementation is virtual.")
class ChunkedDownload(DownloadBase):
"""Download a resource in chunks from a Google API.
Args:
media_url (str): The URL containing the media to be downloaded.
chunk_size (int): The number of bytes to be retrieved in each
request.
stream (IO[bytes]): A write-able stream (i.e. file-like object) that
will be used to concatenate chunks of the resource as they are
downloaded.
start (int): The first byte in a range to be downloaded. If not
provided, defaults to ``0``.
end (int): The last byte in a range to be downloaded. If not
provided, will download to the end of the media.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with each request, e.g. headers for data encryption
key headers.
Attributes:
media_url (str): The URL containing the media to be downloaded.
start (Optional[int]): The first byte in a range to be downloaded.
end (Optional[int]): The last byte in a range to be downloaded.
chunk_size (int): The number of bytes to be retrieved in each request.
Raises:
ValueError: If ``start`` is negative.
"""
def __init__(self, media_url, chunk_size, stream, start=0, end=None, headers=None):
if start < 0:
raise ValueError(
u"On a chunked download the starting " u"value cannot be negative."
)
super(ChunkedDownload, self).__init__(
media_url, stream=stream, start=start, end=end, headers=headers
)
self.chunk_size = chunk_size
self._bytes_downloaded = 0
self._total_bytes = None
self._invalid = False
@property
def bytes_downloaded(self):
"""int: Number of bytes that have been downloaded."""
return self._bytes_downloaded
@property
def total_bytes(self):
"""Optional[int]: The total number of bytes to be downloaded."""
return self._total_bytes
@property
def invalid(self):
"""bool: Indicates if the download is in an invalid state.
This will occur if a call to :meth:`consume_next_chunk` fails.
"""
return self._invalid
def _get_byte_range(self):
"""Determines the byte range for the next request.
Returns:
Tuple[int, int]: The pair of begin and end byte for the next
chunked request.
"""
curr_start = self.start + self.bytes_downloaded
curr_end = curr_start + self.chunk_size - 1
# Make sure ``curr_end`` does not exceed ``end``.
if self.end is not None:
curr_end = min(curr_end, self.end)
# Make sure ``curr_end`` does not exceed ``total_bytes - 1``.
if self.total_bytes is not None:
curr_end = min(curr_end, self.total_bytes - 1)
return curr_start, curr_end
def _prepare_request(self):
"""Prepare the contents of an HTTP request.
This is everything that must be done before a request that doesn't
require network I/O (or other I/O). This is based on the `sans-I/O`_
philosophy.
.. note:
This method will be used multiple times, so ``headers`` will
be mutated in between requests. However, we don't make a copy
since the same keys are being updated.
Returns:
Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple
* HTTP verb for the request (always GET)
* the URL for the request
* the body of the request (always :data:`None`)
* headers for the request
Raises:
ValueError: If the current download has finished.
ValueError: If the current download is invalid.
.. _sans-I/O: https://sans-io.readthedocs.io/
"""
if self.finished:
raise ValueError(u"Download has finished.")
if self.invalid:
raise ValueError(u"Download is invalid and cannot be re-used.")
curr_start, curr_end = self._get_byte_range()
add_bytes_range(curr_start, curr_end, self._headers)
return _GET, self.media_url, None, self._headers
def _make_invalid(self):
"""Simple setter for ``invalid``.
This is intended to be passed along as a callback to helpers that
raise an exception so they can mark this instance as invalid before
raising.
"""
self._invalid = True
def _process_response(self, response):
"""Process the response from an HTTP request.
This is everything that must be done after a request that doesn't
require network I/O. This is based on the `sans-I/O`_ philosophy.
For the time being, this **does require** some form of I/O to write
a chunk to ``stream``. However, this will (almost) certainly not be
network I/O.
Updates the current state after consuming a chunk. First,
increments ``bytes_downloaded`` by the number of bytes in the
``content-length`` header.
If ``total_bytes`` is already set, this assumes (but does not check)
that we already have the correct value and doesn't bother to check
that it agrees with the headers.
We expect the **total** length to be in the ``content-range`` header,
but this header is only present on requests which sent the ``range``
header. This response header should be of the form
``bytes {start}-{end}/{total}`` and ``{end} - {start} + 1``
should be the same as the ``Content-Length``.
Args:
response (object): The HTTP response object (need headers).
Raises:
~google.resumable_media.common.InvalidResponse: If the number
of bytes in the body doesn't match the content length header.
.. _sans-I/O: https://sans-io.readthedocs.io/
"""
# Verify the response before updating the current instance.
if _check_for_zero_content_range(
response, self._get_status_code, self._get_headers
):
self._finished = True
return
_helpers.require_status_code(
response,
_ACCEPTABLE_STATUS_CODES,
self._get_status_code,
callback=self._make_invalid,
)
headers = self._get_headers(response)
response_body = self._get_body(response)
start_byte, end_byte, total_bytes = get_range_info(
response, self._get_headers, callback=self._make_invalid
)
transfer_encoding = headers.get(u"transfer-encoding")
if transfer_encoding is None:
content_length = _helpers.header_required(
response,
u"content-length",
self._get_headers,
callback=self._make_invalid,
)
num_bytes = int(content_length)
if len(response_body) != num_bytes:
self._make_invalid()
raise common.InvalidResponse(
response,
u"Response is different size than content-length",
u"Expected",
num_bytes,
u"Received",
len(response_body),
)
else:
# 'content-length' header not allowed with chunked encoding.
num_bytes = end_byte - start_byte + 1
# First update ``bytes_downloaded``.
self._bytes_downloaded += num_bytes
# If the end byte is past ``end`` or ``total_bytes - 1`` we are done.
if self.end is not None and end_byte >= self.end:
self._finished = True
elif end_byte >= total_bytes - 1:
self._finished = True
# NOTE: We only use ``total_bytes`` if not already known.
if self.total_bytes is None:
self._total_bytes = total_bytes
# Write the response body to the stream.
self._stream.write(response_body)
def consume_next_chunk(self, transport, timeout=None):
"""Consume the next chunk of the resource to be downloaded.
Args:
transport (object): An object which can make authenticated
requests.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Raises:
NotImplementedError: Always, since virtual.
"""
raise NotImplementedError(u"This implementation is virtual.")
def add_bytes_range(start, end, headers):
"""Add a bytes range to a header dictionary.
Some possible inputs and the corresponding bytes ranges::
>>> headers = {}
>>> add_bytes_range(None, None, headers)
>>> headers
{}
>>> add_bytes_range(500, 999, headers)
>>> headers['range']
'bytes=500-999'
>>> add_bytes_range(None, 499, headers)
>>> headers['range']
'bytes=0-499'
>>> add_bytes_range(-500, None, headers)
>>> headers['range']
'bytes=-500'
>>> add_bytes_range(9500, None, headers)
>>> headers['range']
'bytes=9500-'
Args:
start (Optional[int]): The first byte in a range. Can be zero,
positive, negative or :data:`None`.
end (Optional[int]): The last byte in a range. Assumed to be
positive.
headers (Mapping[str, str]): A headers mapping which can have the
bytes range added if at least one of ``start`` or ``end``
is not :data:`None`.
"""
if start is None:
if end is None:
# No range to add.
return
else:
# NOTE: This assumes ``end`` is non-negative.
bytes_range = u"0-{:d}".format(end)
else:
if end is None:
if start < 0:
bytes_range = u"{:d}".format(start)
else:
bytes_range = u"{:d}-".format(start)
else:
# NOTE: This is invalid if ``start < 0``.
bytes_range = u"{:d}-{:d}".format(start, end)
headers[_helpers.RANGE_HEADER] = u"bytes=" + bytes_range
def get_range_info(response, get_headers, callback=_helpers.do_nothing):
"""Get the start, end and total bytes from a content range header.
Args:
response (object): An HTTP response object.
get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers
from an HTTP response.
callback (Optional[Callable]): A callback that takes no arguments,
to be executed when an exception is being raised.
Returns:
Tuple[int, int, int]: The start byte, end byte and total bytes.
Raises:
~google.resumable_media.common.InvalidResponse: If the
``Content-Range`` header is not of the form
``bytes {start}-{end}/{total}``.
"""
content_range = _helpers.header_required(
response, _helpers.CONTENT_RANGE_HEADER, get_headers, callback=callback
)
match = _CONTENT_RANGE_RE.match(content_range)
if match is None:
callback()
raise common.InvalidResponse(
response,
u"Unexpected content-range header",
content_range,
u'Expected to be of the form "bytes {start}-{end}/{total}"',
)
return (
int(match.group(u"start_byte")),
int(match.group(u"end_byte")),
int(match.group(u"total_bytes")),
)
def _check_for_zero_content_range(response, get_status_code, get_headers):
"""Validate if response status code is 416 and content range is zero.
This is the special case for handling zero bytes files.
Args:
response (object): An HTTP response object.
get_status_code (Callable[Any, int]): Helper to get a status code
from a response.
get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers
from an HTTP response.
Returns:
bool: True if content range total bytes is zero, false otherwise.
"""
if get_status_code(response) == http_client.REQUESTED_RANGE_NOT_SATISFIABLE:
content_range = _helpers.header_required(
response,
_helpers.CONTENT_RANGE_HEADER,
get_headers,
callback=_helpers.do_nothing,
)
if content_range == _ZERO_CONTENT_RANGE_HEADER:
return True
return False