366 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			366 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # Copyright 2017 Google Inc.
 | |
| #
 | |
| # Licensed under the Apache License, Version 2.0 (the "License");
 | |
| # you may not use this file except in compliance with the License.
 | |
| # You may obtain a copy of the License at
 | |
| #
 | |
| #     http://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| # Unless required by applicable law or agreed to in writing, software
 | |
| # distributed under the License is distributed on an "AS IS" BASIS,
 | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| # See the License for the specific language governing permissions and
 | |
| # limitations under the License.
 | |
| 
 | |
| """Shared utilities used by both downloads and uploads."""
 | |
| 
 | |
| import base64
 | |
| import hashlib
 | |
| import logging
 | |
| import random
 | |
| import time
 | |
| import warnings
 | |
| 
 | |
| from six.moves import http_client
 | |
| 
 | |
| from google.resumable_media import common
 | |
| 
 | |
| 
 | |
# Lower-case names of the HTTP headers that carry byte-range information.
RANGE_HEADER = u"range"
CONTENT_RANGE_HEADER = u"content-range"

# Status codes for which ``wait_and_retry`` re-issues the request instead of
# handing the response straight back to the caller.
RETRYABLE = (
    common.TOO_MANY_REQUESTS,
    http_client.INTERNAL_SERVER_ERROR,
    http_client.BAD_GATEWAY,
    http_client.SERVICE_UNAVAILABLE,
    http_client.GATEWAY_TIMEOUT,
)

# Text of the ``RuntimeWarning`` emitted by ``_is_fast_crcmod`` when ``crcmod``
# does not report a compiled extension.
_SLOW_CRC32C_WARNING = (
    "Currently using crcmod in pure python form. "
    "This is a slow implementation. "
    "Python 3 has a faster implementation, `google-crc32c`, "
    "which will be used if it is installed."
)

# Response header from which the expected checksum(s) are read.
_HASH_HEADER = u"x-goog-hash"

# Log message template: the positional field receives the media URL and
# ``checksum_type`` receives the upper-cased checksum name.
_MISSING_CHECKSUM = (
    u"No {checksum_type} checksum was returned from the service while downloading {}\n"
    u"(which happens for composite objects), so client-side content integrity\n"
    u"checking is not being performed."
)

_LOGGER = logging.getLogger(__name__)
 | |
| 
 | |
| 
 | |
def do_nothing():
    """No-op callable used as the default ``callback`` of this module's helpers.

    Returns:
        None: Always.
    """
    return None
 | |
| 
 | |
| 
 | |
def header_required(response, name, get_headers, callback=do_nothing):
    """Return the value of a header that must be present on ``response``.

    Args:
        response (object): An HTTP response object; its headers are obtained
            by passing it to ``get_headers``.
        name (str): The name of the header that has to be present.
        get_headers (Callable[Any, Mapping[str, str]]): Helper that extracts
            the headers mapping from an HTTP response.
        callback (Optional[Callable]): A zero-argument callable that is
            invoked just before the exception is raised.

    Returns:
        str: The value stored under ``name`` in the response headers.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If ``name`` is not
            among the response headers.
    """
    response_headers = get_headers(response)
    if name in response_headers:
        return response_headers[name]

    # Give the caller a chance to react before the failure propagates.
    callback()
    raise common.InvalidResponse(
        response, u"Response headers must contain header", name
    )
 | |
| 
 | |
| 
 | |
def require_status_code(response, status_codes, get_status_code, callback=do_nothing):
    """Ensure the status code of ``response`` is one of ``status_codes``.

    Args:
        response (object): The HTTP response object.
        status_codes (tuple): The status codes that are acceptable.
        get_status_code (Callable[Any, int]): Helper that extracts the status
            code from a response.
        callback (Optional[Callable]): A zero-argument callable that is
            invoked just before the exception is raised.

    Returns:
        int: The status code of the response.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If the status code
            is not one of the values in ``status_codes``.
    """
    actual_code = get_status_code(response)
    if actual_code in status_codes:
        return actual_code

    # Give the caller a chance to react before the failure propagates.
    callback()
    raise common.InvalidResponse(
        response,
        u"Request failed with status code",
        actual_code,
        u"Expected one of",
        *status_codes
    )
 | |
| 
 | |
| 
 | |
def calculate_retry_wait(base_wait, max_sleep):
    """Compute the next base wait and the jittered wait for a retry attempt.

    The base wait is doubled on every call and capped at ``max_sleep``. A
    random jitter between 0 and 1 seconds (in millisecond steps) is then
    added on top, so that retries from different clients are spread out.

    Args:
        base_wait (float): The previous "base" wait time (without jitter);
            it is doubled, up to ``max_sleep``.
        max_sleep (float): Upper bound for the new base wait time.

    Returns:
        Tuple[float, float]: The new base wait time, and the wait time to
        actually apply (the new base wait plus the random jitter).
    """
    doubled_wait = 2.0 * base_wait
    capped_wait = max_sleep if doubled_wait > max_sleep else doubled_wait

    # Jitter is drawn in whole milliseconds and converted to seconds.
    jitter_seconds = 0.001 * random.randint(0, 1000)
    return capped_wait, capped_wait + jitter_seconds
 | |
| 
 | |
| 
 | |
def wait_and_retry(func, get_status_code, retry_strategy):
    """Call ``func`` and keep retrying while its response is retry-able.

    ``func`` is expected to return an HTTP response; ``get_status_code`` is
    applied to it and the response counts as retry-able when that status
    code is in ``RETRYABLE``.

    Retries continue until the response is no longer retry-able or until
    :meth:`~.RetryStrategy.retry_allowed` (on ``retry_strategy``) returns
    :data:`False`. Before each retry the wait time is obtained from
    :func:`calculate_retry_wait`, which doubles the base wait and adds
    jitter.

    Args:
        func (Callable): A callable that takes no arguments and produces
            an HTTP response which will be checked as retry-able.
        get_status_code (Callable[Any, int]): Helper to get a status code
            from a response.
        retry_strategy (~google.resumable_media.common.RetryStrategy): The
            strategy to use if the request fails and must be retried.

    Returns:
        object: The last return value of ``func``. This may still be a
        retry-able response if the strategy stopped allowing retries.
    """
    slept_so_far = 0.0
    attempts = 0
    current_base = 0.5  # Doubled before first use, so the first base wait is 1.0.

    response = func()
    # The status check comes first so that ``retry_allowed`` is only consulted
    # when a retry is actually needed.
    while get_status_code(response) in RETRYABLE and retry_strategy.retry_allowed(
        slept_so_far, attempts
    ):
        current_base, pause = calculate_retry_wait(
            current_base, retry_strategy.max_sleep
        )
        attempts += 1
        slept_so_far += pause
        time.sleep(pause)
        response = func()

    return response
 | |
| 
 | |
| 
 | |
def _get_crc32c_object():
    """Create a CRC32C checksum object from whichever backend is importable.

    ``google_crc32c`` is tried first. If importing it fails, ``crcmod`` is
    used instead; in that case :func:`_is_fast_crcmod` is also called, which
    warns when ``crcmod`` is not using its compiled extension.

    Returns:
        object: A ``google_crc32c.Checksum`` instance, or a ``crcmod``
        predefined ``"crc-32c"`` object.

    Raises:
        ImportError: If neither ``google_crc32c`` nor ``crcmod`` can be
            imported.
    """
    try:
        import google_crc32c

        return google_crc32c.Checksum()
    except ImportError:
        # Preferred backend unavailable; fall back to ``crcmod``.
        try:
            import crcmod

            fallback_checksum = crcmod.predefined.Crc("crc-32c")
            # Called for its side effect only (the slow-implementation warning).
            _is_fast_crcmod()
            return fallback_checksum
        except ImportError:
            raise ImportError("Failed to import either `google-crc32c` or `crcmod`")
 | |
| 
 | |
| 
 | |
def _is_fast_crcmod():
    """Report whether ``crcmod`` indicates that it uses its extension module.

    Reads the ``_usingExtension`` attribute of ``crcmod.crcmod`` (treated as
    :data:`False` when absent). When the value is falsy, a
    ``RuntimeWarning`` carrying ``_SLOW_CRC32C_WARNING`` is issued.

    Returns:
        object: The value of ``crcmod.crcmod._usingExtension``, or
        :data:`False` if the attribute does not exist.
    """
    # A non-empty ``fromlist`` makes ``__import__`` return the
    # ``crcmod.crcmod`` submodule rather than the top-level package.
    crcmod_impl = __import__(
        "crcmod.crcmod", globals(), locals(), ["_usingExtension"], 0
    )
    using_extension = getattr(crcmod_impl, "_usingExtension", False)
    if using_extension:
        return using_extension

    warnings.warn(_SLOW_CRC32C_WARNING, RuntimeWarning, stacklevel=2)
    return using_extension
 | |
| 
 | |
| 
 | |
def _get_metadata_key(checksum_type):
    """Translate a checksum type into its key name.

    ``"md5"`` maps to ``"md5Hash"``; every other value is returned unchanged.
    (Per the function name, the result is presumably used as a key into
    object metadata -- confirm against callers.)

    Args:
        checksum_type (Optional[str]): The checksum type.

    Returns:
        Optional[str]: ``"md5Hash"`` for ``"md5"``, otherwise
        ``checksum_type`` itself.
    """
    return "md5Hash" if checksum_type == "md5" else checksum_type
 | |
| 
 | |
| 
 | |
def prepare_checksum_digest(digest_bytestring):
    """Encode a checksum digest so it can be placed in an HTTP header.

    Args:
        digest_bytestring (bytes): A checksum digest bytestring.

    Returns:
        str: The base64 representation of ``digest_bytestring``, decoded
        to text.
    """
    # ``b64encode`` yields ``bytes``; HTTP headers expect ``str``, so decode.
    return base64.b64encode(digest_bytestring).decode(u"utf-8")
 | |
| 
 | |
| 
 | |
def _get_expected_checksum(response, get_headers, media_url, checksum_type):
    """Determine the expected checksum of a download and a matching hasher.

    Args:
        response (~requests.Response): The HTTP response object.
        get_headers (callable: response->dict): returns response headers.
        media_url (str): The URL containing the media to be downloaded; only
            used in the log message when no checksum is found.
        checksum_type Optional(str): The checksum type to read from the headers,
            exactly as it will appear in the headers (case-sensitive). Must be
            "md5", "crc32c" or None.

    Returns:
        Tuple (Optional[str], object): The expected checksum read from the
        ``X-Goog-Hash`` header (or :data:`None`), and the checksum object to
        feed the downloaded bytes into. The checksum object is a
        ``_DoNothingHash`` when ``checksum_type`` is :data:`None` or when the
        header holds no checksum of the requested type.

    Raises:
        ValueError: If ``checksum_type`` is not "md5", "crc32c" or None.
    """
    if checksum_type not in ["md5", "crc32c", None]:
        raise ValueError("checksum must be ``'md5'``, ``'crc32c'`` or ``None``")

    # Checksumming disabled: nothing to read from the headers.
    if checksum_type not in ["md5", "crc32c"]:
        return (None, _DoNothingHash())

    response_headers = get_headers(response)
    expected_checksum = _parse_checksum_header(
        response_headers.get(_HASH_HEADER), response, checksum_label=checksum_type
    )

    if expected_checksum is not None:
        if checksum_type == "md5":
            hasher = hashlib.md5()
        else:
            hasher = _get_crc32c_object()
        return (expected_checksum, hasher)

    # The service sent no checksum of the requested type: log it and skip
    # client-side verification by using a no-op hasher.
    missing_message = _MISSING_CHECKSUM.format(
        media_url, checksum_type=checksum_type.upper()
    )
    _LOGGER.info(missing_message)
    return (expected_checksum, _DoNothingHash())
 | |
| 
 | |
| 
 | |
def _parse_checksum_header(header_value, response, checksum_label):
    """Parses the checksum header from an ``X-Goog-Hash`` value.

    .. _header reference: https://cloud.google.com/storage/docs/\
                          xml-api/reference-headers#xgooghash

    Expects ``header_value`` (if not :data:`None`) to be in one of the three
    following formats:

    * ``crc32c=n03x6A==``
    * ``md5=Ojk9c3dhfxgoKVVHYwFbHQ==``
    * ``crc32c=n03x6A==,md5=Ojk9c3dhfxgoKVVHYwFbHQ==``

    See the `header reference`_ for more information.

    Whitespace around each name and value is ignored. Comma-separated
    segments that contain no ``=`` (for example the empty segment produced
    by an empty header or a trailing comma) are skipped.

    Args:
        header_value (Optional[str]): The ``X-Goog-Hash`` header from
            a download response.
        response (~requests.Response): The HTTP response object; only used
            when constructing the exception.
        checksum_label (str): The label of the header value to read, as in the
            examples above. Typically "md5" or "crc32c"

    Returns:
        Optional[str]: The expected checksum of the response, if it
        can be detected from the ``X-Goog-Hash`` header; otherwise, None.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If there are
            multiple checksums of the requested type in ``header_value``.
    """
    if header_value is None:
        return None

    matches = []
    for checksum in header_value.split(u","):
        # Only the first "=" separates name from value: base64 values end
        # in "=" padding that must stay part of the value.
        name, separator, value = checksum.partition(u"=")
        if not separator:
            # Not a ``name=value`` pair; ignore it instead of failing with
            # an unpacking error.
            continue

        # Official docs say "," is the separator, but real-world responses
        # have been seen with ", ", so tolerate surrounding whitespace.
        if name.strip() == checksum_label:
            matches.append(value.strip())

    if not matches:
        return None

    if len(matches) == 1:
        return matches[0]

    raise common.InvalidResponse(
        response,
        u"X-Goog-Hash header had multiple ``{}`` values.".format(checksum_label),
        header_value,
        matches,
    )
 | |
| 
 | |
| 
 | |
def _get_checksum_object(checksum_type):
    """Create a fresh checksum object for ``checksum_type``.

    Args:
        checksum_type (Optional[str]): "md5", "crc32c" or None.

    Returns:
        Optional[object]: A new ``hashlib.md5`` object for "md5", the result
        of ``_get_crc32c_object()`` for "crc32c", or :data:`None` when
        ``checksum_type`` is :data:`None`.

    Raises:
        ValueError: If ``checksum_type`` is any other value.
    """
    if checksum_type is None:
        return None

    if checksum_type == "md5":
        return hashlib.md5()

    if checksum_type == "crc32c":
        return _get_crc32c_object()

    raise ValueError("checksum must be ``'md5'``, ``'crc32c'`` or ``None``")
 | |
| 
 | |
| 
 | |
class _DoNothingHash(object):
    """Placeholder hash object whose ``update`` discards its input.

    Used in place of ``hashlib.md5`` or a crc32c checksum object when no
    checksum needs to be computed, so calling code can invoke ``update``
    unconditionally.
    """

    def update(self, unused_chunk):
        """Accept a chunk of data and ignore it.

        Mirrors the ``update`` method of ``hashlib.md5`` and other checksum
        objects.

        Args:
            unused_chunk (bytes): A chunk of data.

        Returns:
            None: Always.
        """
        return None
 |