515 lines
19 KiB
Python
515 lines
19 KiB
Python
|
# Copyright 2014 Google LLC
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
|
||
|
"""Helper functions for Cloud Storage utility classes.
|
||
|
|
||
|
These are *not* part of the API.
|
||
|
"""
|
||
|
|
||
|
import base64
|
||
|
from hashlib import md5
|
||
|
from datetime import datetime
|
||
|
import os
|
||
|
|
||
|
from six.moves.urllib.parse import urlsplit
|
||
|
from google.cloud.storage.constants import _DEFAULT_TIMEOUT
|
||
|
|
||
|
|
||
|
STORAGE_EMULATOR_ENV_VAR = "STORAGE_EMULATOR_HOST"
"""Environment variable defining host for Storage emulator."""

# Host returned by ``_get_storage_host`` when the emulator environment
# variable above is not set.
_DEFAULT_STORAGE_HOST = u"https://storage.googleapis.com"

# Generation match parameters in camel and snake cases.
# Each entry is a (snake_case keyword name, camelCase parameter name) pair;
# ``_add_generation_match_parameters`` looks a value up by the first element
# and stores it under the second.
_GENERATION_MATCH_PARAMETERS = (
    ("if_generation_match", "ifGenerationMatch"),
    ("if_generation_not_match", "ifGenerationNotMatch"),
    ("if_metageneration_match", "ifMetagenerationMatch"),
    ("if_metageneration_not_match", "ifMetagenerationNotMatch"),
    ("if_source_generation_match", "ifSourceGenerationMatch"),
    ("if_source_generation_not_match", "ifSourceGenerationNotMatch"),
    ("if_source_metageneration_match", "ifSourceMetagenerationMatch"),
    ("if_source_metageneration_not_match", "ifSourceMetagenerationNotMatch"),
)
|
||
|
|
||
|
|
||
|
def _get_storage_host():
    """Return the host to use for Storage requests.

    :rtype: str
    :returns: The value of the ``STORAGE_EMULATOR_ENV_VAR`` environment
              variable when it is set (even to an empty string), otherwise
              ``_DEFAULT_STORAGE_HOST``.
    """
    try:
        return os.environ[STORAGE_EMULATOR_ENV_VAR]
    except KeyError:
        return _DEFAULT_STORAGE_HOST
|
||
|
|
||
|
|
||
|
def _validate_name(name):
    """Pre-flight ``Bucket`` name validation.

    Only the first and last characters are inspected; no other naming
    rule is checked here.

    :type name: str or :data:`NoneType`
    :param name: Proposed bucket name.

    :rtype: str or :data:`NoneType`
    :returns: ``name`` if valid, or ``None`` when ``name`` is ``None``.

    :raises: :exc:`ValueError` if ``name`` is empty or does not both start
             and end with a number or letter.
    """
    if name is None:
        return None

    # An empty name has no first or last character: reject it with the same
    # ValueError instead of letting the indexing below raise IndexError.
    # Otherwise the first and last characters must be alphanumeric.
    if not name or not (name[0].isalnum() and name[-1].isalnum()):
        raise ValueError("Bucket names must start and end with a number or letter.")
    return name
|
||
|
|
||
|
|
||
|
class _PropertyMixin(object):
    """Abstract mixin for cloud storage classes with associated properties.

    Instances keep a local ``_properties`` mapping together with
    ``_changes``, the set of field names modified locally since the
    properties were last replaced. :meth:`reload`, :meth:`patch` and
    :meth:`update` exchange those properties with the backend.

    Non-abstract subclasses should implement:
      - path
      - client
      - user_project

    :type name: str
    :param name: The name of the object. Bucket names must start and end with a
                 number or letter.
    """

    def __init__(self, name=None):
        self.name = name
        self._properties = {}
        self._changes = set()

    @property
    def path(self):
        """Abstract getter for the object path."""
        raise NotImplementedError

    @property
    def client(self):
        """Abstract getter for the object client."""
        raise NotImplementedError

    @property
    def user_project(self):
        """Abstract getter for the object user_project."""
        raise NotImplementedError

    def _require_client(self, client):
        """Pick the client to use for a request.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If ``None``, falls back to the
                       ``client`` stored on the current object.

        :rtype: :class:`google.cloud.storage.client.Client`
        :returns: The client passed in or the currently bound client.
        """
        return self.client if client is None else client

    def _encryption_headers(self):
        """Return any encryption headers needed to fetch the object.

        .. note::
           Defined here because :meth:`reload` calls it, but this method is
           really only relevant for :class:`~google.cloud.storage.blob.Blob`.

        :rtype: dict
        :returns: a mapping of encryption-related headers (empty here).
        """
        return dict()

    @property
    def _query_params(self):
        """Default query parameters.

        A new dict is built on every access; it carries ``userProject``
        only when :attr:`user_project` is not ``None``.
        """
        billing_project = self.user_project
        if billing_project is None:
            return {}
        return {"userProject": billing_project}

    def reload(
        self,
        client=None,
        projection="noAcl",
        timeout=_DEFAULT_TIMEOUT,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
    ):
        """Reload properties from Cloud Storage.

        Issues a GET for :attr:`path` and replaces the local properties with
        the response, which also clears the set of pending local changes.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type projection: str
        :param projection: (Optional) If used, must be 'full' or 'noAcl'.
                           Defaults to ``'noAcl'``. Specifies the set of
                           properties to return.

        :type timeout: float or tuple
        :param timeout: (Optional) The amount of time, in seconds, to wait
            for the server response.

            Can also be passed as a tuple (connect_timeout, read_timeout).
            See :meth:`requests.Session.request` documentation for details.

        :type if_generation_match: long
        :param if_generation_match: (Optional) Make the operation conditional
                                    on whether the blob's current generation
                                    matches the given value. Setting to 0
                                    makes the operation succeed only if there
                                    are no live versions of the blob.

        :type if_generation_not_match: long
        :param if_generation_not_match: (Optional) Make the operation
                                        conditional on whether the blob's
                                        current generation does not match the
                                        given value. If no live blob exists,
                                        the precondition fails. Setting to 0
                                        makes the operation succeed only if
                                        there is a live version of the blob.

        :type if_metageneration_match: long
        :param if_metageneration_match: (Optional) Make the operation
                                        conditional on whether the blob's
                                        current metageneration matches the
                                        given value.

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match: (Optional) Make the operation
                                            conditional on whether the blob's
                                            current metageneration does not
                                            match the given value.
        """
        active_client = self._require_client(client)

        # The default projection is 'noAcl' because 'acl' and related
        # are handled via custom endpoints.
        params = self._query_params
        params["projection"] = projection

        preconditions = dict(
            if_generation_match=if_generation_match,
            if_generation_not_match=if_generation_not_match,
            if_metageneration_match=if_metageneration_match,
            if_metageneration_not_match=if_metageneration_not_match,
        )
        _add_generation_match_parameters(params, **preconditions)

        response = active_client._connection.api_request(
            method="GET",
            path=self.path,
            query_params=params,
            headers=self._encryption_headers(),
            _target_object=self,
            timeout=timeout,
        )
        self._set_properties(response)

    def _patch_property(self, name, value):
        """Update field of this object's properties.

        Only the given field is stored; other fields are left untouched, and
        the field name is recorded as changed so :meth:`patch` will send it.

        It **will not** reload the properties from the server. The behavior is
        local only and syncing occurs via :meth:`patch`.

        :type name: str
        :param name: The field name to update.

        :type value: object
        :param value: The value being updated.
        """
        self._changes.add(name)
        self._properties[name] = value

    def _set_properties(self, value):
        """Set the properties for the current object.

        :type value: dict or :class:`google.cloud.storage.batch._FutureDict`
        :param value: The properties to be set.
        """
        self._properties = value
        # Replacing the properties wholesale invalidates any recorded
        # local changes, so start tracking from scratch.
        self._changes = set()

    def patch(
        self,
        client=None,
        timeout=_DEFAULT_TIMEOUT,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
    ):
        """Sends all changed properties in a PATCH request.

        Updates the ``_properties`` with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type timeout: float or tuple
        :param timeout: (Optional) The amount of time, in seconds, to wait
            for the server response.

            Can also be passed as a tuple (connect_timeout, read_timeout).
            See :meth:`requests.Session.request` documentation for details.

        :type if_generation_match: long
        :param if_generation_match: (Optional) Make the operation conditional
                                    on whether the blob's current generation
                                    matches the given value. Setting to 0
                                    makes the operation succeed only if there
                                    are no live versions of the blob.

        :type if_generation_not_match: long
        :param if_generation_not_match: (Optional) Make the operation
                                        conditional on whether the blob's
                                        current generation does not match the
                                        given value. If no live blob exists,
                                        the precondition fails. Setting to 0
                                        makes the operation succeed only if
                                        there is a live version of the blob.

        :type if_metageneration_match: long
        :param if_metageneration_match: (Optional) Make the operation
                                        conditional on whether the blob's
                                        current metageneration matches the
                                        given value.

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match: (Optional) Make the operation
                                            conditional on whether the blob's
                                            current metageneration does not
                                            match the given value.
        """
        active_client = self._require_client(client)

        # Pass '?projection=full' here because 'PATCH' documented not
        # to work properly w/ 'noAcl'.
        params = self._query_params
        params["projection"] = "full"

        preconditions = dict(
            if_generation_match=if_generation_match,
            if_generation_not_match=if_generation_not_match,
            if_metageneration_match=if_metageneration_match,
            if_metageneration_not_match=if_metageneration_not_match,
        )
        _add_generation_match_parameters(params, **preconditions)

        # Only the locally modified fields go into the request body.
        changed_fields = dict(
            (field, self._properties[field]) for field in self._changes
        )

        response = active_client._connection.api_request(
            method="PATCH",
            path=self.path,
            data=changed_fields,
            query_params=params,
            _target_object=self,
            timeout=timeout,
        )
        self._set_properties(response)

    def update(
        self,
        client=None,
        timeout=_DEFAULT_TIMEOUT,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
    ):
        """Sends all properties in a PUT request.

        Updates the ``_properties`` with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type timeout: float or tuple
        :param timeout: (Optional) The amount of time, in seconds, to wait
            for the server response.

            Can also be passed as a tuple (connect_timeout, read_timeout).
            See :meth:`requests.Session.request` documentation for details.

        :type if_generation_match: long
        :param if_generation_match: (Optional) Make the operation conditional
                                    on whether the blob's current generation
                                    matches the given value. Setting to 0
                                    makes the operation succeed only if there
                                    are no live versions of the blob.

        :type if_generation_not_match: long
        :param if_generation_not_match: (Optional) Make the operation
                                        conditional on whether the blob's
                                        current generation does not match the
                                        given value. If no live blob exists,
                                        the precondition fails. Setting to 0
                                        makes the operation succeed only if
                                        there is a live version of the blob.

        :type if_metageneration_match: long
        :param if_metageneration_match: (Optional) Make the operation
                                        conditional on whether the blob's
                                        current metageneration matches the
                                        given value.

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match: (Optional) Make the operation
                                            conditional on whether the blob's
                                            current metageneration does not
                                            match the given value.
        """
        active_client = self._require_client(client)

        params = self._query_params
        params["projection"] = "full"

        preconditions = dict(
            if_generation_match=if_generation_match,
            if_generation_not_match=if_generation_not_match,
            if_metageneration_match=if_metageneration_match,
            if_metageneration_not_match=if_metageneration_not_match,
        )
        _add_generation_match_parameters(params, **preconditions)

        # Unlike ``patch``, the complete property mapping is sent.
        response = active_client._connection.api_request(
            method="PUT",
            path=self.path,
            data=self._properties,
            query_params=params,
            _target_object=self,
            timeout=timeout,
        )
        self._set_properties(response)
|
||
|
|
||
|
|
||
|
def _scalar_property(fieldname):
    """Create a property descriptor around the :class:`_PropertyMixin` helpers.

    :type fieldname: str
    :param fieldname: Key under which the value lives in ``_properties``.

    :rtype: property
    :returns: A property whose getter reads ``_properties.get(fieldname)``
              and whose setter delegates to ``_patch_property``.
    """

    def _fget(self):
        """Scalar property getter."""
        properties = self._properties
        return properties.get(fieldname)

    def _fset(self, value):
        """Scalar property setter."""
        self._patch_property(fieldname, value)

    return property(fget=_fget, fset=_fset)
|
||
|
|
||
|
|
||
|
def _write_buffer_to_hash(buffer_object, hash_obj, digest_block_size=8192):
    """Read blocks from a buffer and update a hash with them.

    Reading starts at the buffer's current position and stops at the first
    read that returns an empty block.

    :type buffer_object: bytes buffer
    :param buffer_object: Buffer containing bytes used to update a hash object.

    :type hash_obj: object that implements update
    :param hash_obj: A hash object (MD5 or CRC32-C).

    :type digest_block_size: int
    :param digest_block_size: The block size to write to the hash.
                              Defaults to 8192.
    """
    while True:
        chunk = buffer_object.read(digest_block_size)
        if not len(chunk) > 0:
            # Nothing left to read.
            break
        hash_obj.update(chunk)
|
||
|
|
||
|
|
||
|
def _base64_md5hash(buffer_object):
    """Get MD5 hash of bytes (as base64).

    The buffer is consumed through ``_write_buffer_to_hash`` using that
    helper's default block size.

    :type buffer_object: bytes buffer
    :param buffer_object: Buffer containing bytes used to compute an MD5
                          hash (as base64).

    :rtype: str
    :returns: A base64 encoded digest of the MD5 hash, exactly as returned
              by :func:`base64.b64encode`.
    """
    hasher = md5()
    _write_buffer_to_hash(buffer_object, hasher)
    return base64.b64encode(hasher.digest())
|
||
|
|
||
|
|
||
|
def _convert_to_timestamp(value):
    """Convert non-none datetime to timestamp.

    Timezone-aware values are shifted to UTC using their own offset. Naive
    values (whose ``utcoffset()`` is ``None``) are assumed to already be in
    UTC; previously they raised :exc:`TypeError`.

    :type value: :class:`datetime.datetime`
    :param value: The datetime to convert.

    :rtype: float
    :returns: Seconds elapsed since 1970-01-01T00:00:00 UTC, as produced by
              :meth:`datetime.timedelta.total_seconds`.
    """
    utc_naive = value.replace(tzinfo=None)

    # ``utcoffset()`` returns None for naive datetimes; subtracting None
    # would raise TypeError, so only adjust when an offset is known.
    offset = value.utcoffset()
    if offset is not None:
        utc_naive = utc_naive - offset

    return (utc_naive - datetime(1970, 1, 1)).total_seconds()
|
||
|
|
||
|
|
||
|
def _add_generation_match_parameters(parameters, **match_parameters):
    """Add generation match parameters into the given parameters list.

    For every known snake_case name in ``_GENERATION_MATCH_PARAMETERS``
    whose value is not ``None``, the value is stored in ``parameters`` under
    the matching camelCase name: appended as a ``(name, value)`` tuple to a
    list, or assigned as a key of a dict. ``parameters`` is modified in
    place and nothing is returned.

    :type parameters: list or dict
    :param parameters: Parameters list or dict.

    :type match_parameters: dict
    :param match_parameters: if*generation*match parameters to add.

    :raises: :exc:`ValueError` if ``parameters`` is not a ``list()``
             or a ``dict()``. The container type is only checked when
             there is a non-``None`` value to store.
    """
    for snake_name, camel_name in _GENERATION_MATCH_PARAMETERS:
        setting = match_parameters.get(snake_name)
        if setting is None:
            continue

        if isinstance(parameters, dict):
            parameters[camel_name] = setting
        elif isinstance(parameters, list):
            parameters.append((camel_name, setting))
        else:
            raise ValueError(
                "`parameters` argument should be a dict() or a list()."
            )
|
||
|
|
||
|
|
||
|
def _raise_if_more_than_one_set(**kwargs):
    """Raise ``ValueError`` exception if more than one parameter was set.

    A parameter counts as set when its value is not ``None``.

    :type kwargs: dict
    :param kwargs: Mutually exclusive parameters, keyed by name.

    :raises: :class:`~ValueError` whose message lists every parameter name
             passed in, not only the ones that were set.
    """
    provided = [value for value in kwargs.values() if value is not None]
    if len(provided) <= 1:
        return

    quoted = ["'%s'" % key for key in kwargs]
    leading, trailing = quoted[:-1], quoted[-1]

    raise ValueError(
        "Pass at most one of {keys_but_last} and {last_key}".format(
            keys_but_last=", ".join(leading), last_key=trailing
        )
    )
|
||
|
|
||
|
|
||
|
def _bucket_bound_hostname_url(host, scheme=None):
    """Helper to build bucket bound hostname URL.

    :type host: str
    :param host: Host name.

    :type scheme: str
    :param scheme: (Optional) Web scheme. If passed, use it
                   as a scheme in the result URL.

    :rtype: str
    :returns: ``host`` unchanged when it already parses with both a scheme
              and a network location; otherwise ``"<scheme>://<host>/"``.
              ``scheme`` is formatted in as-is, so leaving it as ``None``
              yields a URL starting with ``"None://"``.
    """
    parsed = urlsplit(host)
    already_qualified = parsed.scheme and parsed.netloc
    if not already_qualified:
        return "{scheme}://{host}/".format(scheme=scheme, host=host)

    return host
|