326 lines
13 KiB
Python
326 lines
13 KiB
Python
# Copyright 2016 Google Inc. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""Google Cloud Bigtable HappyBase batch module."""
|
|
|
|
|
|
import datetime
|
|
import warnings
|
|
|
|
import six
|
|
|
|
from gcloud._helpers import _datetime_from_microseconds
|
|
from gcloud.bigtable.row_filters import TimestampRange
|
|
|
|
|
|
_WAL_SENTINEL = object()
|
|
# Assumed granularity of timestamps in Cloud Bigtable.
|
|
_ONE_MILLISECOND = datetime.timedelta(microseconds=1000)
|
|
_WARN = warnings.warn
|
|
_WAL_WARNING = ('The wal argument (Write-Ahead-Log) is not '
|
|
'supported by Cloud Bigtable.')
|
|
|
|
|
|
class Batch(object):
|
|
"""Batch class for accumulating mutations.
|
|
|
|
.. note::
|
|
|
|
When using a batch with ``transaction=False`` as a context manager
|
|
(i.e. in a ``with`` statement), mutations will still be sent as
|
|
row mutations even if the context manager exits with an error.
|
|
This behavior is in place to match the behavior in the HappyBase
|
|
HBase / Thrift implementation.
|
|
|
|
:type table: :class:`Table <gcloud.bigtable.happybase.table.Table>`
|
|
:param table: The table where mutations will be applied.
|
|
|
|
:type timestamp: int
|
|
:param timestamp: (Optional) Timestamp (in milliseconds since the epoch)
|
|
that all mutations will be applied at.
|
|
|
|
:type batch_size: int
|
|
:param batch_size: (Optional) The maximum number of mutations to allow
|
|
to accumulate before committing them.
|
|
|
|
:type transaction: bool
|
|
:param transaction: Flag indicating if the mutations should be sent
|
|
transactionally or not. If ``transaction=True`` and
|
|
an error occurs while a :class:`Batch` is active,
|
|
then none of the accumulated mutations will be
|
|
committed. If ``batch_size`` is set, the mutation
|
|
can't be transactional.
|
|
|
|
:type wal: object
|
|
:param wal: Unused parameter (Boolean for using the HBase Write Ahead Log).
|
|
Provided for compatibility with HappyBase, but irrelevant for
|
|
Cloud Bigtable since it does not have a Write Ahead Log.
|
|
|
|
:raises: :class:`TypeError <exceptions.TypeError>` if ``batch_size``
|
|
is set and ``transaction=True``.
|
|
:class:`ValueError <exceptions.ValueError>` if ``batch_size``
|
|
is not positive.
|
|
"""
|
|
|
|
def __init__(self, table, timestamp=None, batch_size=None,
|
|
transaction=False, wal=_WAL_SENTINEL):
|
|
if wal is not _WAL_SENTINEL:
|
|
_WARN(_WAL_WARNING)
|
|
|
|
if batch_size is not None:
|
|
if transaction:
|
|
raise TypeError('When batch_size is set, a Batch cannot be '
|
|
'transactional')
|
|
if batch_size <= 0:
|
|
raise ValueError('batch_size must be positive')
|
|
|
|
self._table = table
|
|
self._batch_size = batch_size
|
|
self._timestamp = self._delete_range = None
|
|
|
|
# Timestamp is in milliseconds, convert to microseconds.
|
|
if timestamp is not None:
|
|
self._timestamp = _datetime_from_microseconds(1000 * timestamp)
|
|
# For deletes, we get the very next timestamp (assuming timestamp
|
|
# granularity is milliseconds). This is because HappyBase users
|
|
# expect HBase deletes to go **up to** and **including** the
|
|
# timestamp while Cloud Bigtable Time Ranges **exclude** the
|
|
# final timestamp.
|
|
next_timestamp = self._timestamp + _ONE_MILLISECOND
|
|
self._delete_range = TimestampRange(end=next_timestamp)
|
|
|
|
self._transaction = transaction
|
|
|
|
# Internal state for tracking mutations.
|
|
self._row_map = {}
|
|
self._mutation_count = 0
|
|
|
|
def send(self):
|
|
"""Send / commit the batch of mutations to the server."""
|
|
for row in self._row_map.values():
|
|
# commit() does nothing if row hasn't accumulated any mutations.
|
|
row.commit()
|
|
|
|
self._row_map.clear()
|
|
self._mutation_count = 0
|
|
|
|
def _try_send(self):
|
|
"""Send / commit the batch if mutations have exceeded batch size."""
|
|
if self._batch_size and self._mutation_count >= self._batch_size:
|
|
self.send()
|
|
|
|
def _get_row(self, row_key):
|
|
"""Gets a row that will hold mutations.
|
|
|
|
If the row is not already cached on the current batch, a new row will
|
|
be created.
|
|
|
|
:type row_key: str
|
|
:param row_key: The row key for a row stored in the map.
|
|
|
|
:rtype: :class:`Row <gcloud.bigtable.row.Row>`
|
|
:returns: The newly created or stored row that will hold mutations.
|
|
"""
|
|
if row_key not in self._row_map:
|
|
table = self._table._low_level_table
|
|
self._row_map[row_key] = table.row(row_key)
|
|
|
|
return self._row_map[row_key]
|
|
|
|
def put(self, row, data, wal=_WAL_SENTINEL):
|
|
"""Insert data into a row in the table owned by this batch.
|
|
|
|
:type row: str
|
|
:param row: The row key where the mutation will be "put".
|
|
|
|
:type data: dict
|
|
:param data: Dictionary containing the data to be inserted. The keys
|
|
are columns names (of the form ``fam:col``) and the values
|
|
are strings (bytes) to be stored in those columns.
|
|
|
|
:type wal: object
|
|
:param wal: Unused parameter (to over-ride the default on the
|
|
instance). Provided for compatibility with HappyBase, but
|
|
irrelevant for Cloud Bigtable since it does not have a
|
|
Write Ahead Log.
|
|
"""
|
|
if wal is not _WAL_SENTINEL:
|
|
_WARN(_WAL_WARNING)
|
|
|
|
row_object = self._get_row(row)
|
|
# Make sure all the keys are valid before beginning
|
|
# to add mutations.
|
|
column_pairs = _get_column_pairs(six.iterkeys(data),
|
|
require_qualifier=True)
|
|
for column_family_id, column_qualifier in column_pairs:
|
|
value = data[column_family_id + ':' + column_qualifier]
|
|
row_object.set_cell(column_family_id, column_qualifier,
|
|
value, timestamp=self._timestamp)
|
|
|
|
self._mutation_count += len(data)
|
|
self._try_send()
|
|
|
|
def _delete_columns(self, columns, row_object):
|
|
"""Adds delete mutations for a list of columns and column families.
|
|
|
|
:type columns: list
|
|
:param columns: Iterable containing column names (as
|
|
strings). Each column name can be either
|
|
|
|
* an entire column family: ``fam`` or ``fam:``
|
|
* a single column: ``fam:col``
|
|
|
|
:type row_object: :class:`Row <gcloud_bigtable.row.Row>`
|
|
:param row_object: The row which will hold the delete mutations.
|
|
|
|
:raises: :class:`ValueError <exceptions.ValueError>` if the delete
|
|
timestamp range is set on the current batch, but a
|
|
column family delete is attempted.
|
|
"""
|
|
column_pairs = _get_column_pairs(columns)
|
|
for column_family_id, column_qualifier in column_pairs:
|
|
if column_qualifier is None:
|
|
if self._delete_range is not None:
|
|
raise ValueError('The Cloud Bigtable API does not support '
|
|
'adding a timestamp to '
|
|
'"DeleteFromFamily" ')
|
|
row_object.delete_cells(column_family_id,
|
|
columns=row_object.ALL_COLUMNS)
|
|
else:
|
|
row_object.delete_cell(column_family_id,
|
|
column_qualifier,
|
|
time_range=self._delete_range)
|
|
|
|
def delete(self, row, columns=None, wal=_WAL_SENTINEL):
|
|
"""Delete data from a row in the table owned by this batch.
|
|
|
|
:type row: str
|
|
:param row: The row key where the delete will occur.
|
|
|
|
:type columns: list
|
|
:param columns: (Optional) Iterable containing column names (as
|
|
strings). Each column name can be either
|
|
|
|
* an entire column family: ``fam`` or ``fam:``
|
|
* a single column: ``fam:col``
|
|
|
|
If not used, will delete the entire row.
|
|
|
|
:type wal: object
|
|
:param wal: Unused parameter (to over-ride the default on the
|
|
instance). Provided for compatibility with HappyBase, but
|
|
irrelevant for Cloud Bigtable since it does not have a
|
|
Write Ahead Log.
|
|
|
|
:raises: If the delete timestamp range is set on the
|
|
current batch, but a full row delete is attempted.
|
|
"""
|
|
if wal is not _WAL_SENTINEL:
|
|
_WARN(_WAL_WARNING)
|
|
|
|
row_object = self._get_row(row)
|
|
|
|
if columns is None:
|
|
# Delete entire row.
|
|
if self._delete_range is not None:
|
|
raise ValueError('The Cloud Bigtable API does not support '
|
|
'adding a timestamp to "DeleteFromRow" '
|
|
'mutations')
|
|
row_object.delete()
|
|
self._mutation_count += 1
|
|
else:
|
|
self._delete_columns(columns, row_object)
|
|
self._mutation_count += len(columns)
|
|
|
|
self._try_send()
|
|
|
|
def __enter__(self):
|
|
"""Enter context manager, no set-up required."""
|
|
return self
|
|
|
|
def __exit__(self, exc_type, exc_value, traceback):
|
|
"""Exit context manager, no set-up required.
|
|
|
|
:type exc_type: type
|
|
:param exc_type: The type of the exception if one occurred while the
|
|
context manager was active. Otherwise, :data:`None`.
|
|
|
|
:type exc_value: :class:`Exception <exceptions.Exception>`
|
|
:param exc_value: An instance of ``exc_type`` if an exception occurred
|
|
while the context was active.
|
|
Otherwise, :data:`None`.
|
|
|
|
:type traceback: ``traceback`` type
|
|
:param traceback: The traceback where the exception occurred (if one
|
|
did occur). Otherwise, :data:`None`.
|
|
"""
|
|
# If the context manager encountered an exception and the batch is
|
|
# transactional, we don't commit the mutations.
|
|
if self._transaction and exc_type is not None:
|
|
return
|
|
|
|
# NOTE: For non-transactional batches, this will even commit mutations
|
|
# if an error occurred during the context manager.
|
|
self.send()
|
|
|
|
|
|
def _get_column_pairs(columns, require_qualifier=False):
|
|
"""Turns a list of column or column families into parsed pairs.
|
|
|
|
Turns a column family (``fam`` or ``fam:``) into a pair such
|
|
as ``['fam', None]`` and turns a column (``fam:col``) into
|
|
``['fam', 'col']``.
|
|
|
|
:type columns: list
|
|
:param columns: Iterable containing column names (as
|
|
strings). Each column name can be either
|
|
|
|
* an entire column family: ``fam`` or ``fam:``
|
|
* a single column: ``fam:col``
|
|
|
|
:type require_qualifier: bool
|
|
:param require_qualifier: Boolean indicating if the columns should
|
|
all have a qualifier or not.
|
|
|
|
:rtype: list
|
|
:returns: List of pairs, where the first element in each pair is the
|
|
column family and the second is the column qualifier
|
|
(or :data:`None`).
|
|
:raises: :class:`ValueError <exceptions.ValueError>` if any of the columns
|
|
are not of the expected format.
|
|
:class:`ValueError <exceptions.ValueError>` if
|
|
``require_qualifier`` is :data:`True` and one of the values is
|
|
for an entire column family
|
|
"""
|
|
column_pairs = []
|
|
for column in columns:
|
|
if isinstance(column, six.binary_type):
|
|
column = column.decode('utf-8')
|
|
# Remove trailing colons (i.e. for standalone column family).
|
|
if column.endswith(u':'):
|
|
column = column[:-1]
|
|
num_colons = column.count(u':')
|
|
if num_colons == 0:
|
|
# column is a column family.
|
|
if require_qualifier:
|
|
raise ValueError('column does not contain a qualifier',
|
|
column)
|
|
else:
|
|
column_pairs.append([column, None])
|
|
elif num_colons == 1:
|
|
column_pairs.append(column.split(u':'))
|
|
else:
|
|
raise ValueError('Column contains the : separator more than once')
|
|
|
|
return column_pairs
|