Vehicle-Anti-Theft-Face-Rec.../venv/Lib/site-packages/gcloud/bigtable/happybase/table.py

980 lines
38 KiB
Python

# Copyright 2016 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Google Cloud Bigtable HappyBase table module."""
import struct
import warnings
import six
from gcloud._helpers import _datetime_from_microseconds
from gcloud._helpers import _microseconds_from_datetime
from gcloud._helpers import _to_bytes
from gcloud._helpers import _total_seconds
from gcloud.bigtable.column_family import GCRuleIntersection
from gcloud.bigtable.column_family import MaxAgeGCRule
from gcloud.bigtable.column_family import MaxVersionsGCRule
from gcloud.bigtable.happybase.batch import _get_column_pairs
from gcloud.bigtable.happybase.batch import _WAL_SENTINEL
from gcloud.bigtable.happybase.batch import Batch
from gcloud.bigtable.row_filters import CellsColumnLimitFilter
from gcloud.bigtable.row_filters import ColumnQualifierRegexFilter
from gcloud.bigtable.row_filters import FamilyNameRegexFilter
from gcloud.bigtable.row_filters import RowFilterChain
from gcloud.bigtable.row_filters import RowFilterUnion
from gcloud.bigtable.row_filters import RowKeyRegexFilter
from gcloud.bigtable.row_filters import TimestampRange
from gcloud.bigtable.row_filters import TimestampRangeFilter
from gcloud.bigtable.table import Table as _LowLevelTable
# Module-level alias for ``warnings.warn``; ``_scan_filter_helper`` calls it
# to report HappyBase legacy keyword arguments that are accepted but ignored.
_WARN = warnings.warn
# Unpacker for struct format ``'>q'`` (big-endian signed 64-bit integer);
# ``Table.counter_inc`` uses it to decode the returned counter cell value.
_UNPACK_I64 = struct.Struct('>q').unpack
# Garbage collection rule classes that ``_gc_rule_to_dict`` accepts as the
# two members of a ``GCRuleIntersection`` it converts to a dictionary.
_SIMPLE_GC_RULES = (MaxAgeGCRule, MaxVersionsGCRule)
def make_row(cell_map, include_timestamp):
    """Make a row dict for a Thrift cell mapping.

    .. warning::

        This helper exists only so that the module exposes the same names
        as HappyBase. It never produces a row; it always raises.

    :type cell_map: dict
    :param cell_map: Dictionary with ``fam:col`` strings as keys and ``TCell``
                     instances as values.

    :type include_timestamp: bool
    :param include_timestamp: Flag to indicate if cell timestamps should be
                              included with the output.

    :raises: :class:`NotImplementedError <exceptions.NotImplementedError>`
             always
    """
    reason = ('The Cloud Bigtable API output is not the same '
              'as the output from the Thrift server, so this '
              'helper can not be implemented.')
    # The received arguments are attached to the exception for debugging.
    raise NotImplementedError(reason, 'Called with',
                              cell_map, include_timestamp)
def make_ordered_row(sorted_columns, include_timestamp):
    """Make a row dict for sorted Thrift column results from scans.

    .. warning::

        This helper exists only so that the module exposes the same names
        as HappyBase. It never produces a row; it always raises.

    :type sorted_columns: list
    :param sorted_columns: List of ``TColumn`` instances from Thrift.

    :type include_timestamp: bool
    :param include_timestamp: Flag to indicate if cell timestamps should be
                              included with the output.

    :raises: :class:`NotImplementedError <exceptions.NotImplementedError>`
             always
    """
    reason = ('The Cloud Bigtable API output is not the same '
              'as the output from the Thrift server, so this '
              'helper can not be implemented.')
    # The received arguments are attached to the exception for debugging.
    raise NotImplementedError(reason, 'Called with',
                              sorted_columns, include_timestamp)
class Table(object):
    """Representation of Cloud Bigtable table.

    Used for adding, reading and deleting row data, and for working with
    atomic counter columns.

    :type name: str
    :param name: The name of the table.

    :type connection: :class:`Connection <.happybase.connection.Connection>`
    :param connection: The connection which has access to the table.
    """

    def __init__(self, name, connection):
        self.name = name
        # This remains as legacy for HappyBase, but only the instance
        # from the connection is needed.
        self.connection = connection
        # Without a connection there is no low-level table to talk to, so
        # every method that reads or writes data will fail.
        self._low_level_table = None
        if self.connection is not None:
            self._low_level_table = _LowLevelTable(self.name,
                                                   self.connection._instance)

    def __repr__(self):
        return '<table.Table name=%r>' % (self.name,)

    @staticmethod
    def _cells_for_qualifier(family_cells, column_qualifier):
        """Look up the cells of one column qualifier in a family mapping.

        The qualifier is tried as given and, failing that, in its other
        string form (UTF-8 encoded bytes vs. decoded text), so the lookup
        works whether the mapping is keyed by bytes or by text.

        :type family_cells: dict
        :param family_cells: Mapping from column qualifier to list of cells.

        :type column_qualifier: str
        :param column_qualifier: The column qualifier (bytes or text).

        :rtype: list
        :returns: The cells stored for the qualifier.
        :raises: :class:`KeyError <exceptions.KeyError>` if the qualifier is
                 not present in either form.
        """
        if column_qualifier in family_cells:
            return family_cells[column_qualifier]
        if isinstance(column_qualifier, six.binary_type):
            alternate = column_qualifier.decode('utf-8')
        else:
            alternate = column_qualifier.encode('utf-8')
        if alternate in family_cells:
            return family_cells[alternate]
        raise KeyError(column_qualifier)

    def families(self):
        """Retrieve the column families for this table.

        :rtype: dict
        :returns: Mapping from column family name to garbage collection rule
                  for a column family.
        """
        column_family_map = self._low_level_table.list_column_families()
        result = {}
        for col_fam, col_fam_obj in six.iteritems(column_family_map):
            result[col_fam] = _gc_rule_to_dict(col_fam_obj.gc_rule)
        return result

    def regions(self):
        """Retrieve the regions for this table.

        .. warning::

            Cloud Bigtable does not give information about how a table is
            laid out in memory, so this method does not work. It is
            provided simply for compatibility.

        :raises: :class:`NotImplementedError <exceptions.NotImplementedError>`
                 always
        """
        raise NotImplementedError('The Cloud Bigtable API does not have a '
                                  'concept of splitting a table into regions.')

    def row(self, row, columns=None, timestamp=None, include_timestamp=False):
        """Retrieve a single row of data.

        Returns the latest cells in each column (or all columns if ``columns``
        is not specified). If a ``timestamp`` is set, then **latest** becomes
        **latest** up until ``timestamp``.

        :type row: str
        :param row: Row key for the row we are reading from.

        :type columns: list
        :param columns: (Optional) Iterable containing column names (as
                        strings). Each column name can be either

                          * an entire column family: ``fam`` or ``fam:``
                          * a single column: ``fam:col``

        :type timestamp: int
        :param timestamp: (Optional) Timestamp (in milliseconds since the
                          epoch). If specified, only cells with a timestamp
                          before it (exclusive upper bound) will be
                          returned.

        :type include_timestamp: bool
        :param include_timestamp: Flag to indicate if cell timestamps should be
                                  included with the output.

        :rtype: dict
        :returns: Dictionary containing all the latest column values in
                  the row. Empty if the row was not found.
        """
        filters = []
        if columns is not None:
            filters.append(_columns_filter_helper(columns))
        # versions == 1 since we only want the latest.
        filter_ = _filter_chain_helper(versions=1, timestamp=timestamp,
                                       filters=filters)

        partial_row_data = self._low_level_table.read_row(
            row, filter_=filter_)
        if partial_row_data is None:
            return {}

        return _partial_row_to_dict(partial_row_data,
                                    include_timestamp=include_timestamp)

    def rows(self, rows, columns=None, timestamp=None,
             include_timestamp=False):
        """Retrieve multiple rows of data.

        All optional arguments behave the same in this method as they do in
        :meth:`row`.

        :type rows: list
        :param rows: Iterable of the row keys for the rows we are reading from.

        :type columns: list
        :param columns: (Optional) Iterable containing column names (as
                        strings). Each column name can be either

                          * an entire column family: ``fam`` or ``fam:``
                          * a single column: ``fam:col``

        :type timestamp: int
        :param timestamp: (Optional) Timestamp (in milliseconds since the
                          epoch). If specified, only cells with a timestamp
                          before it (exclusive upper bound) will be
                          returned.

        :type include_timestamp: bool
        :param include_timestamp: Flag to indicate if cell timestamps should be
                                  included with the output.

        :rtype: list
        :returns: A list of pairs, where the first is the row key and the
                  second is a dictionary with the filtered values returned.
                  Pairs follow the order of ``rows``; keys that were not
                  found are omitted.
        """
        # The keys are walked twice (to build the filter and to order the
        # result), so materialize them in case a one-shot iterator was given.
        rows = list(rows)
        if not rows:
            # Avoid round-trip if the result is empty anyway
            return []

        filters = []
        if columns is not None:
            filters.append(_columns_filter_helper(columns))
        filters.append(_row_keys_filter_helper(rows))
        # versions == 1 since we only want the latest.
        filter_ = _filter_chain_helper(versions=1, timestamp=timestamp,
                                       filters=filters)

        partial_rows_data = self._low_level_table.read_rows(filter_=filter_)
        # NOTE: We could use max_loops = 1000 or some similar value to ensure
        #       that the stream isn't open too long.
        partial_rows_data.consume_all()

        result = []
        for row_key in rows:
            if row_key not in partial_rows_data.rows:
                continue
            curr_row_data = partial_rows_data.rows[row_key]
            curr_row_dict = _partial_row_to_dict(
                curr_row_data, include_timestamp=include_timestamp)
            result.append((row_key, curr_row_dict))

        return result

    def cells(self, row, column, versions=None, timestamp=None,
              include_timestamp=False):
        """Retrieve multiple versions of a single cell from the table.

        :type row: str
        :param row: Row key for the row we are reading from.

        :type column: str
        :param column: Column we are reading from; of the form ``fam:col``.

        :type versions: int
        :param versions: (Optional) The maximum number of cells to return. If
                         not set, returns all cells found.

        :type timestamp: int
        :param timestamp: (Optional) Timestamp (in milliseconds since the
                          epoch). If specified, only cells with a timestamp
                          before it (exclusive upper bound) will be
                          returned.

        :type include_timestamp: bool
        :param include_timestamp: Flag to indicate if cell timestamps should be
                                  included with the output.

        :rtype: list
        :returns: List of values in the cell (with timestamps if
                  ``include_timestamp`` is :data:`True`).
        """
        filter_ = _filter_chain_helper(column=column, versions=versions,
                                       timestamp=timestamp)
        partial_row_data = self._low_level_table.read_row(row, filter_=filter_)
        if partial_row_data is None:
            return []

        # `_filter_chain_helper` has already verified that `column` decodes
        # and splits as such; decode here as well so that a bytes column
        # name is handled the same way as in `counter_inc`.
        if isinstance(column, six.binary_type):
            column = column.decode('utf-8')
        column_family_id, column_qualifier = column.split(':')
        # NOTE: We expect the only key in `_cells` is `column_family_id`
        #       and the only key in `_cells[column_family_id]` is
        #       `column_qualifier`. But we don't check that this is true.
        family_cells = partial_row_data._cells[column_family_id]
        curr_cells = self._cells_for_qualifier(family_cells, column_qualifier)
        return _cells_to_pairs(
            curr_cells, include_timestamp=include_timestamp)

    def scan(self, row_start=None, row_stop=None, row_prefix=None,
             columns=None, timestamp=None,
             include_timestamp=False, limit=None, **kwargs):
        """Create a scanner for data in this table.

        This method returns a generator that can be used for looping over the
        matching rows.

        If ``row_prefix`` is specified, only rows with row keys matching the
        prefix will be returned. If given, ``row_start`` and ``row_stop``
        cannot be used.

        .. note::

            Both ``row_start`` and ``row_stop`` can be :data:`None` to specify
            the start and the end of the table respectively. If both are
            omitted, a full table scan is done. Note that this usually results
            in severe performance problems.

        The keyword argument ``filter`` is also supported (beyond column and
        row range filters supported here). HappyBase / HBase users will have
        used this as an HBase filter string. (See the `Thrift docs`_ for more
        details on those filters.) However, Google Cloud Bigtable doesn't
        support those filter strings so a
        :class:`~gcloud.bigtable.row.RowFilter` should be used instead.

        .. _Thrift docs: http://hbase.apache.org/0.94/book/thrift.html

        The arguments ``batch_size``, ``scan_batching`` and ``sorted_columns``
        are allowed (as keyword arguments) for compatibility with
        HappyBase. However, they will not be used in any way, and will cause a
        warning if passed. (The ``batch_size`` determines the number of
        results to retrieve per request. The HBase scanner defaults to reading
        one record at a time, so this argument allows HappyBase to increase
        that number. However, the Cloud Bigtable API uses HTTP/2 streaming so
        there is no concept of a batched scan. The ``sorted_columns`` flag
        tells HBase to return columns in order, but Cloud Bigtable doesn't
        have this feature.)

        :type row_start: str
        :param row_start: (Optional) Row key where the scanner should start
                          (includes ``row_start``). If not specified, reads
                          from the first key. If the table does not contain
                          ``row_start``, it will start from the next key after
                          it that **is** contained in the table.

        :type row_stop: str
        :param row_stop: (Optional) Row key where the scanner should stop
                         (excludes ``row_stop``). If not specified, reads
                         until the last key. The table does not have to contain
                         ``row_stop``.

        :type row_prefix: str
        :param row_prefix: (Optional) Prefix to match row keys.

        :type columns: list
        :param columns: (Optional) Iterable containing column names (as
                        strings). Each column name can be either

                          * an entire column family: ``fam`` or ``fam:``
                          * a single column: ``fam:col``

        :type timestamp: int
        :param timestamp: (Optional) Timestamp (in milliseconds since the
                          epoch). If specified, only cells with a timestamp
                          before it (exclusive upper bound) will be
                          returned.

        :type include_timestamp: bool
        :param include_timestamp: Flag to indicate if cell timestamps should be
                                  included with the output.

        :type limit: int
        :param limit: (Optional) Maximum number of rows to return.

        :type kwargs: dict
        :param kwargs: Remaining keyword arguments. Provided for HappyBase
                       compatibility.

        :raises: :class:`ValueError <exceptions.ValueError>` if ``limit`` is
                 set but non-positive, or if ``row_prefix`` is used with
                 row start/stop; :class:`TypeError <exceptions.TypeError>`
                 if a string ``filter`` is used or an unexpected keyword
                 argument is passed.
        """
        row_start, row_stop, filter_chain = _scan_filter_helper(
            row_start, row_stop, row_prefix, columns, timestamp, limit, kwargs)

        partial_rows_data = self._low_level_table.read_rows(
            start_key=row_start, end_key=row_stop,
            limit=limit, filter_=filter_chain)

        # NOTE(review): this relies on `rows` being the live mapping that
        #               `consume_next()` fills in -- confirm against the
        #               low-level row data implementation.
        rows_dict = partial_rows_data.rows
        while True:
            # Only the stream read may signal exhaustion; keeping the
            # `yield` outside of the `try` ensures that a StopIteration
            # coming from anywhere else is not silently swallowed.
            try:
                partial_rows_data.consume_next()
            except StopIteration:
                break

            for row_key in sorted(rows_dict):
                curr_row_data = rows_dict.pop(row_key)
                # NOTE: We expect len(rows_dict) == 0, but don't check it.
                curr_row_dict = _partial_row_to_dict(
                    curr_row_data, include_timestamp=include_timestamp)
                yield (row_key, curr_row_dict)

    def put(self, row, data, timestamp=None, wal=_WAL_SENTINEL):
        """Insert data into a row in this table.

        .. note::

            This method will send a request with a single "put" mutation.
            In many situations, :meth:`batch` is a more appropriate
            method to manipulate data since it helps combine many mutations
            into a single request.

        :type row: str
        :param row: The row key where the mutation will be "put".

        :type data: dict
        :param data: Dictionary containing the data to be inserted. The keys
                     are columns names (of the form ``fam:col``) and the values
                     are strings (bytes) to be stored in those columns.

        :type timestamp: int
        :param timestamp: (Optional) Timestamp (in milliseconds since the
                          epoch) that the mutation will be applied at.

        :type wal: object
        :param wal: Unused parameter (to be passed to a created batch).
                    Provided for compatibility with HappyBase, but irrelevant
                    for Cloud Bigtable since it does not have a Write Ahead
                    Log.
        """
        with self.batch(timestamp=timestamp, wal=wal) as batch:
            batch.put(row, data)

    def delete(self, row, columns=None, timestamp=None, wal=_WAL_SENTINEL):
        """Delete data from a row in this table.

        This method deletes the entire ``row`` if ``columns`` is not
        specified.

        .. note::

            This method will send a request with a single delete mutation.
            In many situations, :meth:`batch` is a more appropriate
            method to manipulate data since it helps combine many mutations
            into a single request.

        :type row: str
        :param row: The row key where the delete will occur.

        :type columns: list
        :param columns: (Optional) Iterable containing column names (as
                        strings). Each column name can be either

                          * an entire column family: ``fam`` or ``fam:``
                          * a single column: ``fam:col``

        :type timestamp: int
        :param timestamp: (Optional) Timestamp (in milliseconds since the
                          epoch) that the mutation will be applied at.

        :type wal: object
        :param wal: Unused parameter (to be passed to a created batch).
                    Provided for compatibility with HappyBase, but irrelevant
                    for Cloud Bigtable since it does not have a Write Ahead
                    Log.
        """
        with self.batch(timestamp=timestamp, wal=wal) as batch:
            batch.delete(row, columns)

    def batch(self, timestamp=None, batch_size=None, transaction=False,
              wal=_WAL_SENTINEL):
        """Create a new batch operation for this table.

        This method returns a new
        :class:`Batch <.happybase.batch.Batch>` instance that can be
        used for mass data manipulation.

        :type timestamp: int
        :param timestamp: (Optional) Timestamp (in milliseconds since the
                          epoch) that all mutations will be applied at.

        :type batch_size: int
        :param batch_size: (Optional) The maximum number of mutations to allow
                           to accumulate before committing them.

        :type transaction: bool
        :param transaction: Flag indicating if the mutations should be sent
                            transactionally or not. If ``transaction=True`` and
                            an error occurs while a
                            :class:`Batch <.happybase.batch.Batch>` is
                            active, then none of the accumulated mutations will
                            be committed. If ``batch_size`` is set, the
                            mutation can't be transactional.

        :type wal: object
        :param wal: Unused parameter (to be passed to the created batch).
                    Provided for compatibility with HappyBase, but irrelevant
                    for Cloud Bigtable since it does not have a Write Ahead
                    Log.

        :rtype: :class:`Batch <gcloud.bigtable.happybase.batch.Batch>`
        :returns: A batch bound to this table.
        """
        return Batch(self, timestamp=timestamp, batch_size=batch_size,
                     transaction=transaction, wal=wal)

    def counter_get(self, row, column):
        """Retrieve the current value of a counter column.

        This method retrieves the current value of a counter column. If the
        counter column does not exist, this function initializes it to ``0``.

        .. note::

            Application code should **never** store a counter value directly;
            use the atomic :meth:`counter_inc` and :meth:`counter_dec` methods
            for that.

        :type row: str
        :param row: Row key for the row we are getting a counter from.

        :type column: str
        :param column: Column we are ``get``-ing from; of the form ``fam:col``.

        :rtype: int
        :returns: Counter value (after initializing / incrementing by 0).
        """
        # Don't query directly, but increment with value=0 so that the counter
        # is correctly initialized if didn't exist yet.
        return self.counter_inc(row, column, value=0)

    def counter_set(self, row, column, value=0):
        """Set a counter column to a specific value.

        This method is provided in HappyBase, but we do not provide it here
        because it defeats the purpose of using atomic increment and decrement
        of a counter.

        :type row: str
        :param row: Row key for the row we are setting a counter in.

        :type column: str
        :param column: Column we are setting a value in; of
                       the form ``fam:col``.

        :type value: int
        :param value: Value to set the counter to.

        :raises: :class:`NotImplementedError <exceptions.NotImplementedError>`
                 always
        """
        raise NotImplementedError('Table.counter_set will not be implemented. '
                                  'Instead use the increment/decrement '
                                  'methods along with counter_get.')

    def counter_inc(self, row, column, value=1):
        """Atomically increment a counter column.

        This method atomically increments a counter column in ``row``.
        If the counter column does not exist, it is automatically initialized
        to ``0`` before being incremented.

        :type row: str
        :param row: Row key for the row we are incrementing a counter in.

        :type column: str
        :param column: Column we are incrementing a value in; of the
                       form ``fam:col``.

        :type value: int
        :param value: Amount to increment the counter by. (If negative,
                      this is equivalent to decrement.)

        :rtype: int
        :returns: Counter value after incrementing.
        :raises: :class:`ValueError <exceptions.ValueError>` if the commit
                 does not report exactly one modified cell for the column.
        """
        append_row = self._low_level_table.row(row, append=True)
        if isinstance(column, six.binary_type):
            column = column.decode('utf-8')
        column_family_id, column_qualifier = column.split(':')
        append_row.increment_cell_value(
            column_family_id, column_qualifier, value)
        # See AppendRow.commit() will return a dictionary:
        # {
        #     u'col-fam-id': {
        #         b'col-name1': [
        #             (b'cell-val', datetime.datetime(...)),
        #             ...
        #         ],
        #         ...
        #     },
        # }
        modified_cells = append_row.commit()
        # Get the cells in the modified column. The qualifier keys are
        # bytes (see above) while `column_qualifier` is text here, so the
        # lookup has to tolerate either form.
        column_cells = self._cells_for_qualifier(
            modified_cells[column_family_id], column_qualifier)
        # Make sure there is exactly one cell in the column.
        if len(column_cells) != 1:
            raise ValueError('Expected server to return one modified cell.')
        column_cell = column_cells[0]
        # Get the bytes value from the column and convert it to an integer.
        bytes_value = column_cell[0]
        int_value, = _UNPACK_I64(bytes_value)
        return int_value

    def counter_dec(self, row, column, value=1):
        """Atomically decrement a counter column.

        This method atomically decrements a counter column in ``row``.
        If the counter column does not exist, it is automatically initialized
        to ``0`` before being decremented.

        :type row: str
        :param row: Row key for the row we are decrementing a counter in.

        :type column: str
        :param column: Column we are decrementing a value in; of the
                       form ``fam:col``.

        :type value: int
        :param value: Amount to decrement the counter by. (If negative,
                      this is equivalent to increment.)

        :rtype: int
        :returns: Counter value after decrementing.
        """
        return self.counter_inc(row, column, -value)
def _gc_rule_to_dict(gc_rule):
    """Converts garbage collection rule to dictionary if possible.

    HappyBase describes the garbage collection settings of a column family
    with a plain dictionary, so the rules which have an equivalent are
    translated into that form.

    A rule is converted only if it is one of:

    * :class:`gcloud.bigtable.column_family.MaxAgeGCRule`
    * :class:`gcloud.bigtable.column_family.MaxVersionsGCRule`
    * Composite :class:`gcloud.bigtable.column_family.GCRuleIntersection`
      with two rules, one each of type
      :class:`gcloud.bigtable.column_family.MaxAgeGCRule` and
      :class:`gcloud.bigtable.column_family.MaxVersionsGCRule`

    Any other rule is handed back untouched.

    :type gc_rule: :data:`NoneType <types.NoneType>`,
                   :class:`.GarbageCollectionRule`
    :param gc_rule: A garbage collection rule to convert to a dictionary
                    (if possible).

    :rtype: dict or
            :class:`gcloud.bigtable.column_family.GarbageCollectionRule`
    :returns: The converted garbage collection rule.
    """
    if gc_rule is None:
        return {}
    if isinstance(gc_rule, MaxAgeGCRule):
        return {'time_to_live': _total_seconds(gc_rule.max_age)}
    if isinstance(gc_rule, MaxVersionsGCRule):
        return {'max_versions': gc_rule.max_num_versions}

    # From here on only a two-member intersection of simple rules qualifies.
    if not isinstance(gc_rule, GCRuleIntersection):
        return gc_rule
    if len(gc_rule.rules) != 2:
        return gc_rule

    first_rule, second_rule = gc_rule.rules
    both_simple = (isinstance(first_rule, _SIMPLE_GC_RULES) and
                   isinstance(second_rule, _SIMPLE_GC_RULES))
    if not both_simple:
        return gc_rule

    first_dict = _gc_rule_to_dict(first_rule)
    second_dict = _gc_rule_to_dict(second_rule)
    first_key, = first_dict.keys()
    second_key, = second_dict.keys()
    # Two rules of the same kind can not be merged into one dictionary.
    if first_key == second_key:
        return gc_rule
    return {first_key: first_dict[first_key],
            second_key: second_dict[second_key]}
def _next_char(str_val, index):
    """Gets the next character based on a position in a string.

    :type str_val: str
    :param str_val: A string containing the character to update.

    :type index: int
    :param index: An integer index in ``str_val``.

    :rtype: str
    :returns: The single byte whose ordinal is one greater than that of
              the byte at ``index`` in ``str_val``.
    """
    successor_ord = six.indexbytes(str_val, index) + 1
    successor_chr = chr(successor_ord)
    return _to_bytes(successor_chr, encoding='latin-1')
def _string_successor(str_val):
    """Increment and truncate a byte string.

    Determines shortest string that sorts after the given string when
    compared using regular string comparison semantics.

    Modeled after implementation in ``gcloud-golang``.

    The trailing run of ``0xFF`` bytes is dropped and the byte in front of
    it is incremented. If nothing is left after dropping that run (the
    input was empty or made up of ``0xFF`` bytes only), ``b''`` is returned.

    :type str_val: str
    :param str_val: String to increment.

    :rtype: str
    :returns: The next string in lexical order after ``str_val``.
    """
    as_bytes = _to_bytes(str_val, encoding='latin-1')
    # A 0xFF byte can not be incremented, so the trailing ones are dropped.
    trimmed = as_bytes.rstrip(b'\xff')
    if not trimmed:
        return b''
    last_index = len(trimmed) - 1
    return trimmed[:last_index] + _next_char(trimmed, last_index)
def _convert_to_time_range(timestamp=None):
    """Create a timestamp range from an HBase / HappyBase timestamp.

    HBase treats the timestamp argument as an exclusive end deadline. The
    end of a Cloud Bigtable timestamp range is exclusive as well, so the
    value is used as the range end without adjustment.

    :type timestamp: int
    :param timestamp: (Optional) Timestamp (in milliseconds since the
                      epoch). Intended to be used as the end of an HBase
                      time range, which is exclusive.

    :rtype: :class:`gcloud.bigtable.row.TimestampRange`,
            :data:`NoneType <types.NoneType>`
    :returns: The timestamp range corresponding to the passed in
              ``timestamp``.
    """
    if timestamp is not None:
        # HappyBase works in milliseconds, the helper expects microseconds.
        end_micros = 1000 * timestamp
        return TimestampRange(end=_datetime_from_microseconds(end_micros))
    return None
def _cells_to_pairs(cells, include_timestamp=False):
    """Converts list of cells to HappyBase format.

    For example::

      >>> import datetime
      >>> from gcloud.bigtable.row_data import Cell
      >>> cell1 = Cell(b'val1', datetime.datetime.utcnow())
      >>> cell2 = Cell(b'val2', datetime.datetime.utcnow())
      >>> _cells_to_pairs([cell1, cell2])
      [b'val1', b'val2']
      >>> _cells_to_pairs([cell1, cell2], include_timestamp=True)
      [(b'val1', 1456361486255), (b'val2', 1456361491927)]

    :type cells: list
    :param cells: List of :class:`gcloud.bigtable.row_data.Cell` returned
                  from a read request.

    :type include_timestamp: bool
    :param include_timestamp: Flag to indicate if cell timestamps should be
                              included with the output.

    :rtype: list
    :returns: List of values in the cell. If ``include_timestamp=True``, each
              value will be a pair, with the first part the bytes value in
              the cell and the second part the number of milliseconds in the
              timestamp on the cell.
    """
    if not include_timestamp:
        return [cell.value for cell in cells]
    # Cell timestamps are converted from microseconds down to milliseconds.
    return [
        (cell.value, _microseconds_from_datetime(cell.timestamp) // 1000)
        for cell in cells
    ]
def _partial_row_to_dict(partial_row_data, include_timestamp=False):
    """Convert a low-level row data object to a dictionary.

    Only the first cell of every column is kept. The callers of this
    helper read with a ``CellsColumnLimitFilter(1)`` filter, so a single
    (latest) cell per column is all that is expected.

    For example::

      >>> import datetime
      >>> from gcloud.bigtable.row_data import Cell, PartialRowData
      >>> cell1 = Cell(b'val1', datetime.datetime.utcnow())
      >>> cell2 = Cell(b'val2', datetime.datetime.utcnow())
      >>> row_data = PartialRowData(b'row-key')
      >>> _partial_row_to_dict(row_data)
      {}
      >>> row_data._cells[u'fam1'] = {b'col1': [cell1], b'col2': [cell2]}
      >>> _partial_row_to_dict(row_data)
      {b'fam1:col2': b'val2', b'fam1:col1': b'val1'}
      >>> _partial_row_to_dict(row_data, include_timestamp=True)
      {b'fam1:col2': (b'val2', 1456361724480),
       b'fam1:col1': (b'val1', 1456361721135)}

    :type partial_row_data: :class:`.row_data.PartialRowData`
    :param partial_row_data: Row data consumed from a stream.

    :type include_timestamp: bool
    :param include_timestamp: Flag to indicate if cell timestamps should be
                              included with the output.

    :rtype: dict
    :returns: The row data converted to a dictionary.
    """
    columns = partial_row_data.to_dict()
    # NOTE: Exactly 1 version per column is assumed (that is what the
    #       filter asked for), but this is not checked.
    return dict(
        (column_name,
         _cells_to_pairs(column_cells,
                         include_timestamp=include_timestamp)[0])
        for column_name, column_cells in six.iteritems(columns))
def _filter_chain_helper(column=None, versions=None, timestamp=None,
                         filters=None):
    """Create filter chain to limit a results set.

    :type column: str
    :param column: (Optional) The column (``fam:col``) to be selected
                   with the filter.

    :type versions: int
    :param versions: (Optional) The maximum number of cells to return.

    :type timestamp: int
    :param timestamp: (Optional) Timestamp (in milliseconds since the
                      epoch). If specified, only cells with a timestamp
                      before it (exclusive upper bound) will be matched.

    :type filters: list
    :param filters: (Optional) List of existing filters to be extended.
                    The list itself is left unmodified; the new filters
                    are added to a copy.

    :rtype: :class:`RowFilter <gcloud.bigtable.row.RowFilter>`
    :returns: The chained filter created, or just a single filter if only
              one was needed.
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to chain.
    """
    # Work on a copy so that the caller's list is not changed as a side
    # effect of building the chain.
    filters = [] if filters is None else list(filters)

    if column is not None:
        if isinstance(column, six.binary_type):
            column = column.decode('utf-8')
        column_family_id, column_qualifier = column.split(':')
        fam_filter = FamilyNameRegexFilter(column_family_id)
        qual_filter = ColumnQualifierRegexFilter(column_qualifier)
        filters.extend([fam_filter, qual_filter])
    if versions is not None:
        filters.append(CellsColumnLimitFilter(versions))
    time_range = _convert_to_time_range(timestamp=timestamp)
    if time_range is not None:
        filters.append(TimestampRangeFilter(time_range))

    num_filters = len(filters)
    if num_filters == 0:
        raise ValueError('Must have at least one filter.')
    elif num_filters == 1:
        # A lone filter does not need to be wrapped in a chain.
        return filters[0]
    else:
        return RowFilterChain(filters=filters)
def _scan_filter_helper(row_start, row_stop, row_prefix, columns,
                        timestamp, limit, kwargs):
    """Helper for :meth:`scan`: build up a filter chain.

    Validates the scan arguments (consuming the recognized entries of
    ``kwargs`` in the process), turns a row prefix into a start / stop key
    pair and combines the requested filters.

    :rtype: tuple
    :returns: The triple ``(row_start, row_stop, filter_)`` to read with.
    :raises: :class:`TypeError <exceptions.TypeError>` if unexpected keyword
             arguments remain or ``filter`` is a string;
             :class:`ValueError <exceptions.ValueError>` if ``limit`` is not
             positive or ``row_prefix`` is combined with ``row_start`` or
             ``row_stop``.
    """
    filter_ = kwargs.pop('filter', None)

    # HappyBase-only arguments are accepted but only trigger a warning.
    used_legacy = [
        kw_name
        for kw_name in ('batch_size', 'scan_batching', 'sorted_columns')
        if kw_name in kwargs
    ]
    for kw_name in used_legacy:
        kwargs.pop(kw_name)
    if used_legacy:
        joined_names = ', '.join(used_legacy)
        message = ('The HappyBase legacy arguments %s were used. These '
                   'arguments are unused by gcloud.' % (joined_names,))
        _WARN(message)

    # Anything still left over was never a valid argument.
    if kwargs:
        raise TypeError('Received unexpected arguments', kwargs.keys())

    if limit is not None and limit < 1:
        raise ValueError('limit must be positive')

    if row_prefix is not None:
        if not (row_start is None and row_stop is None):
            raise ValueError('row_prefix cannot be combined with '
                             'row_start or row_stop')
        row_start, row_stop = row_prefix, _string_successor(row_prefix)

    if isinstance(filter_, six.string_types):
        raise TypeError('Specifying filters as a string is not supported '
                        'by Cloud Bigtable. Use a '
                        'gcloud.bigtable.row.RowFilter instead.')

    chain_parts = [] if filter_ is None else [filter_]
    if columns is not None:
        chain_parts.append(_columns_filter_helper(columns))

    # versions == 1 since we only want the latest.
    combined = _filter_chain_helper(versions=1, timestamp=timestamp,
                                    filters=chain_parts)
    return row_start, row_stop, combined
def _columns_filter_helper(columns):
    """Creates a union filter for a list of columns.

    :type columns: list
    :param columns: Iterable containing column names (as strings). Each column
                    name can be either

                      * an entire column family: ``fam`` or ``fam:``
                      * a single column: ``fam:col``

    :rtype: :class:`RowFilter <gcloud.bigtable.row.RowFilter>`
    :returns: The union filter created containing all of the matched columns
              (or the lone filter itself when only one column was given).
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to union.
    """
    column_filters = []
    for family_id, qualifier in _get_column_pairs(columns):
        family_filter = FamilyNameRegexFilter(family_id)
        if qualifier is None:
            # A whole column family was requested.
            column_filters.append(family_filter)
            continue
        qualifier_filter = ColumnQualifierRegexFilter(qualifier)
        column_filters.append(
            RowFilterChain(filters=[family_filter, qualifier_filter]))

    if not column_filters:
        raise ValueError('Must have at least one filter.')
    if len(column_filters) == 1:
        return column_filters[0]
    return RowFilterUnion(filters=column_filters)
def _row_keys_filter_helper(row_keys):
    """Creates a union filter for a list of rows.

    :type row_keys: list
    :param row_keys: Iterable containing row keys (as strings).

    :rtype: :class:`RowFilter <gcloud.bigtable.row.RowFilter>`
    :returns: The union filter created containing all of the row keys (or
              the lone filter itself when only one key was given).
    :raises: :class:`ValueError <exceptions.ValueError>` if there are no
             filters to union.
    """
    # NOTE(review): each key is handed to ``RowKeyRegexFilter`` as-is, so it
    #               is presumably interpreted as a regular expression; keys
    #               containing regex metacharacters may need escaping --
    #               confirm against the filter's documentation.
    key_filters = [RowKeyRegexFilter(row_key) for row_key in row_keys]

    if not key_filters:
        raise ValueError('Must have at least one filter.')
    if len(key_filters) == 1:
        return key_filters[0]
    return RowFilterUnion(filters=key_filters)