116 lines
4.3 KiB
Python
116 lines
4.3 KiB
Python
# Copyright 2016 Google Inc. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""Time series as :mod:`pandas` dataframes."""
|
|
|
|
import itertools
|
|
|
|
TOP_RESOURCE_LABELS = (
|
|
'project_id',
|
|
'aws_account',
|
|
'location',
|
|
'region',
|
|
'zone',
|
|
)
|
|
|
|
|
|
def _build_dataframe(time_series_iterable,
|
|
label=None, labels=None): # pragma: NO COVER
|
|
"""Build a :mod:`pandas` dataframe out of time series.
|
|
|
|
:type time_series_iterable:
|
|
iterable over :class:`~gcloud.monitoring.timeseries.TimeSeries`
|
|
:param time_series_iterable:
|
|
An iterable (e.g., a query object) yielding time series.
|
|
|
|
:type label: string or None
|
|
:param label:
|
|
The label name to use for the dataframe header. This can be the name
|
|
of a resource label or metric label (e.g., ``"instance_name"``), or
|
|
the string ``"resource_type"``.
|
|
|
|
:type labels: list of strings, or None
|
|
:param labels:
|
|
A list or tuple of label names to use for the dataframe header.
|
|
If more than one label name is provided, the resulting dataframe
|
|
will have a multi-level column header.
|
|
|
|
Specifying neither ``label`` or ``labels`` results in a dataframe
|
|
with a multi-level column header including the resource type and
|
|
all available resource and metric labels.
|
|
|
|
Specifying both ``label`` and ``labels`` is an error.
|
|
|
|
:rtype: :class:`pandas.DataFrame`
|
|
:returns: A dataframe where each column represents one time series.
|
|
"""
|
|
import pandas # pylint: disable=import-error
|
|
|
|
if labels is not None:
|
|
if label is not None:
|
|
raise ValueError('Cannot specify both "label" and "labels".')
|
|
elif not labels:
|
|
raise ValueError('"labels" must be non-empty or None.')
|
|
|
|
columns = []
|
|
headers = []
|
|
for time_series in time_series_iterable:
|
|
pandas_series = pandas.Series(
|
|
data=[point.value for point in time_series.points],
|
|
index=[point.end_time for point in time_series.points],
|
|
)
|
|
columns.append(pandas_series)
|
|
headers.append(time_series.header())
|
|
|
|
# Implement a smart default of using all available labels.
|
|
if label is None and labels is None:
|
|
resource_labels = set(itertools.chain.from_iterable(
|
|
header.resource.labels for header in headers))
|
|
metric_labels = set(itertools.chain.from_iterable(
|
|
header.metric.labels for header in headers))
|
|
labels = (['resource_type'] +
|
|
_sorted_resource_labels(resource_labels) +
|
|
sorted(metric_labels))
|
|
|
|
# Assemble the columns into a DataFrame.
|
|
dataframe = pandas.DataFrame.from_records(columns).T
|
|
|
|
# Convert the timestamp strings into a DatetimeIndex.
|
|
dataframe.index = pandas.to_datetime(dataframe.index)
|
|
|
|
# Build a multi-level stack of column headers. Some labels may
|
|
# be undefined for some time series.
|
|
levels = []
|
|
for key in labels or [label]:
|
|
level = [header.labels.get(key, '') for header in headers]
|
|
levels.append(level)
|
|
|
|
# Build a column Index or MultiIndex. Do not include level names
|
|
# in the column header if the user requested a single-level header
|
|
# by specifying "label".
|
|
dataframe.columns = pandas.MultiIndex.from_arrays(
|
|
levels,
|
|
names=labels or None)
|
|
|
|
# Sort the rows just in case (since the API doesn't guarantee the
|
|
# ordering), and sort the columns lexicographically.
|
|
return dataframe.sort_index(axis=0).sort_index(axis=1)
|
|
|
|
|
|
def _sorted_resource_labels(labels):
|
|
"""Sort label names, putting well-known resource labels first."""
|
|
head = [label for label in TOP_RESOURCE_LABELS if label in labels]
|
|
tail = sorted(label for label in labels
|
|
if label not in TOP_RESOURCE_LABELS)
|
|
return head + tail
|