228 lines
7 KiB
Python
228 lines
7 KiB
Python
"""
|
|
Plotting of string "category" data: ``plot(['d', 'f', 'a'], [1, 2, 3])`` will
|
|
plot three points with x-axis values of 'd', 'f', 'a'.
|
|
|
|
See :doc:`/gallery/lines_bars_and_markers/categorical_variables` for an
|
|
example.
|
|
|
|
The module uses Matplotlib's `matplotlib.units` mechanism to convert from
|
|
strings to integers and provides a tick locator, a tick formatter, and the
|
|
`.UnitData` class that creates and stores the string-to-integer mapping.
|
|
"""
|
|
|
|
from collections import OrderedDict
|
|
import dateutil.parser
|
|
import itertools
|
|
import logging
|
|
|
|
import numpy as np
|
|
|
|
from matplotlib import cbook, ticker, units
|
|
|
|
|
|
_log = logging.getLogger(__name__)
|
|
|
|
|
|
class StrCategoryConverter(units.ConversionInterface):
|
|
@staticmethod
|
|
def convert(value, unit, axis):
|
|
"""
|
|
Convert strings in *value* to floats using mapping information stored
|
|
in the *unit* object.
|
|
|
|
Parameters
|
|
----------
|
|
value : str or iterable
|
|
Value or list of values to be converted.
|
|
unit : `.UnitData`
|
|
An object mapping strings to integers.
|
|
axis : `~matplotlib.axis.Axis`
|
|
The axis on which the converted value is plotted.
|
|
|
|
.. note:: *axis* is unused.
|
|
|
|
Returns
|
|
-------
|
|
float or ndarray[float]
|
|
"""
|
|
if unit is None:
|
|
raise ValueError(
|
|
'Missing category information for StrCategoryConverter; '
|
|
'this might be caused by unintendedly mixing categorical and '
|
|
'numeric data')
|
|
# dtype = object preserves numerical pass throughs
|
|
values = np.atleast_1d(np.array(value, dtype=object))
|
|
# pass through sequence of non binary numbers
|
|
if all(units.ConversionInterface.is_numlike(v)
|
|
and not isinstance(v, (str, bytes))
|
|
for v in values):
|
|
return np.asarray(values, dtype=float)
|
|
# force an update so it also does type checking
|
|
unit.update(values)
|
|
return np.vectorize(unit._mapping.__getitem__, otypes=[float])(values)
|
|
|
|
@staticmethod
|
|
def axisinfo(unit, axis):
|
|
"""
|
|
Set the default axis ticks and labels.
|
|
|
|
Parameters
|
|
----------
|
|
unit : `.UnitData`
|
|
object string unit information for value
|
|
axis : `~matplotlib.axis.Axis`
|
|
axis for which information is being set
|
|
|
|
Returns
|
|
-------
|
|
`~matplotlib.units.AxisInfo`
|
|
Information to support default tick labeling
|
|
|
|
.. note: axis is not used
|
|
"""
|
|
# locator and formatter take mapping dict because
|
|
# args need to be pass by reference for updates
|
|
majloc = StrCategoryLocator(unit._mapping)
|
|
majfmt = StrCategoryFormatter(unit._mapping)
|
|
return units.AxisInfo(majloc=majloc, majfmt=majfmt)
|
|
|
|
@staticmethod
|
|
def default_units(data, axis):
|
|
"""
|
|
Set and update the `~matplotlib.axis.Axis` units.
|
|
|
|
Parameters
|
|
----------
|
|
data : str or iterable of str
|
|
axis : `~matplotlib.axis.Axis`
|
|
axis on which the data is plotted
|
|
|
|
Returns
|
|
-------
|
|
`.UnitData`
|
|
object storing string to integer mapping
|
|
"""
|
|
# the conversion call stack is default_units -> axis_info -> convert
|
|
if axis.units is None:
|
|
axis.set_units(UnitData(data))
|
|
else:
|
|
axis.units.update(data)
|
|
return axis.units
|
|
|
|
|
|
class StrCategoryLocator(ticker.Locator):
|
|
"""Tick at every integer mapping of the string data."""
|
|
def __init__(self, units_mapping):
|
|
"""
|
|
Parameters
|
|
-----------
|
|
units_mapping : dict
|
|
Mapping of category names (str) to indices (int).
|
|
"""
|
|
self._units = units_mapping
|
|
|
|
def __call__(self):
|
|
# docstring inherited
|
|
return list(self._units.values())
|
|
|
|
def tick_values(self, vmin, vmax):
|
|
# docstring inherited
|
|
return self()
|
|
|
|
|
|
class StrCategoryFormatter(ticker.Formatter):
|
|
"""String representation of the data at every tick."""
|
|
def __init__(self, units_mapping):
|
|
"""
|
|
Parameters
|
|
----------
|
|
units_mapping : dict
|
|
Mapping of category names (str) to indices (int).
|
|
"""
|
|
self._units = units_mapping
|
|
|
|
def __call__(self, x, pos=None):
|
|
# docstring inherited
|
|
return self.format_ticks([x])[0]
|
|
|
|
def format_ticks(self, values):
|
|
# docstring inherited
|
|
r_mapping = {v: self._text(k) for k, v in self._units.items()}
|
|
return [r_mapping.get(round(val), '') for val in values]
|
|
|
|
@staticmethod
|
|
def _text(value):
|
|
"""Convert text values into utf-8 or ascii strings."""
|
|
if isinstance(value, bytes):
|
|
value = value.decode(encoding='utf-8')
|
|
elif not isinstance(value, str):
|
|
value = str(value)
|
|
return value
|
|
|
|
|
|
class UnitData:
|
|
def __init__(self, data=None):
|
|
"""
|
|
Create mapping between unique categorical values and integer ids.
|
|
|
|
Parameters
|
|
----------
|
|
data : iterable
|
|
sequence of string values
|
|
"""
|
|
self._mapping = OrderedDict()
|
|
self._counter = itertools.count()
|
|
if data is not None:
|
|
self.update(data)
|
|
|
|
@staticmethod
|
|
def _str_is_convertible(val):
|
|
"""
|
|
Helper method to check whether a string can be parsed as float or date.
|
|
"""
|
|
try:
|
|
float(val)
|
|
except ValueError:
|
|
try:
|
|
dateutil.parser.parse(val)
|
|
except (ValueError, TypeError):
|
|
# TypeError if dateutil >= 2.8.1 else ValueError
|
|
return False
|
|
return True
|
|
|
|
def update(self, data):
|
|
"""
|
|
Map new values to integer identifiers.
|
|
|
|
Parameters
|
|
----------
|
|
data : iterable of str or bytes
|
|
|
|
Raises
|
|
------
|
|
TypeError
|
|
If elements in *data* are neither str nor bytes.
|
|
"""
|
|
data = np.atleast_1d(np.array(data, dtype=object))
|
|
# check if convertible to number:
|
|
convertible = True
|
|
for val in OrderedDict.fromkeys(data):
|
|
# OrderedDict just iterates over unique values in data.
|
|
cbook._check_isinstance((str, bytes), value=val)
|
|
if convertible:
|
|
# this will only be called so long as convertible is True.
|
|
convertible = self._str_is_convertible(val)
|
|
if val not in self._mapping:
|
|
self._mapping[val] = next(self._counter)
|
|
if convertible:
|
|
_log.info('Using categorical units to plot a list of strings '
|
|
'that are all parsable as floats or dates. If these '
|
|
'strings should be plotted as numbers, cast to the '
|
|
'appropriate data type before plotting.')
|
|
|
|
|
|
# Register the converter with Matplotlib's unit framework
|
|
units.registry[str] = StrCategoryConverter()
|
|
units.registry[np.str_] = StrCategoryConverter()
|
|
units.registry[bytes] = StrCategoryConverter()
|
|
units.registry[np.bytes_] = StrCategoryConverter()
|