Created starter files for the project.
This commit is contained in:
commit
73f0c0db42
1992 changed files with 769897 additions and 0 deletions
897
venv/Lib/site-packages/numpy/lib/_iotools.py
Normal file
897
venv/Lib/site-packages/numpy/lib/_iotools.py
Normal file
|
@ -0,0 +1,897 @@
|
|||
"""A collection of functions designed to help I/O with ascii files.
|
||||
|
||||
"""
|
||||
__docformat__ = "restructuredtext en"
|
||||
|
||||
import numpy as np
|
||||
import numpy.core.numeric as nx
|
||||
from numpy.compat import asbytes, asunicode, bytes
|
||||
|
||||
|
||||
def _decode_line(line, encoding=None):
|
||||
"""Decode bytes from binary input streams.
|
||||
|
||||
Defaults to decoding from 'latin1'. That differs from the behavior of
|
||||
np.compat.asunicode that decodes from 'ascii'.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
line : str or bytes
|
||||
Line to be decoded.
|
||||
|
||||
Returns
|
||||
-------
|
||||
decoded_line : unicode
|
||||
Unicode in Python 2, a str (unicode) in Python 3.
|
||||
|
||||
"""
|
||||
if type(line) is bytes:
|
||||
if encoding is None:
|
||||
line = line.decode('latin1')
|
||||
else:
|
||||
line = line.decode(encoding)
|
||||
|
||||
return line
|
||||
|
||||
|
||||
def _is_string_like(obj):
|
||||
"""
|
||||
Check whether obj behaves like a string.
|
||||
"""
|
||||
try:
|
||||
obj + ''
|
||||
except (TypeError, ValueError):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _is_bytes_like(obj):
|
||||
"""
|
||||
Check whether obj behaves like a bytes object.
|
||||
"""
|
||||
try:
|
||||
obj + b''
|
||||
except (TypeError, ValueError):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def has_nested_fields(ndtype):
    """
    Tell whether at least one field of a dtype is itself structured.

    Parameters
    ----------
    ndtype : dtype
        Data-type of a structured array.

    Returns
    -------
    nested : bool
        True if any field of `ndtype` has named sub-fields, False otherwise
        (including when `ndtype` has no fields at all).

    Raises
    ------
    AttributeError
        If `ndtype` does not have a `names` attribute.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float)])
    >>> np.lib._iotools.has_nested_fields(dt)
    False

    """
    # `names` is None for unstructured dtypes; treat that as "no fields".
    field_names = ndtype.names or ()
    return any(ndtype[name].names is not None for name in field_names)
|
||||
|
||||
|
||||
def flatten_dtype(ndtype, flatten_base=False):
    """
    Collapse a (possibly nested) structured data-type into a flat list of
    base dtypes.

    Field names are not kept in the result.

    Parameters
    ----------
    ndtype : dtype
        The datatype to collapse
    flatten_base : bool, optional
        If True, a field with a shape contributes one entry per element of
        that shape. Default is False (one entry per field).

    Returns
    -------
    types : list of dtype
        Base dtypes of the leaf fields, in field order.

    Examples
    --------
    >>> dt = np.dtype([('name', 'S4'), ('x', float), ('y', float),
    ...                ('block', int, (2, 3))])
    >>> np.lib._iotools.flatten_dtype(dt)
    [dtype('S4'), dtype('float64'), dtype('float64'), dtype('int64')]
    >>> np.lib._iotools.flatten_dtype(dt, flatten_base=True)
    [dtype('S4'),
     dtype('float64'),
     dtype('float64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64'),
     dtype('int64')]

    """
    field_names = ndtype.names
    if field_names is not None:
        # Structured dtype: recurse into every field and concatenate.
        flattened = []
        for field_name in field_names:
            field_dtype = ndtype.fields[field_name][0]
            flattened += flatten_dtype(field_dtype, flatten_base)
        return flattened

    # Leaf dtype: optionally repeat the base once per element of its shape.
    if not flatten_base:
        return [ndtype.base]
    repeats = int(np.prod(ndtype.shape))
    return [ndtype.base] * repeats
|
||||
|
||||
|
||||
class LineSplitter:
    """
    Object to split a string at a given delimiter or at given places.

    Parameters
    ----------
    delimiter : str, int, or sequence of ints, optional
        If a string, character used to delimit consecutive fields.
        If an integer or a sequence of integers, width(s) of each field.
    comments : str, optional
        Character used to mark the beginning of a comment. Default is '#'.
    autostrip : bool, optional
        Whether to strip each individual field. Default is True.
    encoding : str, optional
        Codec used to decode bytes input: the lines passed to the instance
        as well as `delimiter` and `comments` when those are given as
        bytes. If None, 'latin1' is used.

    """

    def autostrip(self, method):
        """
        Wrapper to strip each member of the output of `method`.

        Parameters
        ----------
        method : function
            Function that takes a single argument and returns a sequence of
            strings.

        Returns
        -------
        wrapped : function
            The result of wrapping `method`. `wrapped` takes a single input
            argument and returns a list of strings that are stripped of
            white-space.

        """
        return lambda line: [field.strip() for field in method(line)]

    def __init__(self, delimiter=None, comments='#', autostrip=True,
                 encoding=None):
        # Decode with the same codec later applied to the lines themselves;
        # otherwise a non-ASCII bytes delimiter/comment marker would be
        # decoded as latin1 and never match text decoded with `encoding`.
        delimiter = _decode_line(delimiter, encoding)
        comments = _decode_line(comments, encoding)

        self.comments = comments

        # Delimiter is a character
        if (delimiter is None) or isinstance(delimiter, str):
            # An empty string means "split on any whitespace", like None.
            delimiter = delimiter or None
            _handyman = self._delimited_splitter
        # Delimiter is a list of field widths
        elif hasattr(delimiter, '__iter__'):
            _handyman = self._variablewidth_splitter
            # Turn the widths into consecutive slices over the line.
            idx = np.cumsum([0] + list(delimiter))
            delimiter = [slice(i, j) for (i, j) in zip(idx[:-1], idx[1:])]
        # Delimiter is a single integer
        elif int(delimiter):
            (_handyman, delimiter) = (
                    self._fixedwidth_splitter, int(delimiter))
        else:
            # A zero width falls back to whitespace splitting.
            (_handyman, delimiter) = (self._delimited_splitter, None)
        self.delimiter = delimiter
        if autostrip:
            self._handyman = self.autostrip(_handyman)
        else:
            self._handyman = _handyman
        self.encoding = encoding

    def _delimited_splitter(self, line):
        """Chop off comments, strip, and split at delimiter. """
        if self.comments is not None:
            line = line.split(self.comments)[0]
        line = line.strip(" \r\n")
        if not line:
            return []
        return line.split(self.delimiter)

    def _fixedwidth_splitter(self, line):
        """Chop off comments and cut the line into chunks of equal width."""
        if self.comments is not None:
            line = line.split(self.comments)[0]
        line = line.strip("\r\n")
        if not line:
            return []
        fixed = self.delimiter
        slices = [slice(i, i + fixed) for i in range(0, len(line), fixed)]
        return [line[s] for s in slices]

    def _variablewidth_splitter(self, line):
        """Chop off comments and cut the line at the precomputed slices."""
        if self.comments is not None:
            line = line.split(self.comments)[0]
        if not line:
            return []
        slices = self.delimiter
        return [line[s] for s in slices]

    def __call__(self, line):
        """Decode `line` if needed and split it into a list of fields."""
        return self._handyman(_decode_line(line, self.encoding))
|
||||
|
||||
|
||||
class NameValidator:
    """
    Object to validate a list of strings to use as field names.

    The strings are stripped of any non alphanumeric character, and spaces
    are replaced by '_'. During instantiation, the user can define a list
    of names to exclude, as well as a list of invalid characters. Names in
    the exclusion list are appended a '_' character.

    Once an instance has been created, it can be called with a list of
    names, and a list of valid names will be created. The `__call__`
    method accepts an optional keyword "default" that sets the default name
    in case of ambiguity. By default this is 'f', so that names will
    default to `f0`, `f1`, etc.

    Parameters
    ----------
    excludelist : sequence, optional
        A list of names to exclude. This list is appended to the default
        list ['return', 'file', 'print']. Excluded names are appended an
        underscore: for example, `file` becomes `file_` if supplied.
        The sequence passed in is copied, never modified.
    deletechars : str, optional
        A string combining invalid characters that must be deleted from the
        names.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        * If True, field names are case-sensitive.
        * If False or 'upper', field names are converted to upper case.
        * If 'lower', field names are converted to lower case.

        The default value is True.
    replace_space : '_', optional
        Character(s) used in replacement of white spaces.

    Notes
    -----
    Calling an instance of `NameValidator` is the same as calling its
    method `validate`.

    Examples
    --------
    >>> validator = np.lib._iotools.NameValidator()
    >>> validator(['file', 'field2', 'with space', 'CaSe'])
    ('file_', 'field2', 'with_space', 'CaSe')

    >>> validator = np.lib._iotools.NameValidator(excludelist=['excl'],
    ...                                           deletechars='q',
    ...                                           case_sensitive=False)
    >>> validator(['excl', 'field2', 'no_q', 'with space', 'CaSe'])
    ('EXCL', 'FIELD2', 'NO_Q', 'WITH_SPACE', 'CASE')

    """

    defaultexcludelist = ['return', 'file', 'print']
    defaultdeletechars = set(r"""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")

    def __init__(self, excludelist=None, deletechars=None,
                 case_sensitive=None, replace_space='_'):
        # Process the exclusion list ..
        # Build a fresh list so the caller's sequence is left untouched.
        if excludelist is None:
            excludelist = []
        self.excludelist = list(excludelist) + self.defaultexcludelist
        # Process the list of characters to delete
        # Copy the class-level default before adding to it, so the shared
        # set is not altered by creating an instance.
        if deletechars is None:
            delete = set(self.defaultdeletechars)
        else:
            delete = set(deletechars)
        delete.add('"')
        self.deletechars = delete
        # Process the case option .....
        if (case_sensitive is None) or (case_sensitive is True):
            self.case_converter = lambda x: x
        elif (case_sensitive is False) or case_sensitive.startswith('u'):
            self.case_converter = lambda x: x.upper()
        elif case_sensitive.startswith('l'):
            self.case_converter = lambda x: x.lower()
        else:
            msg = 'unrecognized case_sensitive value %s.' % case_sensitive
            raise ValueError(msg)

        self.replace_space = replace_space

    def validate(self, names, defaultfmt="f%i", nbfields=None):
        """
        Validate a list of strings as field names for a structured array.

        Parameters
        ----------
        names : sequence of str
            Strings to be validated.
        defaultfmt : str, optional
            Default format string, used if validating a given string
            reduces its length to zero.
        nbfields : integer, optional
            Final number of validated names, used to expand or shrink the
            initial list of names.

        Returns
        -------
        validatednames : tuple of str, or None
            The validated field names. None is returned when both `names`
            and `nbfields` are None.

        Notes
        -----
        A `NameValidator` instance can be called directly, which is the
        same as calling `validate`. For examples, see `NameValidator`.

        """
        # Initial checks ..............
        if names is None:
            if nbfields is None:
                return None
            names = []
        if isinstance(names, str):
            names = [names, ]
        if nbfields is not None:
            # Pad with empty names or truncate to exactly `nbfields` items.
            nbnames = len(names)
            if nbnames < nbfields:
                names = list(names) + [''] * (nbfields - nbnames)
            elif nbnames > nbfields:
                names = names[:nbfields]
        # Set some shortcuts ...........
        deletechars = self.deletechars
        excludelist = self.excludelist
        case_converter = self.case_converter
        replace_space = self.replace_space
        # Initializes some variables ...
        validatednames = []
        seen = {}
        nbempty = 0

        for item in names:
            item = case_converter(item).strip()
            if replace_space:
                item = item.replace(' ', replace_space)
            item = ''.join([c for c in item if c not in deletechars])
            if item == '':
                # Nothing left: generate a default name that does not
                # collide with any of the names supplied by the caller.
                item = defaultfmt % nbempty
                while item in names:
                    nbempty += 1
                    item = defaultfmt % nbempty
                nbempty += 1
            elif item in excludelist:
                item += '_'
            # Disambiguate repeated names with a numeric suffix.
            cnt = seen.get(item, 0)
            if cnt > 0:
                validatednames.append(item + '_%d' % cnt)
            else:
                validatednames.append(item)
            seen[item] = cnt + 1
        return tuple(validatednames)

    def __call__(self, names, defaultfmt="f%i", nbfields=None):
        """Shortcut for `validate` with the same arguments."""
        return self.validate(names, defaultfmt=defaultfmt, nbfields=nbfields)
|
||||
|
||||
|
||||
def str2bool(value):
    """
    Convert the string 'True' or 'False' (in any letter case) to a boolean.

    Parameters
    ----------
    value : str
        The string that is transformed to a boolean.

    Returns
    -------
    boolval : bool
        The boolean representation of `value`.

    Raises
    ------
    ValueError
        If the string is not 'True' or 'False' (case independent)

    Examples
    --------
    >>> np.lib._iotools.str2bool('TRUE')
    True
    >>> np.lib._iotools.str2bool('false')
    False

    """
    normalized = value.upper()
    if normalized == 'TRUE':
        return True
    if normalized == 'FALSE':
        return False
    raise ValueError("Invalid boolean")
|
||||
|
||||
|
||||
class ConverterError(Exception):
    """Error raised when a converter for string values fails."""
|
||||
|
||||
|
||||
class ConverterLockError(ConverterError):
    """Error raised on an attempt to upgrade a locked converter."""
|
||||
|
||||
|
||||
class ConversionWarning(UserWarning):
    """
    Warning category used when a string converter runs into a problem.

    Notes
    -----
    In `genfromtxt` a `ConversionWarning` is issued if raising exceptions
    is explicitly suppressed with the "invalid_raise" keyword.

    """
|
||||
|
||||
|
||||
class StringConverter:
    """
    Factory class for function transforming a string into another object
    (int, float).

    After initialization, an instance can be called to transform a string
    into another object. If the string is recognized as representing a
    missing value, a default value is returned.

    Attributes
    ----------
    func : function
        Function used for the conversion.
    default : any
        Default value to return when the input corresponds to a missing
        value.
    type : type
        Type of the output.
    _status : int
        Integer representing the order of the conversion.
    _mapper : sequence of tuples
        Sequence of tuples (dtype, function, default value) to evaluate in
        order.
    _locked : bool
        Holds `locked` parameter.

    Parameters
    ----------
    dtype_or_func : {None, dtype, function}, optional
        If a `dtype`, specifies the input data type, used to define a basic
        function and a default value for missing data. For example, when
        `dtype` is float, the `func` attribute is set to `float` and the
        default value to `np.nan`.  If a function, this function is used to
        convert a string to another object. In this case, it is recommended
        to give an associated default value as input.
    default : any, optional
        Value to return by default, that is, when the string to be
        converted is flagged as missing. If not given, `StringConverter`
        tries to supply a reasonable default value.
    missing_values : {None, sequence of str}, optional
        ``None`` or sequence of strings indicating a missing value. If ``None``
        then missing values are indicated by empty entries. The default is
        ``None``.
    locked : bool, optional
        Whether the StringConverter should be locked to prevent automatic
        upgrade or not. Default is False.

    """
    _mapper = [(nx.bool_, str2bool, False),
               (nx.int_, int, -1),]

    # On 32-bit systems, we need to make sure that we explicitly include
    # nx.int64 since nx.int_ is nx.int32.
    if nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize:
        _mapper.append((nx.int64, int, -1))

    _mapper.extend([(nx.float64, float, nx.nan),
                    (nx.complex128, complex, nx.nan + 0j),
                    (nx.longdouble, nx.longdouble, nx.nan),
                    # If a non-default dtype is passed, fall back to generic
                    # ones (should only be used for the converter)
                    (nx.integer, int, -1),
                    (nx.floating, float, nx.nan),
                    (nx.complexfloating, complex, nx.nan + 0j),
                    # Last, try with the string types (must be last, because
                    # `_mapper[-1]` is used as default in some cases)
                    (nx.unicode_, asunicode, '???'),
                    (nx.string_, asbytes, '???'),
                    ])

    @classmethod
    def _getdtype(cls, val):
        """Returns the dtype of the input variable."""
        return np.array(val).dtype

    @classmethod
    def _getsubdtype(cls, val):
        """Returns the type of the dtype of the input variable."""
        return np.array(val).dtype.type

    @classmethod
    def _dtypeortype(cls, dtype):
        """Returns dtype for datetime64 and type of dtype otherwise."""

        # This is a bit annoying. We want to return the "general" type in most
        # cases (ie. "string" rather than "S10"), but we want to return the
        # specific type for datetime64 (ie. "datetime64[us]" rather than
        # "datetime64").
        if dtype.type == np.datetime64:
            return dtype
        return dtype.type

    @classmethod
    def upgrade_mapper(cls, func, default=None):
        """
        Upgrade the mapper of a StringConverter by adding a new function and
        its corresponding default.

        The input function (or sequence of functions) and its associated
        default value (if any) is inserted in penultimate position of the
        mapper.  The corresponding type is estimated from the dtype of the
        default value.

        Parameters
        ----------
        func : var
            Function, or sequence of functions. A sequence whose first item
            is a tuple or list is taken to hold ready-made mapper entries,
            which are inserted as they are.
        default : any or sequence, optional
            Default value for `func`, or one default per function when
            `func` is a sequence. A sequence shorter than `func` is padded
            with None.

        Examples
        --------
        >>> import dateutil.parser
        >>> import datetime
        >>> dateparser = dateutil.parser.parse
        >>> defaultdate = datetime.date(2000, 1, 1)
        >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
        """
        # Func is a single functions
        if hasattr(func, '__call__'):
            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
            return
        elif hasattr(func, '__iter__'):
            if isinstance(func[0], (tuple, list)):
                for entry in func:
                    cls._mapper.insert(-1, entry)
                return
            if default is None:
                default = [None] * len(func)
            else:
                default = list(default)
                # Pad with one None per function lacking a default. This
                # must be `extend`: `append` would add a single nested list
                # and `zip` below would then drop the trailing functions.
                default.extend([None] * (len(func) - len(default)))
            for fct, dft in zip(func, default):
                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))

    @classmethod
    def _find_map_entry(cls, dtype):
        """
        Return ``(index, (type, function, default))`` for the mapper entry
        matching `dtype`; raise LookupError when there is none.
        """
        # if a converter for the specific dtype is available use that
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if dtype.type == deftype:
                return i, (deftype, func, default_def)

        # otherwise find an inexact match
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if np.issubdtype(dtype.type, deftype):
                return i, (deftype, func, default_def)

        raise LookupError

    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization
        if dtype_or_func is None:
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype ?
            try:
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must be a function, then
                if not hasattr(dtype_or_func, '__call__'):
                    errmsg = ("The input argument `dtype` is neither a"
                              " function nor a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                # Set the function
                self.func = dtype_or_func
                # If we don't have a default, try to guess it or set it to
                # None
                if default is None:
                    try:
                        default = self.func('0')
                    except ValueError:
                        default = None
                dtype = self._getdtype(default)

            # find the best match in our mapper
            try:
                self._status, (_, func, default_def) = self._find_map_entry(dtype)
            except LookupError:
                # no match
                self.default = default
                _, func, _ = self._mapper[-1]
                self._status = 0
            else:
                # use the found default only if we did not already have one
                if default is None:
                    self.default = default_def
                else:
                    self.default = default

            # If the input was a dtype, set the function to the last we saw
            if self.func is None:
                self.func = func

            # If the status is 1 (int), change the function to
            # something more robust.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    self.func = lambda x: int(float(x))
        # Store the list of strings corresponding to missing values.
        if missing_values is None:
            self.missing_values = {''}
        else:
            if isinstance(missing_values, str):
                missing_values = missing_values.split(",")
            self.missing_values = set(list(missing_values) + [''])

        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        self._initial_default = default

    def _loose_call(self, value):
        """Convert `value`, returning the default if `func` raises ValueError."""
        try:
            return self.func(value)
        except ValueError:
            return self.default

    def _strict_call(self, value):
        """
        Convert `value`; return the default for a missing value and raise
        ValueError for anything else that cannot be converted.
        """
        try:

            # We check if we can convert the value using the current function
            new_value = self.func(value)

            # In addition to having to check whether func can convert the
            # value, we also have to make sure that we don't get overflow
            # errors for integers.
            if self.func is int:
                try:
                    np.array(value, dtype=self.type)
                except OverflowError:
                    raise ValueError

            # We're still here so we can now return the new value
            return new_value

        except ValueError:
            if value.strip() in self.missing_values:
                if not self._status:
                    self._checked = False
                return self.default
            raise ValueError("Cannot convert string '%s'" % value)

    def __call__(self, value):
        """Convert `value` with the currently selected calling function."""
        return self._callingfunction(value)

    def _do_upgrade(self):
        """
        Switch to the next entry of `_mapper`.

        Raises ConverterLockError if the converter is locked, and
        ConverterError if `_status` equals the length of the mapper.
        """
        # Raise an exception if we locked the converter...
        if self._locked:
            errmsg = "Converter is locked and cannot be upgraded"
            raise ConverterLockError(errmsg)
        _statusmax = len(self._mapper)
        # Complains if we try to upgrade by the maximum
        _status = self._status
        if _status == _statusmax:
            errmsg = "Could not find a valid conversion function"
            raise ConverterError(errmsg)
        elif _status < _statusmax - 1:
            _status += 1
        self.type, self.func, default = self._mapper[_status]
        self._status = _status
        # A default given at construction time wins over the mapper's one.
        if self._initial_default is not None:
            self.default = self._initial_default
        else:
            self.default = default

    def upgrade(self, value):
        """
        Find the best converter for a given string, and return the result.

        The supplied string `value` is converted by testing different
        converters in order. First the `func` method of the
        `StringConverter` instance is tried, if this fails other available
        converters are tried.  The order in which these other converters
        are tried is determined by the `_status` attribute of the instance.

        Parameters
        ----------
        value : str
            The string to convert.

        Returns
        -------
        out : any
            The result of converting `value` with the appropriate converter.

        """
        self._checked = True
        try:
            return self._strict_call(value)
        except ValueError:
            self._do_upgrade()
            return self.upgrade(value)

    def iterupgrade(self, value):
        """
        Upgrade the converter until every item of `value` can be converted.

        A `value` without an ``__iter__`` attribute is treated as a single
        item. Nothing is returned.
        """
        self._checked = True
        if not hasattr(value, '__iter__'):
            value = (value,)
        _strict_call = self._strict_call
        try:
            for _m in value:
                _strict_call(_m)
        except ValueError:
            self._do_upgrade()
            self.iterupgrade(value)

    def update(self, func, default=None, testing_value=None,
               missing_values='', locked=False):
        """
        Set StringConverter attributes directly.

        Parameters
        ----------
        func : function
            Conversion function.
        default : any, optional
            Value to return by default, that is, when the string to be
            converted is flagged as missing. If not given,
            `StringConverter` tries to supply a reasonable default value.
        testing_value : str, optional
            A string representing a standard input value of the converter.
            This string is used to help defining a reasonable default
            value.
        missing_values : {sequence of str, None}, optional
            Sequence of strings indicating a missing value. If ``None``, then
            the existing `missing_values` are cleared. The default is `''`.
        locked : bool, optional
            Whether the StringConverter should be locked to prevent
            automatic upgrade or not. Default is False.

        Notes
        -----
        `update` takes the same parameters as the constructor of
        `StringConverter`, except that `func` does not accept a `dtype`
        whereas `dtype_or_func` in the constructor does.

        """
        self.func = func
        self._locked = locked

        # Don't reset the default to None if we can avoid it
        if default is not None:
            self.default = default
            self.type = self._dtypeortype(self._getdtype(default))
        else:
            try:
                tester = func(testing_value or '1')
            except (TypeError, ValueError):
                tester = None
            self.type = self._dtypeortype(self._getdtype(tester))

        # Add the missing values to the existing set or clear it.
        if missing_values is None:
            # Clear all missing values even though the ctor initializes it to
            # set(['']) when the argument is None.
            self.missing_values = set()
        else:
            if not np.iterable(missing_values):
                missing_values = [missing_values]
            if not all(isinstance(v, str) for v in missing_values):
                raise TypeError("missing_values must be strings or unicode")
            self.missing_values.update(missing_values)
|
||||
|
||||
|
||||
def easy_dtype(ndtype, names=None, defaultfmt="f%i", **validationargs):
    """
    Convenience function to create a `np.dtype` object.

    The function processes the input `dtype` and matches it with the given
    names. A dtype object passed as `ndtype` is never modified: when its
    fields have to be renamed, the renaming is applied to a copy.

    Parameters
    ----------
    ndtype : var
        Definition of the dtype. Can be any string or dictionary recognized
        by the `np.dtype` function, or a sequence of types.
    names : str or sequence, optional
        Sequence of strings to use as field names for a structured dtype.
        For convenience, `names` can be a string of a comma-separated list
        of names.
    defaultfmt : str, optional
        Format string used to define missing names, such as ``"f%i"``
        (default) or ``"fields_%02i"``.
    validationargs : optional
        A series of optional arguments used to initialize a
        `NameValidator`.

    Returns
    -------
    ndtype : dtype
        The resulting data-type, with validated field names if structured.

    Examples
    --------
    >>> np.lib._iotools.easy_dtype(float)
    dtype('float64')
    >>> np.lib._iotools.easy_dtype("i4, f8")
    dtype([('f0', '<i4'), ('f1', '<f8')])
    >>> np.lib._iotools.easy_dtype("i4, f8", defaultfmt="field_%03i")
    dtype([('field_000', '<i4'), ('field_001', '<f8')])

    >>> np.lib._iotools.easy_dtype((int, float, float), names="a,b,c")
    dtype([('a', '<i8'), ('b', '<f8'), ('c', '<f8')])
    >>> np.lib._iotools.easy_dtype(float, names="a,b,c")
    dtype([('a', '<f8'), ('b', '<f8'), ('c', '<f8')])

    """
    given = ndtype
    try:
        ndtype = np.dtype(ndtype)
    except TypeError:
        # Not directly convertible: treat the input as a sequence of formats.
        validate = NameValidator(**validationargs)
        nbfields = len(ndtype)
        if names is None:
            names = [''] * len(ndtype)
        elif isinstance(names, str):
            names = names.split(",")
        names = validate(names, nbfields=nbfields, defaultfmt=defaultfmt)
        ndtype = np.dtype(dict(formats=ndtype, names=names))
    else:
        if ndtype is given and ndtype.names is not None:
            # np.dtype() returned the caller's own object; the branches
            # below assign to `.names`, so work on a copy to avoid renaming
            # the fields of the dtype the caller still holds.
            ndtype = np.dtype(ndtype, copy=True)
        # Explicit names
        if names is not None:
            validate = NameValidator(**validationargs)
            if isinstance(names, str):
                names = names.split(",")
            # Simple dtype: repeat to match the nb of names
            if ndtype.names is None:
                formats = tuple([ndtype.type] * len(names))
                names = validate(names, defaultfmt=defaultfmt)
                ndtype = np.dtype(list(zip(names, formats)))
            # Structured dtype: just validate the names as needed
            else:
                ndtype.names = validate(names, nbfields=len(ndtype.names),
                                        defaultfmt=defaultfmt)
        # No implicit names
        elif ndtype.names is not None:
            validate = NameValidator(**validationargs)
            # Default initial names : should we change the format ?
            numbered_names = tuple("f%i" % i for i in range(len(ndtype.names)))
            if ((ndtype.names == numbered_names) and (defaultfmt != "f%i")):
                ndtype.names = validate([''] * len(ndtype.names),
                                        defaultfmt=defaultfmt)
            # Explicit initial names : just validate
            else:
                ndtype.names = validate(ndtype.names, defaultfmt=defaultfmt)
    return ndtype
|
Loading…
Add table
Add a link
Reference in a new issue