Fixed database typo and removed unnecessary class identifier.

2020-10-14 10:10:37 -04:00 · 2020-10-14 10:10:37 -04:00 · 45fb349a7d
commit 45fb349a7d
parent 00ad49a143
5098 changed files with 952558 additions and 85 deletions
--- a/venv/Lib/site-packages/scipy/io/arff/init.py
+++ b/venv/Lib/site-packages/scipy/io/arff/init.py
@ -0,0 +1,25 @@
+"""
+Module to read ARFF files
+=========================
+ARFF is the standard data format for WEKA.
+It is a text file format which supports numerical, string and data values.
+The format can also represent missing data and sparse data.
+
+Notes
+-----
+The ARFF support in ``scipy.io`` provides file reading functionality only.
+For more extensive ARFF functionality, see `liac-arff
+<https://github.com/renatopp/liac-arff>`_.
+
+See the `WEKA website <http://weka.wikispaces.com/ARFF>`_
+for more details about the ARFF format and available datasets.
+
+"""
+from .arffread import *
+from . import arffread
+
+__all__ = arffread.__all__
+
+from scipy._lib._testutils import PytestTester
+test = PytestTester(__name__)
+del PytestTester
--- a/venv/Lib/site-packages/scipy/io/arff/pycache/init.cpython-36.pyc
+++ b/venv/Lib/site-packages/scipy/io/arff/pycache/init.cpython-36.pyc
--- a/venv/Lib/site-packages/scipy/io/arff/pycache/arffread.cpython-36.pyc
+++ b/venv/Lib/site-packages/scipy/io/arff/pycache/arffread.cpython-36.pyc
--- a/venv/Lib/site-packages/scipy/io/arff/pycache/setup.cpython-36.pyc
+++ b/venv/Lib/site-packages/scipy/io/arff/pycache/setup.cpython-36.pyc
--- a/venv/Lib/site-packages/scipy/io/arff/arffread.py
+++ b/venv/Lib/site-packages/scipy/io/arff/arffread.py
@ -0,0 +1,905 @@
+# Last Change: Mon Aug 20 08:00 PM 2007 J
+import re
+import datetime
+from collections import OrderedDict
+
+import numpy as np
+
+import csv
+import ctypes
+
+"""A module to read arff files."""
+
+__all__ = ['MetaData', 'loadarff', 'ArffError', 'ParseArffError']
+
+# An Arff file is basically two parts:
+#   - header
+#   - data
+#
+# Each component of the header starts with @META, where META is one of the
+# keywords (attribute or relation, for now).
+
+# TODO:
+#   - both integer and reals are treated as numeric -> the integer info
+#    is lost!
+#   - Replace ValueError by ParseError or something
+
+# We can now handle the following:
+#   - numeric and nominal attributes
+#   - missing values for numeric attributes
+
+r_meta = re.compile(r'^\s*@')
+# Match a comment
+r_comment = re.compile(r'^%')
+# Match an empty line
+r_empty = re.compile(r'^\s+$')
+# Match a header line, that is a line which starts by @ + a word
+r_headerline = re.compile(r'^\s*@\S*')
+r_datameta = re.compile(r'^@[Dd][Aa][Tt][Aa]')
+r_relation = re.compile(r'^@[Rr][Ee][Ll][Aa][Tt][Ii][Oo][Nn]\s*(\S*)')
+r_attribute = re.compile(r'^\s*@[Aa][Tt][Tt][Rr][Ii][Bb][Uu][Tt][Ee]\s*(..*$)')
+
+r_nominal = re.compile('{(.+)}')
+r_date = re.compile(r"[Dd][Aa][Tt][Ee]\s+[\"']?(.+?)[\"']?$")
+
+# To get attributes name enclosed with ''
+r_comattrval = re.compile(r"'(..+)'\s+(..+$)")
+# To get normal attributes
+r_wcomattrval = re.compile(r"(\S+)\s+(..+$)")
+
+# ------------------------
+# Module defined exception
+# ------------------------
+
+
+class ArffError(IOError):
+    """Base class of the exceptions defined by this module."""
+    pass
+
+
+class ParseArffError(ArffError):
+    """Raised when an ARFF header or attribute declaration cannot be parsed."""
+    pass
+
+
+# ----------
+# Attributes
+# ----------
+class Attribute(object):
+    """Base class for the ARFF attribute types handled by this module.
+
+    Subclasses override `parse_attribute` to recognise their own type
+    declaration and `parse_data` to convert a raw field into a value.
+    """
+
+    # Short type label; None on the base class, set by each subclass.
+    type_name = None
+
+    def __init__(self, name):
+        self.name = name
+        # Generic defaults; subclasses may replace them.
+        self.range = None
+        self.dtype = np.object_
+
+    @classmethod
+    def parse_attribute(cls, name, attr_string):
+        """
+        Parse the attribute line if it knows how. Returns the parsed
+        attribute, or None.
+
+        The base implementation recognises nothing and always returns None.
+        """
+        return None
+
+    def parse_data(self, data_str):
+        """
+        Parse a value of this type.
+
+        The base implementation ignores ``data_str`` and returns None.
+        """
+        return None
+
+    def __str__(self):
+        """
+        Return the attribute formatted as ``'<name>,<type_name>'``.
+        """
+        return self.name + ',' + self.type_name
+
+
+class NominalAttribute(Attribute):
+    """Attribute restricted to a fixed set of values, declared as ``{a, b}``.
+
+    ``values`` and ``range`` both hold the allowed values; ``dtype`` is a
+    fixed-width byte string sized to the longest allowed value.
+    """
+
+    type_name = 'nominal'
+
+    def __init__(self, name, values):
+        super().__init__(name)
+        self.values = values
+        self.range = values
+        # np.bytes_ is the type the np.string_ alias pointed to; the alias
+        # was removed in NumPy 2.0, so spell the type directly.
+        self.dtype = (np.bytes_, max(len(i) for i in values))
+
+    @staticmethod
+    def _get_nom_val(atrv):
+        """Given a string containing a nominal type, returns a tuple of the
+        possible values.
+
+        A nominal type is defined as something framed between braces ({}).
+
+        Parameters
+        ----------
+        atrv : str
+           Nominal type definition
+
+        Returns
+        -------
+        poss_vals : tuple
+           possible values
+
+        Raises
+        ------
+        ValueError
+            If ``atrv`` does not start with a brace-framed list.
+
+        Examples
+        --------
+        >>> NominalAttribute._get_nom_val("{floup, bouga, fl, ratata}")
+        ('floup', 'bouga', 'fl', 'ratata')
+        """
+        m = r_nominal.match(atrv)
+        if m:
+            # The text between the braces is split like a data line.
+            attrs, _ = split_data_line(m.group(1))
+            return tuple(attrs)
+        else:
+            raise ValueError("This does not look like a nominal string")
+
+    @classmethod
+    def parse_attribute(cls, name, attr_string):
+        """
+        Parse the attribute line if it knows how. Returns the parsed
+        attribute, or None.
+
+        For nominal attributes, the attribute string would be like '{<attr_1>,
+         <attr2>, <attr_3>}'.
+        """
+        if attr_string[0] == '{':
+            values = cls._get_nom_val(attr_string)
+            return cls(name, values)
+        else:
+            return None
+
+    def parse_data(self, data_str):
+        """
+        Parse a value of this type.
+
+        Returns ``data_str`` unchanged when it is one of the declared values
+        or the missing-value marker ``'?'``; raises ValueError otherwise.
+        """
+        if data_str in self.values:
+            return data_str
+        elif data_str == '?':
+            return data_str
+        else:
+            raise ValueError("%s value not in %s" % (str(data_str),
+                                                     str(self.values)))
+
+    def __str__(self):
+        """Return the attribute formatted as ``'<name>,{v1,v2,...}'``."""
+        return self.name + ",{" + ",".join(self.values) + "}"
+
+
+class NumericAttribute(Attribute):
+    """Attribute declared as ``numeric``, ``int...`` or ``real``.
+
+    All three declarations are stored as 64-bit floats.
+    """
+
+    def __init__(self, name):
+        super().__init__(name)
+        self.type_name = 'numeric'
+        # np.float64 is the type the np.float_ alias pointed to; the alias
+        # was removed in NumPy 2.0, so spell the type directly.
+        self.dtype = np.float64
+
+    @classmethod
+    def parse_attribute(cls, name, attr_string):
+        """
+        Parse the attribute line if it knows how. Returns the parsed
+        attribute, or None.
+
+        For numeric attributes, the attribute string would be like
+        'numeric' or 'int' or 'real'.
+        """
+
+        attr_string = attr_string.lower().strip()
+
+        # Prefix match, so e.g. 'integer' is accepted through 'int'.
+        if(attr_string[:len('numeric')] == 'numeric' or
+           attr_string[:len('int')] == 'int' or
+           attr_string[:len('real')] == 'real'):
+            return cls(name)
+        else:
+            return None
+
+    def parse_data(self, data_str):
+        """
+        Parse a value of this type.
+
+        Parameters
+        ----------
+        data_str : str
+           string to convert
+
+        Returns
+        -------
+        f : float
+           where float can be nan
+
+        Examples
+        --------
+        >>> atr = NumericAttribute('atr')
+        >>> atr.parse_data('1')
+        1.0
+        >>> atr.parse_data('1\\n')
+        1.0
+        >>> atr.parse_data('?\\n')
+        nan
+        """
+        # Any field containing '?' is treated as a missing value.
+        if '?' in data_str:
+            return np.nan
+        else:
+            return float(data_str)
+
+    def _basic_stats(self, data):
+        """Return ``(min, max, mean, std * n / (n - 1))`` for ``data``.
+
+        NOTE(review): assumes ``data`` is a NumPy array with more than one
+        element; ``data.size - 1`` is used as a divisor.
+        """
+        nbfac = data.size * 1. / (data.size - 1)
+        return (np.nanmin(data), np.nanmax(data),
+                np.mean(data), np.std(data) * nbfac)
+
+
+class StringAttribute(Attribute):
+    """Attribute declared with the ``string`` type."""
+
+    def __init__(self, name):
+        super().__init__(name)
+        self.type_name = 'string'
+
+    @classmethod
+    def parse_attribute(cls, name, attr_string):
+        """
+        Build a string attribute when the declaration names that type.
+
+        The declaration is lower-cased and stripped first; an instance is
+        returned when it begins with 'string', otherwise None.
+        """
+        declared = attr_string.lower().strip()
+        return cls(name) if declared.startswith('string') else None
+
+
+class DateAttribute(Attribute):
+    """Attribute declared as ``date <format>``.
+
+    ``date_format`` is the strptime-style pattern derived from the declared
+    format and ``datetime_unit`` is the datetime64 unit code chosen for it.
+    """
+
+    def __init__(self, name, date_format, datetime_unit):
+        super().__init__(name)
+        self.date_format = date_format
+        self.datetime_unit = datetime_unit
+        self.type_name = 'date'
+        self.range = date_format
+        self.dtype = np.datetime64(0, self.datetime_unit)
+
+    @staticmethod
+    def _get_date_format(atrv):
+        """Translate a ``date <format>`` declaration into a strptime pattern.
+
+        Parameters
+        ----------
+        atrv : str
+            The attribute type string, e.g. ``date "yyyy-MM-dd"``.
+
+        Returns
+        -------
+        pattern : str
+            The format with the recognised tokens replaced by their
+            ``%`` directives.
+        datetime_unit : str
+            Unit code of the last (finest) token recognised, checked in the
+            order year, month, day, hour, minute, second.
+
+        Raises
+        ------
+        ValueError
+            If the declaration has no format, contains a time zone token
+            (``z``/``Z``), or contains none of the recognised tokens.
+        """
+        m = r_date.match(atrv)
+        if m:
+            pattern = m.group(1).strip()
+            # convert time pattern from Java's SimpleDateFormat to C's format
+            datetime_unit = None
+            if "yyyy" in pattern:
+                pattern = pattern.replace("yyyy", "%Y")
+                datetime_unit = "Y"
+            elif "yy" in pattern:
+                # The bare string "yy" used here before was always true, so
+                # a format without any known token was never rejected below.
+                pattern = pattern.replace("yy", "%y")
+                datetime_unit = "Y"
+            if "MM" in pattern:
+                pattern = pattern.replace("MM", "%m")
+                datetime_unit = "M"
+            if "dd" in pattern:
+                pattern = pattern.replace("dd", "%d")
+                datetime_unit = "D"
+            if "HH" in pattern:
+                pattern = pattern.replace("HH", "%H")
+                datetime_unit = "h"
+            if "mm" in pattern:
+                pattern = pattern.replace("mm", "%M")
+                datetime_unit = "m"
+            if "ss" in pattern:
+                pattern = pattern.replace("ss", "%S")
+                datetime_unit = "s"
+            if "z" in pattern or "Z" in pattern:
+                raise ValueError("Date type attributes with time zone not "
+                                 "supported, yet")
+
+            if datetime_unit is None:
+                raise ValueError("Invalid or unsupported date format")
+
+            return pattern, datetime_unit
+        else:
+            raise ValueError("Invalid or no date format")
+
+    @classmethod
+    def parse_attribute(cls, name, attr_string):
+        """
+        Parse the attribute line if it knows how. Returns the parsed
+        attribute, or None.
+
+        For date attributes, the attribute string would be like
+        'date <format>'.
+        """
+
+        attr_string_lower = attr_string.lower().strip()
+
+        if attr_string_lower[:len('date')] == 'date':
+            # The format is case sensitive, so parse the original string.
+            date_format, datetime_unit = cls._get_date_format(attr_string)
+            return cls(name, date_format, datetime_unit)
+        else:
+            return None
+
+    def parse_data(self, data_str):
+        """
+        Parse a value of this type.
+
+        Surrounding whitespace and quotes are removed first. ``'?'`` gives
+        NaT; anything else is parsed with ``date_format`` and converted to a
+        datetime64 of unit ``datetime_unit``.
+        """
+        date_str = data_str.strip().strip("'").strip('"')
+        if date_str == '?':
+            return np.datetime64('NaT', self.datetime_unit)
+        else:
+            dt = datetime.datetime.strptime(date_str, self.date_format)
+            return np.datetime64(dt).astype(
+                "datetime64[%s]" % self.datetime_unit)
+
+    def __str__(self):
+        """Return ``'<name>,date,<date_format>'``."""
+        return super().__str__() + ',' + self.date_format
+
+
+class RelationalAttribute(Attribute):
+    """Attribute declared as ``relational``, holding nested attributes.
+
+    ``attributes`` starts empty and is filled by the header parser;
+    ``dialect`` caches the CSV dialect found while parsing data.
+    """
+
+    def __init__(self, name):
+        super().__init__(name)
+        self.type_name = 'relational'
+        self.dtype = np.object_
+        self.attributes = []
+        self.dialect = None
+
+    @classmethod
+    def parse_attribute(cls, name, attr_string):
+        """
+        Parse the attribute line if it knows how. Returns the parsed
+        attribute, or None.
+
+        For relational attributes, the attribute string would be like
+        'relational'.
+        """
+
+        attr_string_lower = attr_string.lower().strip()
+
+        if attr_string_lower[:len('relational')] == 'relational':
+            return cls(name)
+        else:
+            return None
+
+    def parse_data(self, data_str):
+        """
+        Parse a value of this type.
+
+        ``data_str`` has its escape sequences decoded and is split on
+        newlines. Each resulting line is split into fields and converted
+        with the nested attributes. Returns a structured array with one
+        record per line.
+        """
+        # Same row-conversion loop as the top-level data reader.
+        elems = list(range(len(self.attributes)))
+
+        escaped_string = data_str.encode().decode("unicode-escape")
+
+        row_tuples = []
+
+        for raw in escaped_string.split("\n"):
+            # The dialect found for the first line is reused afterwards.
+            row, self.dialect = split_data_line(raw, self.dialect)
+
+            row_tuples.append(tuple(
+                [self.attributes[i].parse_data(row[i]) for i in elems]))
+
+        return np.array(row_tuples,
+                        [(a.name, a.dtype) for a in self.attributes])
+
+    def __str__(self):
+        """Return the base description followed by one nested attribute
+        per tab-indented line."""
+        return (super(RelationalAttribute, self).__str__() + '\n\t' +
+                '\n\t'.join(str(a) for a in self.attributes))
+
+
+# -----------------
+# Various utilities
+# -----------------
+def to_attribute(name, attr_string):
+    """Build the attribute object matching a type declaration.
+
+    Each attribute class is asked in turn to parse ``attr_string``; the first
+    one that returns an attribute wins. ParseArffError is raised when none
+    of them recognises the declaration.
+    """
+    candidates = (NominalAttribute, NumericAttribute, DateAttribute,
+                  StringAttribute, RelationalAttribute)
+
+    # Lazy, so classes after the first match are never consulted.
+    parsed = (kind.parse_attribute(name, attr_string) for kind in candidates)
+    found = next((item for item in parsed if item is not None), None)
+
+    if found is None:
+        raise ParseArffError("unknown attribute %s" % attr_string)
+    return found
+
+
+def csv_sniffer_has_bug_last_field():
+    """
+    Report whether https://bugs.python.org/issue30157 is still unpatched.
+
+    The answer is found by sniffing a sample line whose last field is quoted
+    and checking the detected quote character. It is stored on the function
+    object, so the sniffing happens only once.
+    """
+    this = csv_sniffer_has_bug_last_field
+
+    if getattr(this, "has_bug", None) is None:
+        sniffed = csv.Sniffer().sniff("3, 'a'")
+        this.has_bug = sniffed.quotechar != "'"
+
+    return this.has_bug
+
+
+def workaround_csv_sniffer_bug_last_field(sniff_line, dialect, delimiters):
+    """
+    Workaround for the bug https://bugs.python.org/issue30157 if it is
+    unpatched.
+
+    Does nothing unless `csv_sniffer_has_bug_last_field` reports the bug and
+    ``sniff_line`` is matched by the "quoted last field" pattern before any
+    earlier pattern in the list. In that case ``dialect`` is updated in
+    place: ``quotechar``, ``doublequote`` and ``skipinitialspace`` are
+    always set, and ``delimiter`` only when the detected one is in
+    ``delimiters``. Returns None.
+    """
+    if csv_sniffer_has_bug_last_field():
+        # Reuses code from the csv module
+        right_regex = r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'
+
+        # Try the patterns in order and stop at the first one that matches.
+        for restr in (r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)',  # ,".*?",
+                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',  # .*?",
+                      right_regex,  # ,".*?"
+                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'):  # ".*?" (no delim, no space)
+            regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
+            matches = regexp.findall(sniff_line)
+            if matches:
+                break
+
+        # If it does not match the expression that was bugged, then this bug does not apply
+        if restr != right_regex:
+            return
+
+        groupindex = regexp.groupindex
+
+        # There is only one end of the string
+        assert len(matches) == 1
+        m = matches[0]
+
+        # findall returns tuples indexed from 0; group numbers start at 1.
+        n = groupindex['quote'] - 1
+        quote = m[n]
+
+        n = groupindex['delim'] - 1
+        delim = m[n]
+
+        n = groupindex['space'] - 1
+        space = bool(m[n])
+
+        # Looks for a doubled quote character inside a quoted field.
+        dq_regexp = re.compile(
+            r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" %
+            {'delim': re.escape(delim), 'quote': quote}, re.MULTILINE
+        )
+
+        doublequote = bool(dq_regexp.search(sniff_line))
+
+        dialect.quotechar = quote
+        if delim in delimiters:
+            dialect.delimiter = delim
+        dialect.doublequote = doublequote
+        dialect.skipinitialspace = space
+
+
+def split_data_line(line, dialect=None):
+    """Split one data line into its fields with the csv module.
+
+    Parameters
+    ----------
+    line : str
+        The line to split. It must not be empty, because its last character
+        is inspected.
+    dialect : csv dialect, optional
+        Dialect to reuse. When None, one is sniffed from ``line`` (comma
+        and tab are the candidate delimiters).
+
+    Returns
+    -------
+    row : list of str
+        The fields of the line.
+    dialect : csv dialect
+        The dialect that was used, so callers can pass it back in.
+    """
+    delimiters = ",\t"
+
+    # This can not be done in a per reader basis, and relational fields
+    # can be HUGE
+    csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))
+
+    # Remove the line end if any
+    if line[-1] == '\n':
+        line = line[:-1]
+
+    sniff_line = line
+
+    # Add a delimiter if none is present, so that the csv.Sniffer
+    # does not complain for a single-field CSV.
+    if not any(d in line for d in delimiters):
+        sniff_line += ","
+
+    if dialect is None:
+        dialect = csv.Sniffer().sniff(sniff_line, delimiters=delimiters)
+        workaround_csv_sniffer_bug_last_field(sniff_line=sniff_line,
+                                              dialect=dialect,
+                                              delimiters=delimiters)
+
+    # The original line is parsed, not the one padded for sniffing.
+    row = next(csv.reader([line], dialect))
+
+    return row, dialect
+
+
+# --------------
+# Parsing header
+# --------------
+def tokenize_attribute(iterable, attribute):
+    """Parse a raw string in header (e.g., starts by @attribute).
+
+    Given a raw string attribute, try to get the name and type of the
+    attribute and build the matching attribute object. Constraints:
+
+    * The line must start with @attribute (case insensitive, and
+      space like characters before @attribute are allowed)
+    * The name and the type must both be on that line; a declaration
+      spread over several lines is rejected.
+
+    Parameters
+    ----------
+    iterable : iterator
+       source of the following lines; it is advanced by one item, and
+       further for a relational attribute.
+    attribute : str
+       the attribute string.
+
+    Returns
+    -------
+    attribute : Attribute
+       attribute object built by `to_attribute` from the name and type
+    next_item : str
+       next line to be parsed
+
+    Raises
+    ------
+    ValueError
+       If the line is not an @attribute line or cannot be split into a
+       name and a type.
+
+    Notes
+    -----
+    A name enclosed in single quotes may contain spaces, e.g.
+    ``@attribute 'floupi 2' real`` gives the name ``floupi 2`` and the type
+    ``real``. For a relational attribute the nested declarations are read
+    as well, up to and including its end line.
+
+    """
+    sattr = attribute.strip()
+    mattr = r_attribute.match(sattr)
+    if mattr:
+        # atrv is everything after @attribute
+        atrv = mattr.group(1)
+        if r_comattrval.match(atrv):
+            name, type = tokenize_single_comma(atrv)
+            next_item = next(iterable)
+        elif r_wcomattrval.match(atrv):
+            name, type = tokenize_single_wcomma(atrv)
+            next_item = next(iterable)
+        else:
+            # Not sure we should support this, as it does not seem supported by
+            # weka.
+            raise ValueError("multi line not supported yet")
+    else:
+        raise ValueError("First line unparsable: %s" % sattr)
+
+    attribute = to_attribute(name, type)
+
+    if type.lower() == 'relational':
+        next_item = read_relational_attribute(iterable, attribute, next_item)
+    #    raise ValueError("relational attributes not supported yet")
+
+    return attribute, next_item
+
+
+def tokenize_single_comma(val):
+    """Split ``val`` into a single-quoted attribute name and its type.
+
+    Returns the stripped ``(name, type)`` pair and raises ValueError when
+    ``val`` does not have the ``'<name>' <type>`` form.
+    """
+    # XXX the caller has already matched this same pattern once; matching
+    # again here is wasteful but keeps the code simple for now.
+    matched = r_comattrval.match(val)
+    if not matched:
+        raise ValueError("Error while tokenizing single %s" % val)
+
+    try:
+        attr_name = matched.group(1).strip()
+        attr_type = matched.group(2).strip()
+    except IndexError:
+        raise ValueError("Error while tokenizing attribute")
+    return attr_name, attr_type
+
+
+def tokenize_single_wcomma(val):
+    """Split ``val`` into an unquoted attribute name and its type.
+
+    Returns the stripped ``(name, type)`` pair and raises ValueError when
+    ``val`` does not have the ``<name> <type>`` form.
+    """
+    # XXX the caller has already matched this same pattern once; matching
+    # again here is wasteful but keeps the code simple for now.
+    matched = r_wcomattrval.match(val)
+    if not matched:
+        raise ValueError("Error while tokenizing single %s" % val)
+
+    try:
+        attr_name = matched.group(1).strip()
+        attr_type = matched.group(2).strip()
+    except IndexError:
+        raise ValueError("Error while tokenizing attribute")
+    return attr_name, attr_type
+
+
+def read_relational_attribute(ofile, relational_attribute, i):
+    """Read the nested attributes of a relational attribute.
+
+    Parameters
+    ----------
+    ofile : iterator
+        Source of the header lines.
+    relational_attribute : RelationalAttribute
+        Attribute whose ``attributes`` list receives the nested attributes.
+    i : str
+        First line following the relational declaration.
+
+    Returns
+    -------
+    i : str
+        The line following the ``@end <name>`` line.
+
+    Raises
+    ------
+    ValueError
+        If a header line inside the block is not an @attribute line.
+    """
+    # The name is literal text, so escape it before building the pattern;
+    # otherwise characters such as '.' or '(' in it would act as regex
+    # syntax.
+    r_end_relational = re.compile(r'^@[Ee][Nn][Dd]\s*' +
+                                  re.escape(relational_attribute.name) +
+                                  r'\s*$')
+
+    while not r_end_relational.match(i):
+        m = r_headerline.match(i)
+        if m:
+            isattr = r_attribute.match(i)
+            if isattr:
+                # tokenize_attribute already returns the following line.
+                attr, i = tokenize_attribute(ofile, i)
+                relational_attribute.attributes.append(attr)
+            else:
+                raise ValueError("Error parsing line %s" % i)
+        else:
+            # Not a header line: skip it.
+            i = next(ofile)
+
+    # Step past the @end line itself.
+    i = next(ofile)
+    return i
+
+
+def read_header(ofile):
+    """Read the header of the iterable ofile.
+
+    Lines are consumed up to and including the @data line.
+
+    Returns
+    -------
+    relation : str or None
+        Name given on the @relation line, or None if there was none.
+    attributes : list
+        Attribute objects in declaration order.
+
+    Raises
+    ------
+    ValueError
+        If a header line is neither an @attribute nor a @relation line.
+    """
+    i = next(ofile)
+
+    # Pass first comments
+    while r_comment.match(i):
+        i = next(ofile)
+
+    # Header is everything up to DATA attribute ?
+    relation = None
+    attributes = []
+    while not r_datameta.match(i):
+        m = r_headerline.match(i)
+        if m:
+            isattr = r_attribute.match(i)
+            if isattr:
+                # tokenize_attribute already returns the following line.
+                attr, i = tokenize_attribute(ofile, i)
+                attributes.append(attr)
+            else:
+                isrel = r_relation.match(i)
+                if isrel:
+                    relation = isrel.group(1)
+                else:
+                    raise ValueError("Error parsing line %s" % i)
+                i = next(ofile)
+        else:
+            # Not a header line: skip it.
+            i = next(ofile)
+
+    return relation, attributes
+
+
+class MetaData(object):
+    """Small container describing the attributes of an ARFF dataset.
+
+    It records the relation name together with every attribute, keeping the
+    attributes in the order they were declared.
+
+    Examples
+    --------
+    ::
+
+        data, meta = loadarff('iris.arff')
+        # Iterating yields the attribute names of the iris.arff dataset
+        for i in meta:
+            print(i)
+        # The names are also available as a list
+        meta.names()
+        # And so are the attribute types
+        types = meta.types()
+
+    Methods
+    -------
+    names
+    types
+
+    Notes
+    -----
+    Iterating over an instance gives the attribute names in declaration
+    order; indexing with a name gives a ``(type_name, range)`` tuple.
+    """
+    def __init__(self, rel, attr):
+        self.name = rel
+
+        # Declaration order matters, hence the ordered mapping
+        ordered = OrderedDict()
+        for item in attr:
+            ordered[item.name] = item
+        self._attributes = ordered
+
+    def __repr__(self):
+        pieces = ["Dataset: %s\n" % self.name]
+        for key, item in self._attributes.items():
+            entry = "\t%s's type is %s" % (key, item.type_name)
+            if item.range:
+                entry += ", range is %s" % str(item.range)
+            pieces.append(entry + '\n')
+        return "".join(pieces)
+
+    def __iter__(self):
+        return iter(self._attributes)
+
+    def __getitem__(self, key):
+        found = self._attributes[key]
+        return (found.type_name, found.range)
+
+    def names(self):
+        """Return the list of attribute names.
+
+        Returns
+        -------
+        attrnames : list of str
+            The attribute names.
+        """
+        return list(self._attributes.keys())
+
+    def types(self):
+        """Return the list of attribute types.
+
+        Returns
+        -------
+        attr_types : list of str
+            The attribute types.
+        """
+        return [item.type_name for item in self._attributes.values()]
+
+
+def loadarff(f):
+    """
+    Read an arff file.
+
+    The data is returned as a record array, which can be accessed much like
+    a dictionary of NumPy arrays. For example, if one of the attributes is
+    called 'pressure', then its first 10 data points can be accessed from the
+    ``data`` record array like so: ``data['pressure'][0:10]``
+
+
+    Parameters
+    ----------
+    f : file-like or str
+       File-like object to read from, or filename to open.
+
+    Returns
+    -------
+    data : record array
+       The data of the arff file, accessible by attribute names.
+    meta : `MetaData`
+       Contains information about the arff file such as name and
+       type of attributes, the relation (name of the dataset), etc.
+
+    Raises
+    ------
+    ParseArffError
+        This is raised if the given file is not ARFF-formatted.
+    NotImplementedError
+        The ARFF file has an attribute which is not supported yet.
+
+    Notes
+    -----
+
+    This function should be able to read most arff files. Not
+    implemented functionality include:
+
+    * string type attributes
+
+    It can read files with numeric, nominal and date attributes. It cannot
+    read files with sparse data ({} in the file). However, this function can
+    read files with missing data (? in the file), representing the data
+    points as NaNs.
+
+    Examples
+    --------
+    >>> from scipy.io import arff
+    >>> from io import StringIO
+    >>> content = \"\"\"
+    ... @relation foo
+    ... @attribute width  numeric
+    ... @attribute height numeric
+    ... @attribute color  {red,green,blue,yellow,black}
+    ... @data
+    ... 5.0,3.25,blue
+    ... 4.5,3.75,green
+    ... 3.0,4.00,red
+    ... \"\"\"
+    >>> f = StringIO(content)
+    >>> data, meta = arff.loadarff(f)
+    >>> data
+    array([(5.0, 3.25, 'blue'), (4.5, 3.75, 'green'), (3.0, 4.0, 'red')],
+          dtype=[('width', '<f8'), ('height', '<f8'), ('color', '|S6')])
+    >>> meta
+    Dataset: foo
+    \twidth's type is numeric
+    \theight's type is numeric
+    \tcolor's type is nominal, range is ('red', 'green', 'blue', 'yellow', 'black')
+
+    """
+    # Anything with a ``read`` attribute is used as is; otherwise ``f`` is
+    # taken to be a filename.
+    if hasattr(f, 'read'):
+        ofile = f
+    else:
+        ofile = open(f, 'rt')
+    try:
+        return _loadarff(ofile)
+    finally:
+        if ofile is not f:  # only close what we opened
+            ofile.close()
+
+
+def _loadarff(ofile):
+    """Parse an already opened ARFF source.
+
+    Parameters
+    ----------
+    ofile : iterator of str
+        Yields the lines of the ARFF content.
+
+    Returns
+    -------
+    data : ndarray
+        Structured array with one field per attribute.
+    meta : MetaData
+        Description of the relation and its attributes.
+
+    Raises
+    ------
+    ParseArffError
+        If parsing the header raises ValueError.
+    NotImplementedError
+        If the header declares a string attribute.
+    """
+    # Parse the header file
+    try:
+        rel, attr = read_header(ofile)
+    except ValueError as e:
+        msg = "Error while parsing header, error was: " + str(e)
+        # Chain the original error so its traceback is kept.
+        raise ParseArffError(msg) from e
+
+    # Check whether we have a string attribute (not supported yet)
+    hasstr = any(isinstance(a, StringAttribute) for a in attr)
+
+    meta = MetaData(rel, attr)
+
+    # XXX The following code is not great
+    # Build the type descriptor descr and the list of convertors to convert
+    # each attribute to the suitable type (which should match the one in
+    # descr).
+
+    # This can be used once we want to support integer as integer values and
+    # not as numeric anymore (using masked arrays ?).
+
+    if hasstr:
+        # How to support string efficiently ? Ideally, we should know the max
+        # size of the string before allocating the numpy array.
+        raise NotImplementedError("String attributes not supported yet, sorry")
+
+    ni = len(attr)
+
+    def generator(row_iter, delim=','):
+        # TODO: this is where we are spending time (~80%). I think things
+        # could be made more efficiently:
+        #   - We could for example "compile" the function, because some values
+        #   do not change here.
+        #   - The function to convert a line to dtyped values could also be
+        #   generated on the fly from a string and be executed instead of
+        #   looping.
+        #   - The regex are overkill: for comments, checking that a line starts
+        #   by % should be enough and faster, and for empty lines, same thing
+        #   --> this does not seem to change anything.
+
+        # 'compiling' the range since it does not change
+        # Note, I have already tried zipping the converters and
+        # row elements and got slightly worse performance.
+        elems = list(range(ni))
+
+        # Sniffed on the first data line, then reused for the rest.
+        dialect = None
+        for raw in row_iter:
+            # We do not abstract skipping comments and empty lines for
+            # performance reasons.
+            if r_comment.match(raw) or r_empty.match(raw):
+                continue
+
+            row, dialect = split_data_line(raw, dialect)
+
+            yield tuple([attr[i].parse_data(row[i]) for i in elems])
+
+    rows = list(generator(ofile))
+    # No error should happen here: it is a bug otherwise
+    data = np.array(rows, [(a.name, a.dtype) for a in attr])
+    return data, meta
+
+
+# ----
+# Misc
+# ----
+def basic_stats(data):
+    """Return ``(min, max, mean, std * n / (n - 1))`` for ``data``.
+
+    The minimum and maximum ignore NaNs; the mean and the standard
+    deviation do not.
+    """
+    count = data.size
+    nbfac = count * 1. / (count - 1)
+    lowest = np.nanmin(data)
+    highest = np.nanmax(data)
+    average = np.mean(data)
+    spread = np.std(data) * nbfac
+    return lowest, highest, average, spread
+
+
+def print_attribute(name, tp, data):
+    """Print a one-line summary of an attribute.
+
+    Numeric kinds are printed as ``name,type,min,max,mean,std`` using
+    `basic_stats` on ``data``; any other kind is printed as ``str(tp)``.
+    """
+    kind = tp.type_name
+    if kind not in ('numeric', 'real', 'integer'):
+        print(str(tp))
+        return
+
+    low, high, avg, dev = basic_stats(data)
+    print("%s,%s,%f,%f,%f,%f" % (name, kind, low, high, avg, dev))
+
+
+def test_weka(filename):
+    """Load ``filename`` and print a summary of the dataset.
+
+    Prints the number of fields, the number of records, and then one line
+    per attribute.
+    """
+    data, meta = loadarff(filename)
+    print(len(data.dtype))
+    print(data.size)
+    for name in meta:
+        # print_attribute reads ``.type_name``, so it needs the attribute
+        # object; ``meta[name]`` only gives a (type_name, range) tuple.
+        print_attribute(name, meta._attributes[name], data[name])
+
+
+# make sure nose does not find this as a test
+test_weka.__test__ = False
+
+
+if __name__ == '__main__':
+    import sys
+    filename = sys.argv[1]
+    test_weka(filename)
--- a/venv/Lib/site-packages/scipy/io/arff/setup.py
+++ b/venv/Lib/site-packages/scipy/io/arff/setup.py
@ -0,0 +1,11 @@
+
+def configuration(parent_package='io',top_path=None):
+    """Return the numpy.distutils ``Configuration`` for the ``arff`` package.
+
+    The ``tests`` directory is registered as a data directory.
+    """
+    from numpy.distutils.misc_util import Configuration
+    config = Configuration('arff', parent_package, top_path)
+    config.add_data_dir('tests')
+    return config
+
+
+if __name__ == '__main__':
+    from numpy.distutils.core import setup
+    setup(**configuration(top_path='').todict())
--- a/venv/Lib/site-packages/scipy/io/arff/tests/init.py
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/init.py
--- a/venv/Lib/site-packages/scipy/io/arff/tests/pycache/init.cpython-36.pyc
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/pycache/init.cpython-36.pyc
--- a/venv/Lib/site-packages/scipy/io/arff/tests/pycache/test_arffread.cpython-36.pyc
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/pycache/test_arffread.cpython-36.pyc
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/iris.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/iris.arff
@ -0,0 +1,225 @@
+% 1. Title: Iris Plants Database
+% 
+% 2. Sources:
+%      (a) Creator: R.A. Fisher
+%      (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
+%      (c) Date: July, 1988
+% 
+% 3. Past Usage:
+%    - Publications: too many to mention!!!  Here are a few.
+%    1. Fisher,R.A. "The use of multiple measurements in taxonomic problems"
+%       Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions
+%       to Mathematical Statistics" (John Wiley, NY, 1950).
+%    2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis.
+%       (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.
+%    3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
+%       Structure and Classification Rule for Recognition in Partially Exposed
+%       Environments".  IEEE Transactions on Pattern Analysis and Machine
+%       Intelligence, Vol. PAMI-2, No. 1, 67-71.
+%       -- Results:
+%          -- very low misclassification rates (0% for the setosa class)
+%    4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule".  IEEE 
+%       Transactions on Information Theory, May 1972, 431-433.
+%       -- Results:
+%          -- very low misclassification rates again
+%    5. See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al's AUTOCLASS II
+%       conceptual clustering system finds 3 classes in the data.
+% 
+% 4. Relevant Information:
+%    --- This is perhaps the best known database to be found in the pattern
+%        recognition literature.  Fisher's paper is a classic in the field
+%        and is referenced frequently to this day.  (See Duda & Hart, for
+%        example.)  The data set contains 3 classes of 50 instances each,
+%        where each class refers to a type of iris plant.  One class is
+%        linearly separable from the other 2; the latter are NOT linearly
+%        separable from each other.
+%    --- Predicted attribute: class of iris plant.
+%    --- This is an exceedingly simple domain.
+% 
+% 5. Number of Instances: 150 (50 in each of three classes)
+% 
+% 6. Number of Attributes: 4 numeric, predictive attributes and the class
+% 
+% 7. Attribute Information:
+%    1. sepal length in cm
+%    2. sepal width in cm
+%    3. petal length in cm
+%    4. petal width in cm
+%    5. class: 
+%       -- Iris Setosa
+%       -- Iris Versicolour
+%       -- Iris Virginica
+% 
+% 8. Missing Attribute Values: None
+% 
+% Summary Statistics:
+%  	           Min  Max   Mean    SD   Class Correlation
+%    sepal length: 4.3  7.9   5.84  0.83    0.7826   
+%     sepal width: 2.0  4.4   3.05  0.43   -0.4194
+%    petal length: 1.0  6.9   3.76  1.76    0.9490  (high!)
+%     petal width: 0.1  2.5   1.20  0.76    0.9565  (high!)
+% 
+% 9. Class Distribution: 33.3% for each of 3 classes.
+
+@RELATION iris
+
+@ATTRIBUTE sepallength	REAL
+@ATTRIBUTE sepalwidth 	REAL
+@ATTRIBUTE petallength 	REAL
+@ATTRIBUTE petalwidth	REAL
+@ATTRIBUTE class 	{Iris-setosa,Iris-versicolor,Iris-virginica}
+
+@DATA
+5.1,3.5,1.4,0.2,Iris-setosa
+4.9,3.0,1.4,0.2,Iris-setosa
+4.7,3.2,1.3,0.2,Iris-setosa
+4.6,3.1,1.5,0.2,Iris-setosa
+5.0,3.6,1.4,0.2,Iris-setosa
+5.4,3.9,1.7,0.4,Iris-setosa
+4.6,3.4,1.4,0.3,Iris-setosa
+5.0,3.4,1.5,0.2,Iris-setosa
+4.4,2.9,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.4,3.7,1.5,0.2,Iris-setosa
+4.8,3.4,1.6,0.2,Iris-setosa
+4.8,3.0,1.4,0.1,Iris-setosa
+4.3,3.0,1.1,0.1,Iris-setosa
+5.8,4.0,1.2,0.2,Iris-setosa
+5.7,4.4,1.5,0.4,Iris-setosa
+5.4,3.9,1.3,0.4,Iris-setosa
+5.1,3.5,1.4,0.3,Iris-setosa
+5.7,3.8,1.7,0.3,Iris-setosa
+5.1,3.8,1.5,0.3,Iris-setosa
+5.4,3.4,1.7,0.2,Iris-setosa
+5.1,3.7,1.5,0.4,Iris-setosa
+4.6,3.6,1.0,0.2,Iris-setosa
+5.1,3.3,1.7,0.5,Iris-setosa
+4.8,3.4,1.9,0.2,Iris-setosa
+5.0,3.0,1.6,0.2,Iris-setosa
+5.0,3.4,1.6,0.4,Iris-setosa
+5.2,3.5,1.5,0.2,Iris-setosa
+5.2,3.4,1.4,0.2,Iris-setosa
+4.7,3.2,1.6,0.2,Iris-setosa
+4.8,3.1,1.6,0.2,Iris-setosa
+5.4,3.4,1.5,0.4,Iris-setosa
+5.2,4.1,1.5,0.1,Iris-setosa
+5.5,4.2,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+5.0,3.2,1.2,0.2,Iris-setosa
+5.5,3.5,1.3,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa
+4.4,3.0,1.3,0.2,Iris-setosa
+5.1,3.4,1.5,0.2,Iris-setosa
+5.0,3.5,1.3,0.3,Iris-setosa
+4.5,2.3,1.3,0.3,Iris-setosa
+4.4,3.2,1.3,0.2,Iris-setosa
+5.0,3.5,1.6,0.6,Iris-setosa
+5.1,3.8,1.9,0.4,Iris-setosa
+4.8,3.0,1.4,0.3,Iris-setosa
+5.1,3.8,1.6,0.2,Iris-setosa
+4.6,3.2,1.4,0.2,Iris-setosa
+5.3,3.7,1.5,0.2,Iris-setosa
+5.0,3.3,1.4,0.2,Iris-setosa
+7.0,3.2,4.7,1.4,Iris-versicolor
+6.4,3.2,4.5,1.5,Iris-versicolor
+6.9,3.1,4.9,1.5,Iris-versicolor
+5.5,2.3,4.0,1.3,Iris-versicolor
+6.5,2.8,4.6,1.5,Iris-versicolor
+5.7,2.8,4.5,1.3,Iris-versicolor
+6.3,3.3,4.7,1.6,Iris-versicolor
+4.9,2.4,3.3,1.0,Iris-versicolor
+6.6,2.9,4.6,1.3,Iris-versicolor
+5.2,2.7,3.9,1.4,Iris-versicolor
+5.0,2.0,3.5,1.0,Iris-versicolor
+5.9,3.0,4.2,1.5,Iris-versicolor
+6.0,2.2,4.0,1.0,Iris-versicolor
+6.1,2.9,4.7,1.4,Iris-versicolor
+5.6,2.9,3.6,1.3,Iris-versicolor
+6.7,3.1,4.4,1.4,Iris-versicolor
+5.6,3.0,4.5,1.5,Iris-versicolor
+5.8,2.7,4.1,1.0,Iris-versicolor
+6.2,2.2,4.5,1.5,Iris-versicolor
+5.6,2.5,3.9,1.1,Iris-versicolor
+5.9,3.2,4.8,1.8,Iris-versicolor
+6.1,2.8,4.0,1.3,Iris-versicolor
+6.3,2.5,4.9,1.5,Iris-versicolor
+6.1,2.8,4.7,1.2,Iris-versicolor
+6.4,2.9,4.3,1.3,Iris-versicolor
+6.6,3.0,4.4,1.4,Iris-versicolor
+6.8,2.8,4.8,1.4,Iris-versicolor
+6.7,3.0,5.0,1.7,Iris-versicolor
+6.0,2.9,4.5,1.5,Iris-versicolor
+5.7,2.6,3.5,1.0,Iris-versicolor
+5.5,2.4,3.8,1.1,Iris-versicolor
+5.5,2.4,3.7,1.0,Iris-versicolor
+5.8,2.7,3.9,1.2,Iris-versicolor
+6.0,2.7,5.1,1.6,Iris-versicolor
+5.4,3.0,4.5,1.5,Iris-versicolor
+6.0,3.4,4.5,1.6,Iris-versicolor
+6.7,3.1,4.7,1.5,Iris-versicolor
+6.3,2.3,4.4,1.3,Iris-versicolor
+5.6,3.0,4.1,1.3,Iris-versicolor
+5.5,2.5,4.0,1.3,Iris-versicolor
+5.5,2.6,4.4,1.2,Iris-versicolor
+6.1,3.0,4.6,1.4,Iris-versicolor
+5.8,2.6,4.0,1.2,Iris-versicolor
+5.0,2.3,3.3,1.0,Iris-versicolor
+5.6,2.7,4.2,1.3,Iris-versicolor
+5.7,3.0,4.2,1.2,Iris-versicolor
+5.7,2.9,4.2,1.3,Iris-versicolor
+6.2,2.9,4.3,1.3,Iris-versicolor
+5.1,2.5,3.0,1.1,Iris-versicolor
+5.7,2.8,4.1,1.3,Iris-versicolor
+6.3,3.3,6.0,2.5,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+7.1,3.0,5.9,2.1,Iris-virginica
+6.3,2.9,5.6,1.8,Iris-virginica
+6.5,3.0,5.8,2.2,Iris-virginica
+7.6,3.0,6.6,2.1,Iris-virginica
+4.9,2.5,4.5,1.7,Iris-virginica
+7.3,2.9,6.3,1.8,Iris-virginica
+6.7,2.5,5.8,1.8,Iris-virginica
+7.2,3.6,6.1,2.5,Iris-virginica
+6.5,3.2,5.1,2.0,Iris-virginica
+6.4,2.7,5.3,1.9,Iris-virginica
+6.8,3.0,5.5,2.1,Iris-virginica
+5.7,2.5,5.0,2.0,Iris-virginica
+5.8,2.8,5.1,2.4,Iris-virginica
+6.4,3.2,5.3,2.3,Iris-virginica
+6.5,3.0,5.5,1.8,Iris-virginica
+7.7,3.8,6.7,2.2,Iris-virginica
+7.7,2.6,6.9,2.3,Iris-virginica
+6.0,2.2,5.0,1.5,Iris-virginica
+6.9,3.2,5.7,2.3,Iris-virginica
+5.6,2.8,4.9,2.0,Iris-virginica
+7.7,2.8,6.7,2.0,Iris-virginica
+6.3,2.7,4.9,1.8,Iris-virginica
+6.7,3.3,5.7,2.1,Iris-virginica
+7.2,3.2,6.0,1.8,Iris-virginica
+6.2,2.8,4.8,1.8,Iris-virginica
+6.1,3.0,4.9,1.8,Iris-virginica
+6.4,2.8,5.6,2.1,Iris-virginica
+7.2,3.0,5.8,1.6,Iris-virginica
+7.4,2.8,6.1,1.9,Iris-virginica
+7.9,3.8,6.4,2.0,Iris-virginica
+6.4,2.8,5.6,2.2,Iris-virginica
+6.3,2.8,5.1,1.5,Iris-virginica
+6.1,2.6,5.6,1.4,Iris-virginica
+7.7,3.0,6.1,2.3,Iris-virginica
+6.3,3.4,5.6,2.4,Iris-virginica
+6.4,3.1,5.5,1.8,Iris-virginica
+6.0,3.0,4.8,1.8,Iris-virginica
+6.9,3.1,5.4,2.1,Iris-virginica
+6.7,3.1,5.6,2.4,Iris-virginica
+6.9,3.1,5.1,2.3,Iris-virginica
+5.8,2.7,5.1,1.9,Iris-virginica
+6.8,3.2,5.9,2.3,Iris-virginica
+6.7,3.3,5.7,2.5,Iris-virginica
+6.7,3.0,5.2,2.3,Iris-virginica
+6.3,2.5,5.0,1.9,Iris-virginica
+6.5,3.0,5.2,2.0,Iris-virginica
+6.2,3.4,5.4,2.3,Iris-virginica
+5.9,3.0,5.1,1.8,Iris-virginica
+%
+%
+%
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/missing.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/missing.arff
@ -0,0 +1,8 @@
+% This arff file contains some missing data
+@relation missing
+@attribute yop real
+@attribute yap real
+@data
+1,5
+2,4
+?,?
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/nodata.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/nodata.arff
@ -0,0 +1,11 @@
+@RELATION iris
+
+@ATTRIBUTE sepallength  REAL
+@ATTRIBUTE sepalwidth   REAL
+@ATTRIBUTE petallength  REAL
+@ATTRIBUTE petalwidth   REAL
+@ATTRIBUTE class    {Iris-setosa,Iris-versicolor,Iris-virginica}
+
+@DATA
+
+% This file has no data
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/quoted_nominal.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/quoted_nominal.arff
@ -0,0 +1,13 @@
+% Regression test for issue #10232 : Exception in loadarff with quoted nominal attributes
+% Spaces between elements are stripped by the parser
+
+@relation SOME_DATA
+@attribute age numeric
+@attribute smoker {'yes', 'no'}
+@data
+18,  'no'
+24, 'yes'
+44,     'no'
+56, 'no'
+89,'yes'
+11,  'no'
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/quoted_nominal_spaces.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/quoted_nominal_spaces.arff
@ -0,0 +1,13 @@
+% Regression test for issue #10232 : Exception in loadarff with quoted nominal attributes
+% Spaces inside quotes are NOT stripped by the parser
+
+@relation SOME_DATA
+@attribute age numeric
+@attribute smoker {'  yes', 'no  '}
+@data
+18,'no  '
+24,'  yes'
+44,'no  '
+56,'no  '
+89,'  yes'
+11,'no  '
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/test1.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/test1.arff
@ -0,0 +1,10 @@
+@RELATION test1
+
+@ATTRIBUTE attr0	REAL
+@ATTRIBUTE attr1 	REAL
+@ATTRIBUTE attr2 	REAL
+@ATTRIBUTE attr3	REAL
+@ATTRIBUTE class 	{class0, class1, class2, class3}
+
+@DATA
+0.1, 0.2, 0.3, 0.4,class1
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/test10.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/test10.arff
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/test2.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/test2.arff
@ -0,0 +1,15 @@
+@RELATION test2
+
+@ATTRIBUTE attr0	REAL
+@ATTRIBUTE attr1 	real
+@ATTRIBUTE attr2 	integer
+@ATTRIBUTE attr3	Integer
+@ATTRIBUTE attr4 	Numeric
+@ATTRIBUTE attr5	numeric
+@ATTRIBUTE attr6 	string
+@ATTRIBUTE attr7 	STRING
+@ATTRIBUTE attr8 	{bla}
+@ATTRIBUTE attr9 	{bla, bla}
+
+@DATA
+0.1, 0.2, 0.3, 0.4,class1
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/test3.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/test3.arff
@ -0,0 +1,6 @@
+@RELATION test3
+
+@ATTRIBUTE attr0	crap
+
+@DATA
+0.1, 0.2, 0.3, 0.4,class1
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/test4.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/test4.arff
@ -0,0 +1,11 @@
+@RELATION test5
+
+@ATTRIBUTE attr0	REAL
+@ATTRIBUTE attr1 	REAL
+@ATTRIBUTE attr2 	REAL
+@ATTRIBUTE attr3	REAL
+@ATTRIBUTE class 	{class0, class1, class2, class3}
+@DATA
+0.1, 0.2, 0.3, 0.4,class1
+-0.1, -0.2, -0.3, -0.4,class2
+1, 2, 3, 4,class3
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/test5.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/test5.arff
@ -0,0 +1,26 @@
+@RELATION test4
+
+@ATTRIBUTE attr0	REAL
+@ATTRIBUTE attr1 	REAL
+@ATTRIBUTE attr2 	REAL
+@ATTRIBUTE attr3	REAL
+@ATTRIBUTE class 	{class0, class1, class2, class3}
+
+@DATA
+
+% lsdflkjhaksjdhf
+
+% lsdflkjhaksjdhf
+
+0.1, 0.2, 0.3, 0.4,class1
+% laksjdhf
+
+% lsdflkjhaksjdhf
+-0.1, -0.2, -0.3, -0.4,class2
+
+% lsdflkjhaksjdhf
+% lsdflkjhaksjdhf
+
+% lsdflkjhaksjdhf
+
+1, 2, 3, 4,class3
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/test6.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/test6.arff
@ -0,0 +1,12 @@
+@RELATION test6
+
+@ATTRIBUTE attr0	REAL
+@ATTRIBUTE attr1 	REAL
+@ATTRIBUTE attr2 	REAL
+@ATTRIBUTE attr3	REAL
+@ATTRIBUTE class 	{C}
+
+@DATA
+0.1, 0.2, 0.3, 0.4,C
+-0.1, -0.2, -0.3, -0.4,C
+1, 2, 3, 4,C
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/test7.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/test7.arff
@ -0,0 +1,15 @@
+@RELATION test7
+
+@ATTRIBUTE attr_year	DATE yyyy
+@ATTRIBUTE attr_month	DATE yyyy-MM
+@ATTRIBUTE attr_date	DATE yyyy-MM-dd
+@ATTRIBUTE attr_datetime_local	DATE "yyyy-MM-dd HH:mm"
+@ATTRIBUTE attr_datetime_missing	DATE "yyyy-MM-dd HH:mm"
+
+@DATA
+1999,1999-01,1999-01-31,"1999-01-31 00:01",?
+2004,2004-12,2004-12-01,"2004-12-01 23:59","2004-12-01 23:59"
+1817,1817-04,1817-04-28,"1817-04-28 13:00",?
+2100,2100-09,2100-09-10,"2100-09-10 12:00",?
+2013,2013-11,2013-11-30,"2013-11-30 04:55","2013-11-30 04:55"
+1631,1631-10,1631-10-15,"1631-10-15 20:04","1631-10-15 20:04"
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/test8.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/test8.arff
@ -0,0 +1,12 @@
+@RELATION test8
+
+@ATTRIBUTE attr_datetime_utc	DATE "yyyy-MM-dd HH:mm Z"
+@ATTRIBUTE attr_datetime_full	DATE "yy-MM-dd HH:mm:ss z"
+
+@DATA
+"1999-01-31 00:01 UTC","99-01-31 00:01:08 +0430"
+"2004-12-01 23:59 UTC","04-12-01 23:59:59 -0800"
+"1817-04-28 13:00 UTC","17-04-28 13:00:33 +1000"
+"2100-09-10 12:00 UTC","21-09-10 12:00:21 -0300"
+"2013-11-30 04:55 UTC","13-11-30 04:55:48 -1100"
+"1631-10-15 20:04 UTC","31-10-15 20:04:10 +0000"
--- a/venv/Lib/site-packages/scipy/io/arff/tests/data/test9.arff
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/data/test9.arff
@ -0,0 +1,14 @@
+@RELATION test9
+
+@ATTRIBUTE attr_date_number	    RELATIONAL
+	@ATTRIBUTE attr_date	DATE "yyyy-MM-dd"
+	@ATTRIBUTE attr_number	INTEGER
+@END attr_date_number
+
+@DATA
+"1999-01-31	1\n1935-11-27	10"
+"2004-12-01	2\n1942-08-13	20"
+"1817-04-28	3"
+"2100-09-10	4\n1957-04-17	40\n1721-01-14	400"
+"2013-11-30	5"
+"1631-10-15	6"
--- a/venv/Lib/site-packages/scipy/io/arff/tests/test_arffread.py
+++ b/venv/Lib/site-packages/scipy/io/arff/tests/test_arffread.py
@ -0,0 +1,412 @@
+import datetime
+import os
+import sys
+from os.path import join as pjoin
+
+from io import StringIO
+
+import numpy as np
+
+from numpy.testing import (assert_array_almost_equal,
+                           assert_array_equal, assert_equal, assert_)
+import pytest
+from pytest import raises as assert_raises
+
+from scipy.io.arff.arffread import loadarff
+from scipy.io.arff.arffread import read_header, ParseArffError
+
+
+# Directory holding the ARFF fixtures, located next to this test module.
+data_path = pjoin(os.path.dirname(__file__), 'data')
+
+# Header-parsing fixtures (test3 declares an invalid attribute type).
+test1 = pjoin(data_path, 'test1.arff')
+test2 = pjoin(data_path, 'test2.arff')
+test3 = pjoin(data_path, 'test3.arff')
+
+# Data-loading fixtures: plain rows, rows with comments, single-character
+# nominal value, date attributes, dates with time zones, relational data.
+test4 = pjoin(data_path, 'test4.arff')
+test5 = pjoin(data_path, 'test5.arff')
+test6 = pjoin(data_path, 'test6.arff')
+test7 = pjoin(data_path, 'test7.arff')
+test8 = pjoin(data_path, 'test8.arff')
+test9 = pjoin(data_path, 'test9.arff')
+test10 = pjoin(data_path, 'test10.arff')
+# NOTE(review): test11 is not referenced by any test in this module -- confirm
+# whether the fixture exists or the name can be dropped.
+test11 = pjoin(data_path, 'test11.arff')
+test_quoted_nominal = pjoin(data_path, 'quoted_nominal.arff')
+test_quoted_nominal_spaces = pjoin(data_path, 'quoted_nominal_spaces.arff')
+
+# Rows shared by test4/test5/test6; only the four numeric columns are
+# compared by TestData._test.
+expect4_data = [(0.1, 0.2, 0.3, 0.4, 'class1'),
+                (-0.1, -0.2, -0.3, -0.4, 'class2'),
+                (1, 2, 3, 4, 'class3')]
+expected_types = ['numeric', 'numeric', 'numeric', 'numeric', 'nominal']
+
+# Expected content of missing.arff: the '?' entries of its last row are
+# expected to load as NaN.
+missing = pjoin(data_path, 'missing.arff')
+expect_missing_raw = np.array([[1, 5], [2, 4], [np.nan, np.nan]])
+expect_missing = np.empty(3, [('yop', float), ('yap', float)])
+expect_missing['yop'] = expect_missing_raw[:, 0]
+expect_missing['yap'] = expect_missing_raw[:, 1]
+
+
+class TestData(object):
+    """Load the small numeric/nominal fixtures and compare with known rows."""
+
+    def test1(self):
+        # Parsing trivial file with nothing.
+        self._test(test4)
+
+    def test2(self):
+        # Parsing trivial file with some comments in the data section.
+        self._test(test5)
+
+    def test3(self):
+        # Parsing trivial file with nominal attribute of 1 character.
+        self._test(test6)
+
+    def _test(self, test_file):
+        # Compare the rows and attribute types of `test_file` with the
+        # module-level expect4_data / expected_types.
+        data, meta = loadarff(test_file)
+        # Without this check a truncated or empty result would make the
+        # comparison loop below pass vacuously.
+        assert_equal(len(data), len(expect4_data))
+        for expected_row, row in zip(expect4_data, data):
+            # Only the four numeric columns are compared; the nominal class
+            # column differs between the fixtures.
+            for j in range(4):
+                assert_array_almost_equal(expected_row[j], row[j])
+        assert_equal(meta.types(), expected_types)
+
+    def test_filelike(self):
+        # Test reading from file-like object (StringIO)
+        with open(test1) as f1:
+            data1, meta1 = loadarff(f1)
+        with open(test1) as f2:
+            data2, meta2 = loadarff(StringIO(f2.read()))
+        # test1.arff holds a single row, so the element-wise comparison
+        # yields a one-element array with an unambiguous truth value.
+        assert_(data1 == data2)
+        assert_(repr(meta1) == repr(meta2))
+
+    @pytest.mark.skipif(sys.version_info < (3, 6),
+                        reason='Passing path-like objects to IO functions requires Python >= 3.6')
+    def test_path(self):
+        # Test reading from `pathlib.Path` object
+        from pathlib import Path
+
+        with open(test1) as f1:
+            data1, meta1 = loadarff(f1)
+
+        data2, meta2 = loadarff(Path(test1))
+
+        assert_(data1 == data2)
+        assert_(repr(meta1) == repr(meta2))
+
+
+class TestMissingData(object):
+    """Check loading of a file whose data section contains '?' entries."""
+
+    def test_missing(self):
+        loaded, _meta = loadarff(missing)
+        # Both columns must match the expected values, NaN rows included.
+        for column in ('yop', 'yap'):
+            assert_array_almost_equal(loaded[column], expect_missing[column])
+
+
+class TestNoData(object):
+    """Check loading of a file that declares attributes but has no rows."""
+
+    def test_nodata(self):
+        # nodata.arff has an empty @DATA section, so the result must be a
+        # zero-length array that still carries the complete record dtype.
+        path = os.path.join(data_path, 'nodata.arff')
+        loaded, _meta = loadarff(path)
+
+        numeric_names = ('sepallength', 'sepalwidth',
+                         'petallength', 'petalwidth')
+        fields = [(name, '<f8') for name in numeric_names]
+        fields.append(('class', 'S15'))
+
+        assert_equal(loaded.dtype, np.dtype(fields))
+        assert_equal(loaded.size, 0)
+
+
+class TestHeader(object):
+    """Tests for ``read_header`` on the header-only aspects of the fixtures."""
+
+    def test_type_parsing(self):
+        # Every spelling of the type keywords in test2.arff must map to the
+        # expected type name.
+        with open(test2) as ofile:
+            rel, attrs = read_header(ofile)
+
+        expected = ['numeric'] * 6 + ['string'] * 2 + ['nominal'] * 2
+
+        for idx, attr in enumerate(attrs):
+            assert_(attr.type_name == expected[idx])
+
+    def test_badtype_parsing(self):
+        # An unknown attribute type must be rejected while reading the header.
+        with assert_raises(ParseArffError):
+            with open(test3) as ofile:
+                _, _ = read_header(ofile)
+
+    def test_fullheader1(self):
+        # Parse a complete, simple header and check every piece of it.
+        with open(test1) as ofile:
+            rel, attrs = read_header(ofile)
+
+        # Relation name
+        assert_(rel == 'test1')
+
+        # Four numeric attributes followed by one nominal attribute
+        assert_(len(attrs) == 5)
+        for idx, attr in enumerate(attrs[:4]):
+            assert_(attr.name == 'attr%d' % idx)
+            assert_(attr.type_name == 'numeric')
+
+        nominal = attrs[4]
+        assert_(nominal.name == 'class')
+        assert_(nominal.values == ('class0', 'class1', 'class2', 'class3'))
+
+    def test_dateheader(self):
+        # Date attributes must expose their name and the converted format.
+        with open(test7) as ofile:
+            rel, attrs = read_header(ofile)
+
+        assert_(rel == 'test7')
+
+        expected = [
+            ('attr_year', '%Y'),
+            ('attr_month', '%Y-%m'),
+            ('attr_date', '%Y-%m-%d'),
+            ('attr_datetime_local', '%Y-%m-%d %H:%M'),
+            ('attr_datetime_missing', '%Y-%m-%d %H:%M'),
+        ]
+        assert_(len(attrs) == 5)
+
+        for attr, (name, date_format) in zip(attrs, expected):
+            assert_(attr.name == name)
+            assert_(attr.date_format == date_format)
+
+    def test_dateheader_unsupported(self):
+        # test8.arff uses date formats with time zones, which must be refused.
+        with assert_raises(ValueError):
+            with open(test8) as ofile:
+                _, _ = read_header(ofile)
+
+
+class TestDateAttribute(object):
+    """Check the columns loaded from the date fixture test7.arff."""
+
+    def setup_method(self):
+        self.data, self.meta = loadarff(test7)
+
+    def _check_column(self, column, values, unit):
+        # Compare one loaded column with `values` converted to datetime64
+        # of the given unit.
+        expected = np.array(values, dtype='datetime64[%s]' % unit)
+        assert_array_equal(self.data[column], expected)
+
+    def test_year_attribute(self):
+        years = ['1999', '2004', '1817', '2100', '2013', '1631']
+        self._check_column("attr_year", years, 'Y')
+
+    def test_month_attribute(self):
+        months = ['1999-01', '2004-12', '1817-04',
+                  '2100-09', '2013-11', '1631-10']
+        self._check_column("attr_month", months, 'M')
+
+    def test_date_attribute(self):
+        days = ['1999-01-31', '2004-12-01', '1817-04-28',
+                '2100-09-10', '2013-11-30', '1631-10-15']
+        self._check_column("attr_date", days, 'D')
+
+    def test_datetime_local_attribute(self):
+        moments = [
+            datetime.datetime(1999, 1, 31, 0, 1),
+            datetime.datetime(2004, 12, 1, 23, 59),
+            datetime.datetime(1817, 4, 28, 13, 0),
+            datetime.datetime(2100, 9, 10, 12, 0),
+            datetime.datetime(2013, 11, 30, 4, 55),
+            datetime.datetime(1631, 10, 15, 20, 4),
+        ]
+        self._check_column("attr_datetime_local", moments, 'm')
+
+    def test_datetime_missing(self):
+        # '?' entries in the fixture are expected to load as NaT.
+        moments = ['nat', '2004-12-01T23:59', 'nat',
+                   'nat', '2013-11-30T04:55', '1631-10-15T20:04']
+        self._check_column("attr_datetime_missing", moments, 'm')
+
+    def test_datetime_timezone(self):
+        # Loading the time-zone fixture must fail with ParseArffError.
+        with assert_raises(ParseArffError):
+            loadarff(test8)
+
+
+class TestRelationalAttribute(object):
+    """Check metadata and data loaded from the relational fixture test9.arff."""
+
+    def setup_method(self):
+        self.data, self.meta = loadarff(test9)
+
+    def test_attributes(self):
+        assert_equal(len(self.meta._attributes), 1)
+
+        relational = list(self.meta._attributes.values())[0]
+
+        assert_equal(relational.name, 'attr_date_number')
+        assert_equal(relational.type_name, 'relational')
+        assert_equal(len(relational.attributes), 2)
+        assert_equal(relational.attributes[0].name,
+                     'attr_date')
+        assert_equal(relational.attributes[0].type_name,
+                     'date')
+        assert_equal(relational.attributes[1].name,
+                     'attr_number')
+        assert_equal(relational.attributes[1].type_name,
+                     'numeric')
+
+    def test_data(self):
+        # np.float64 instead of the np.float_ alias, which was removed in
+        # NumPy 2.0; both name the same dtype on older releases.
+        dtype_instance = [('attr_date', 'datetime64[D]'),
+                          ('attr_number', np.float64)]
+
+        expected = [
+            np.array([('1999-01-31', 1), ('1935-11-27', 10)],
+                     dtype=dtype_instance),
+            np.array([('2004-12-01', 2), ('1942-08-13', 20)],
+                     dtype=dtype_instance),
+            np.array([('1817-04-28', 3)],
+                     dtype=dtype_instance),
+            np.array([('2100-09-10', 4), ('1957-04-17', 40),
+                      ('1721-01-14', 400)],
+                     dtype=dtype_instance),
+            np.array([('2013-11-30', 5)],
+                     dtype=dtype_instance),
+            np.array([('1631-10-15', 6)],
+                     dtype=dtype_instance)
+        ]
+
+        column = self.data["attr_date_number"]
+        # Make sure no row was dropped before comparing row by row.
+        assert_equal(len(column), len(expected))
+        for actual, wanted in zip(column, expected):
+            assert_array_equal(actual, wanted)
+
+
+class TestRelationalAttributeLong(object):
+    """Check a relational attribute whose single instance holds many rows."""
+
+    def setup_method(self):
+        self.data, self.meta = loadarff(test10)
+
+    def test_attributes(self):
+        assert_equal(len(self.meta._attributes), 1)
+
+        relational = list(self.meta._attributes.values())[0]
+
+        assert_equal(relational.name, 'attr_relational')
+        assert_equal(relational.type_name, 'relational')
+        assert_equal(len(relational.attributes), 1)
+        assert_equal(relational.attributes[0].name,
+                     'attr_number')
+        assert_equal(relational.attributes[0].type_name, 'numeric')
+
+    def test_data(self):
+        # np.float64 instead of the np.float_ alias, which was removed in
+        # NumPy 2.0; both name the same dtype on older releases.
+        dtype_instance = [('attr_number', np.float64)]
+
+        # The first instance is expected to hold the numbers 0..29999.
+        expected = np.array([(n,) for n in range(30000)],
+                            dtype=dtype_instance)
+
+        assert_array_equal(self.data["attr_relational"][0],
+                           expected)
+
+
+class TestQuotedNominal(object):
+    """
+    Regression test for issue #10232 : Exception in loadarff with quoted nominal attributes.
+    """
+
+    def setup_method(self):
+        self.data, self.meta = loadarff(test_quoted_nominal)
+
+    def test_attributes(self):
+        assert_equal(len(self.meta._attributes), 2)
+
+        age, smoker = self.meta._attributes.values()
+
+        assert_equal(age.name, 'age')
+        assert_equal(age.type_name, 'numeric')
+        assert_equal(smoker.name, 'smoker')
+        assert_equal(smoker.type_name, 'nominal')
+        # The quotes around the nominal values must not be part of the values.
+        assert_equal(smoker.values, ['yes', 'no'])
+
+    def test_data(self):
+
+        # np.float64 instead of the np.float_ alias, which was removed in
+        # NumPy 2.0; both name the same dtype on older releases.
+        age_dtype_instance = np.float64
+        smoker_dtype_instance = '<S3'
+
+        age_expected = np.array([
+            18,
+            24,
+            44,
+            56,
+            89,
+            11,
+        ], dtype=age_dtype_instance)
+
+        smoker_expected = np.array([
+            'no',
+            'yes',
+            'no',
+            'no',
+            'yes',
+            'no',
+        ], dtype=smoker_dtype_instance)
+
+        assert_array_equal(self.data["age"], age_expected)
+        assert_array_equal(self.data["smoker"], smoker_expected)
+
+
+class TestQuotedNominalSpaces(object):
+    """
+    Regression test for issue #10232 : Exception in loadarff with quoted nominal attributes.
+
+    This variant uses nominal values with spaces inside the quotes, which
+    must be kept as part of the value.
+    """
+
+    def setup_method(self):
+        self.data, self.meta = loadarff(test_quoted_nominal_spaces)
+
+    def test_attributes(self):
+        assert_equal(len(self.meta._attributes), 2)
+
+        age, smoker = self.meta._attributes.values()
+
+        assert_equal(age.name, 'age')
+        assert_equal(age.type_name, 'numeric')
+        assert_equal(smoker.name, 'smoker')
+        assert_equal(smoker.type_name, 'nominal')
+        assert_equal(smoker.values, ['  yes', 'no  '])
+
+    def test_data(self):
+
+        # np.float64 instead of the np.float_ alias, which was removed in
+        # NumPy 2.0; both name the same dtype on older releases.
+        age_dtype_instance = np.float64
+        smoker_dtype_instance = '<S5'
+
+        age_expected = np.array([
+            18,
+            24,
+            44,
+            56,
+            89,
+            11,
+        ], dtype=age_dtype_instance)
+
+        smoker_expected = np.array([
+            'no  ',
+            '  yes',
+            'no  ',
+            'no  ',
+            '  yes',
+            'no  ',
+        ], dtype=smoker_dtype_instance)
+
+        assert_array_equal(self.data["age"], age_expected)
+        assert_array_equal(self.data["smoker"], smoker_expected)