Fixed database typo and removed unnecessary class identifier.
This commit is contained in:
parent 00ad49a143
commit 45fb349a7d
5098 changed files with 952558 additions and 85 deletions
							
								
								
									
25  venv/Lib/site-packages/scipy/io/arff/__init__.py  Normal file
							|  | @ -0,0 +1,25 @@ | |||
| """ | ||||
| Module to read ARFF files | ||||
| ========================= | ||||
| ARFF is the standard data format for WEKA. | ||||
| It is a text file format which supports numerical, string and date values. | ||||
| The format can also represent missing data and sparse data. | ||||
| 
 | ||||
| Notes | ||||
| ----- | ||||
| The ARFF support in ``scipy.io`` provides file reading functionality only. | ||||
| For more extensive ARFF functionality, see `liac-arff | ||||
| <https://github.com/renatopp/liac-arff>`_. | ||||
| 
 | ||||
| See the `WEKA website <http://weka.wikispaces.com/ARFF>`_ | ||||
| for more details about the ARFF format and available datasets. | ||||
| 
 | ||||
| """ | ||||
| from .arffread import * | ||||
| from . import arffread | ||||
| 
 | ||||
| __all__ = arffread.__all__ | ||||
| 
 | ||||
| from scipy._lib._testutils import PytestTester | ||||
| test = PytestTester(__name__) | ||||
| del PytestTester | ||||
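For orientation, a minimal usage sketch of this package (the filename is a placeholder; the iris.arff test file added later in this commit would work the same way):

from scipy.io import arff

# loadarff returns a NumPy record array plus a MetaData object.
data, meta = arff.loadarff('iris.arff')
print(meta.names())               # attribute names, in declaration order
print(data['sepallength'][:5])    # columns are accessed by attribute name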
										
Binary file not shown.
Binary file not shown.
Binary file not shown.
							
								
								
									
905  venv/Lib/site-packages/scipy/io/arff/arffread.py  Normal file
							|  | @ -0,0 +1,905 @@ | |||
| # Last Change: Mon Aug 20 08:00 PM 2007 J | ||||
| import re | ||||
| import datetime | ||||
| from collections import OrderedDict | ||||
| 
 | ||||
| import numpy as np | ||||
| 
 | ||||
| import csv | ||||
| import ctypes | ||||
| 
 | ||||
| """A module to read arff files.""" | ||||
| 
 | ||||
| __all__ = ['MetaData', 'loadarff', 'ArffError', 'ParseArffError'] | ||||
| 
 | ||||
| # An Arff file is basically two parts: | ||||
| #   - header | ||||
| #   - data | ||||
| # | ||||
| # A header has each of its components starting with @META, where META is one | ||||
| # of the keywords (attribute or relation, for now). | ||||
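For illustration, an abridged header and data section in the layout described above (adapted from the iris.arff test file added in this commit):

@RELATION iris
@ATTRIBUTE sepallength REAL
@ATTRIBUTE sepalwidth  REAL
@ATTRIBUTE class       {Iris-setosa,Iris-versicolor,Iris-virginica}
@DATA
5.1,3.5,Iris-setosa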
| 
 | ||||
| # TODO: | ||||
| #   - both integer and reals are treated as numeric -> the integer info | ||||
| #    is lost! | ||||
| #   - Replace ValueError by ParseError or something | ||||
| 
 | ||||
| # We can now handle the following: | ||||
| #   - numeric and nominal attributes | ||||
| #   - missing values for numeric attributes | ||||
| 
 | ||||
| r_meta = re.compile(r'^\s*@') | ||||
| # Match a comment | ||||
| r_comment = re.compile(r'^%') | ||||
| # Match an empty line | ||||
| r_empty = re.compile(r'^\s+$') | ||||
| # Match a header line, that is a line which starts by @ + a word | ||||
| r_headerline = re.compile(r'^\s*@\S*') | ||||
| r_datameta = re.compile(r'^@[Dd][Aa][Tt][Aa]') | ||||
| r_relation = re.compile(r'^@[Rr][Ee][Ll][Aa][Tt][Ii][Oo][Nn]\s*(\S*)') | ||||
| r_attribute = re.compile(r'^\s*@[Aa][Tt][Tt][Rr][Ii][Bb][Uu][Tt][Ee]\s*(..*$)') | ||||
| 
 | ||||
| r_nominal = re.compile('{(.+)}') | ||||
| r_date = re.compile(r"[Dd][Aa][Tt][Ee]\s+[\"']?(.+?)[\"']?$") | ||||
| 
 | ||||
| # To get attribute names enclosed in '' | ||||
| r_comattrval = re.compile(r"'(..+)'\s+(..+$)") | ||||
| # To get normal attributes | ||||
| r_wcomattrval = re.compile(r"(\S+)\s+(..+$)") | ||||
| 
 | ||||
| # ------------------------ | ||||
| # Module defined exception | ||||
| # ------------------------ | ||||
| 
 | ||||
| 
 | ||||
| class ArffError(IOError): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| class ParseArffError(ArffError): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| # ---------- | ||||
| # Attributes | ||||
| # ---------- | ||||
| class Attribute(object): | ||||
| 
 | ||||
|     type_name = None | ||||
| 
 | ||||
|     def __init__(self, name): | ||||
|         self.name = name | ||||
|         self.range = None | ||||
|         self.dtype = np.object_ | ||||
| 
 | ||||
|     @classmethod | ||||
|     def parse_attribute(cls, name, attr_string): | ||||
|         """ | ||||
|         Parse the attribute line if it knows how. Returns the parsed | ||||
|         attribute, or None. | ||||
|         """ | ||||
|         return None | ||||
| 
 | ||||
|     def parse_data(self, data_str): | ||||
|         """ | ||||
|         Parse a value of this type. | ||||
|         """ | ||||
|         return None | ||||
| 
 | ||||
|     def __str__(self): | ||||
|         """ | ||||
|         Return a string representation of this attribute. | ||||
|         """ | ||||
|         return self.name + ',' + self.type_name | ||||
| 
 | ||||
| 
 | ||||
| class NominalAttribute(Attribute): | ||||
| 
 | ||||
|     type_name = 'nominal' | ||||
| 
 | ||||
|     def __init__(self, name, values): | ||||
|         super().__init__(name) | ||||
|         self.values = values | ||||
|         self.range = values | ||||
|         self.dtype = (np.string_, max(len(i) for i in values)) | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def _get_nom_val(atrv): | ||||
|         """Given a string containing a nominal type, returns a tuple of the | ||||
|         possible values. | ||||
| 
 | ||||
|         A nominal type is defined as something framed between braces ({}). | ||||
| 
 | ||||
|         Parameters | ||||
|         ---------- | ||||
|         atrv : str | ||||
|            Nominal type definition | ||||
| 
 | ||||
|         Returns | ||||
|         ------- | ||||
|         poss_vals : tuple | ||||
|            possible values | ||||
| 
 | ||||
|         Examples | ||||
|         -------- | ||||
|         >>> NominalAttribute._get_nom_val("{floup, bouga, fl, ratata}") | ||||
|         ('floup', 'bouga', 'fl', 'ratata') | ||||
|         """ | ||||
|         m = r_nominal.match(atrv) | ||||
|         if m: | ||||
|             attrs, _ = split_data_line(m.group(1)) | ||||
|             return tuple(attrs) | ||||
|         else: | ||||
|             raise ValueError("This does not look like a nominal string") | ||||
| 
 | ||||
|     @classmethod | ||||
|     def parse_attribute(cls, name, attr_string): | ||||
|         """ | ||||
|         Parse the attribute line if it knows how. Returns the parsed | ||||
|         attribute, or None. | ||||
| 
 | ||||
|         For nominal attributes, the attribute string would be like '{<attr_1>, | ||||
|          <attr2>, <attr_3>}'. | ||||
|         """ | ||||
|         if attr_string[0] == '{': | ||||
|             values = cls._get_nom_val(attr_string) | ||||
|             return cls(name, values) | ||||
|         else: | ||||
|             return None | ||||
| 
 | ||||
|     def parse_data(self, data_str): | ||||
|         """ | ||||
|         Parse a value of this type. | ||||
|         """ | ||||
|         if data_str in self.values: | ||||
|             return data_str | ||||
|         elif data_str == '?': | ||||
|             return data_str | ||||
|         else: | ||||
|             raise ValueError("%s value not in %s" % (str(data_str), | ||||
|                                                      str(self.values))) | ||||
| 
 | ||||
|     def __str__(self): | ||||
|         msg = self.name + ",{" | ||||
|         for i in range(len(self.values)-1): | ||||
|             msg += self.values[i] + "," | ||||
|         msg += self.values[-1] | ||||
|         msg += "}" | ||||
|         return msg | ||||
| 
 | ||||
| 
 | ||||
| class NumericAttribute(Attribute): | ||||
| 
 | ||||
|     def __init__(self, name): | ||||
|         super().__init__(name) | ||||
|         self.type_name = 'numeric' | ||||
|         self.dtype = np.float_ | ||||
| 
 | ||||
|     @classmethod | ||||
|     def parse_attribute(cls, name, attr_string): | ||||
|         """ | ||||
|         Parse the attribute line if it knows how. Returns the parsed | ||||
|         attribute, or None. | ||||
| 
 | ||||
|         For numeric attributes, the attribute string would be like | ||||
|         'numeric' or 'int' or 'real'. | ||||
|         """ | ||||
| 
 | ||||
|         attr_string = attr_string.lower().strip() | ||||
| 
 | ||||
|         if(attr_string[:len('numeric')] == 'numeric' or | ||||
|            attr_string[:len('int')] == 'int' or | ||||
|            attr_string[:len('real')] == 'real'): | ||||
|             return cls(name) | ||||
|         else: | ||||
|             return None | ||||
| 
 | ||||
|     def parse_data(self, data_str): | ||||
|         """ | ||||
|         Parse a value of this type. | ||||
| 
 | ||||
|         Parameters | ||||
|         ---------- | ||||
|         data_str : str | ||||
|            string to convert | ||||
| 
 | ||||
|         Returns | ||||
|         ------- | ||||
|         f : float | ||||
|            where float can be nan | ||||
| 
 | ||||
|         Examples | ||||
|         -------- | ||||
|         >>> atr = NumericAttribute('atr') | ||||
|         >>> atr.parse_data('1') | ||||
|         1.0 | ||||
|         >>> atr.parse_data('1\\n') | ||||
|         1.0 | ||||
|         >>> atr.parse_data('?\\n') | ||||
|         nan | ||||
|         """ | ||||
|         if '?' in data_str: | ||||
|             return np.nan | ||||
|         else: | ||||
|             return float(data_str) | ||||
| 
 | ||||
|     def _basic_stats(self, data): | ||||
|         nbfac = data.size * 1. / (data.size - 1) | ||||
|         return (np.nanmin(data), np.nanmax(data), | ||||
|                 np.mean(data), np.std(data) * nbfac) | ||||
| 
 | ||||
| 
 | ||||
| class StringAttribute(Attribute): | ||||
| 
 | ||||
|     def __init__(self, name): | ||||
|         super().__init__(name) | ||||
|         self.type_name = 'string' | ||||
| 
 | ||||
|     @classmethod | ||||
|     def parse_attribute(cls, name, attr_string): | ||||
|         """ | ||||
|         Parse the attribute line if it knows how. Returns the parsed | ||||
|         attribute, or None. | ||||
| 
 | ||||
|         For string attributes, the attribute string would be like | ||||
|         'string'. | ||||
|         """ | ||||
| 
 | ||||
|         attr_string = attr_string.lower().strip() | ||||
| 
 | ||||
|         if attr_string[:len('string')] == 'string': | ||||
|             return cls(name) | ||||
|         else: | ||||
|             return None | ||||
| 
 | ||||
| 
 | ||||
| class DateAttribute(Attribute): | ||||
| 
 | ||||
|     def __init__(self, name, date_format, datetime_unit): | ||||
|         super().__init__(name) | ||||
|         self.date_format = date_format | ||||
|         self.datetime_unit = datetime_unit | ||||
|         self.type_name = 'date' | ||||
|         self.range = date_format | ||||
|         self.dtype = np.datetime64(0, self.datetime_unit) | ||||
| 
 | ||||
|     @staticmethod | ||||
|     def _get_date_format(atrv): | ||||
|         m = r_date.match(atrv) | ||||
|         if m: | ||||
|             pattern = m.group(1).strip() | ||||
|             # convert time pattern from Java's SimpleDateFormat to C's format | ||||
|             datetime_unit = None | ||||
|             if "yyyy" in pattern: | ||||
|                 pattern = pattern.replace("yyyy", "%Y") | ||||
|                 datetime_unit = "Y" | ||||
|             elif "yy": | ||||
|                 pattern = pattern.replace("yy", "%y") | ||||
|                 datetime_unit = "Y" | ||||
|             if "MM" in pattern: | ||||
|                 pattern = pattern.replace("MM", "%m") | ||||
|                 datetime_unit = "M" | ||||
|             if "dd" in pattern: | ||||
|                 pattern = pattern.replace("dd", "%d") | ||||
|                 datetime_unit = "D" | ||||
|             if "HH" in pattern: | ||||
|                 pattern = pattern.replace("HH", "%H") | ||||
|                 datetime_unit = "h" | ||||
|             if "mm" in pattern: | ||||
|                 pattern = pattern.replace("mm", "%M") | ||||
|                 datetime_unit = "m" | ||||
|             if "ss" in pattern: | ||||
|                 pattern = pattern.replace("ss", "%S") | ||||
|                 datetime_unit = "s" | ||||
|             if "z" in pattern or "Z" in pattern: | ||||
|                 raise ValueError("Date type attributes with time zone not " | ||||
|                                  "supported, yet") | ||||
| 
 | ||||
|             if datetime_unit is None: | ||||
|                 raise ValueError("Invalid or unsupported date format") | ||||
| 
 | ||||
|             return pattern, datetime_unit | ||||
|         else: | ||||
|             raise ValueError("Invalid or no date format") | ||||
| 
 | ||||
|     @classmethod | ||||
|     def parse_attribute(cls, name, attr_string): | ||||
|         """ | ||||
|         Parse the attribute line if it knows how. Returns the parsed | ||||
|         attribute, or None. | ||||
| 
 | ||||
|         For date attributes, the attribute string would be like | ||||
|         'date <format>'. | ||||
|         """ | ||||
| 
 | ||||
|         attr_string_lower = attr_string.lower().strip() | ||||
| 
 | ||||
|         if attr_string_lower[:len('date')] == 'date': | ||||
|             date_format, datetime_unit = cls._get_date_format(attr_string) | ||||
|             return cls(name, date_format, datetime_unit) | ||||
|         else: | ||||
|             return None | ||||
| 
 | ||||
|     def parse_data(self, data_str): | ||||
|         """ | ||||
|         Parse a value of this type. | ||||
|         """ | ||||
|         date_str = data_str.strip().strip("'").strip('"') | ||||
|         if date_str == '?': | ||||
|             return np.datetime64('NaT', self.datetime_unit) | ||||
|         else: | ||||
|             dt = datetime.datetime.strptime(date_str, self.date_format) | ||||
|             return np.datetime64(dt).astype( | ||||
|                 "datetime64[%s]" % self.datetime_unit) | ||||
| 
 | ||||
|     def __str__(self): | ||||
|         return super(DateAttribute, self).__str__() + ',' + self.date_format | ||||
| 
 | ||||
| 
 | ||||
| class RelationalAttribute(Attribute): | ||||
| 
 | ||||
|     def __init__(self, name): | ||||
|         super().__init__(name) | ||||
|         self.type_name = 'relational' | ||||
|         self.dtype = np.object_ | ||||
|         self.attributes = [] | ||||
|         self.dialect = None | ||||
| 
 | ||||
|     @classmethod | ||||
|     def parse_attribute(cls, name, attr_string): | ||||
|         """ | ||||
|         Parse the attribute line if it knows how. Returns the parsed | ||||
|         attribute, or None. | ||||
| 
 | ||||
|         For relational attributes, the attribute string would be like | ||||
|         'relational'. | ||||
|         """ | ||||
| 
 | ||||
|         attr_string_lower = attr_string.lower().strip() | ||||
| 
 | ||||
|         if attr_string_lower[:len('relational')] == 'relational': | ||||
|             return cls(name) | ||||
|         else: | ||||
|             return None | ||||
| 
 | ||||
|     def parse_data(self, data_str): | ||||
|         # Copy-pasted | ||||
|         elems = list(range(len(self.attributes))) | ||||
| 
 | ||||
|         escaped_string = data_str.encode().decode("unicode-escape") | ||||
| 
 | ||||
|         row_tuples = [] | ||||
| 
 | ||||
|         for raw in escaped_string.split("\n"): | ||||
|             row, self.dialect = split_data_line(raw, self.dialect) | ||||
| 
 | ||||
|             row_tuples.append(tuple( | ||||
|                 [self.attributes[i].parse_data(row[i]) for i in elems])) | ||||
| 
 | ||||
|         return np.array(row_tuples, | ||||
|                         [(a.name, a.dtype) for a in self.attributes]) | ||||
| 
 | ||||
|     def __str__(self): | ||||
|         return (super(RelationalAttribute, self).__str__() + '\n\t' + | ||||
|                 '\n\t'.join(str(a) for a in self.attributes)) | ||||
| 
 | ||||
| 
 | ||||
| # ----------------- | ||||
| # Various utilities | ||||
| # ----------------- | ||||
| def to_attribute(name, attr_string): | ||||
|     attr_classes = (NominalAttribute, NumericAttribute, DateAttribute, | ||||
|                     StringAttribute, RelationalAttribute) | ||||
| 
 | ||||
|     for cls in attr_classes: | ||||
|         attr = cls.parse_attribute(name, attr_string) | ||||
|         if attr is not None: | ||||
|             return attr | ||||
| 
 | ||||
|     raise ParseArffError("unknown attribute %s" % attr_string) | ||||
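A hedged sketch of the dispatch above, using attribute strings that appear in the test files added by this commit:

to_attribute('sepallength', 'REAL')            # -> NumericAttribute
to_attribute('class', '{class0, class1}')      # -> NominalAttribute
to_attribute('attr6', 'string')                # -> StringAttribute
to_attribute('attr_year', 'DATE yyyy')         # -> DateAttribute
to_attribute('attr0', 'crap')                  # raises ParseArffError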
| 
 | ||||
| 
 | ||||
| def csv_sniffer_has_bug_last_field(): | ||||
|     """ | ||||
|     Checks if the bug https://bugs.python.org/issue30157 is unpatched. | ||||
|     """ | ||||
| 
 | ||||
|     # We only compute this once. | ||||
|     has_bug = getattr(csv_sniffer_has_bug_last_field, "has_bug", None) | ||||
| 
 | ||||
|     if has_bug is None: | ||||
|         dialect = csv.Sniffer().sniff("3, 'a'") | ||||
|         csv_sniffer_has_bug_last_field.has_bug = dialect.quotechar != "'" | ||||
|         has_bug = csv_sniffer_has_bug_last_field.has_bug | ||||
| 
 | ||||
|     return has_bug | ||||
| 
 | ||||
| 
 | ||||
| def workaround_csv_sniffer_bug_last_field(sniff_line, dialect, delimiters): | ||||
|     """ | ||||
|     Workaround for the bug https://bugs.python.org/issue30157 if it is unpatched. | ||||
|     """ | ||||
|     if csv_sniffer_has_bug_last_field(): | ||||
|         # Reuses code from the csv module | ||||
|         right_regex = r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)' | ||||
| 
 | ||||
|         for restr in (r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)',  # ,".*?", | ||||
|                       r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',  # .*?", | ||||
|                       right_regex,  # ,".*?" | ||||
|                       r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'):  # ".*?" (no delim, no space) | ||||
|             regexp = re.compile(restr, re.DOTALL | re.MULTILINE) | ||||
|             matches = regexp.findall(sniff_line) | ||||
|             if matches: | ||||
|                 break | ||||
| 
 | ||||
|         # If it does not match the expression that was bugged, then this bug does not apply | ||||
|         if restr != right_regex: | ||||
|             return | ||||
| 
 | ||||
|         groupindex = regexp.groupindex | ||||
| 
 | ||||
|         # There is only one end of the string | ||||
|         assert len(matches) == 1 | ||||
|         m = matches[0] | ||||
| 
 | ||||
|         n = groupindex['quote'] - 1 | ||||
|         quote = m[n] | ||||
| 
 | ||||
|         n = groupindex['delim'] - 1 | ||||
|         delim = m[n] | ||||
| 
 | ||||
|         n = groupindex['space'] - 1 | ||||
|         space = bool(m[n]) | ||||
| 
 | ||||
|         dq_regexp = re.compile( | ||||
|             r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % | ||||
|             {'delim': re.escape(delim), 'quote': quote}, re.MULTILINE | ||||
|         ) | ||||
| 
 | ||||
|         doublequote = bool(dq_regexp.search(sniff_line)) | ||||
| 
 | ||||
|         dialect.quotechar = quote | ||||
|         if delim in delimiters: | ||||
|             dialect.delimiter = delim | ||||
|         dialect.doublequote = doublequote | ||||
|         dialect.skipinitialspace = space | ||||
| 
 | ||||
| 
 | ||||
| def split_data_line(line, dialect=None): | ||||
|     delimiters = ",\t" | ||||
| 
 | ||||
|     # This cannot be done on a per-reader basis, and relational fields | ||||
|     # can be HUGE | ||||
|     csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2)) | ||||
| 
 | ||||
|     # Remove the line end if any | ||||
|     if line[-1] == '\n': | ||||
|         line = line[:-1] | ||||
| 
 | ||||
|     sniff_line = line | ||||
| 
 | ||||
|     # Add a delimiter if none is present, so that the csv.Sniffer | ||||
|     # does not complain for a single-field CSV. | ||||
|     if not any(d in line for d in delimiters): | ||||
|         sniff_line += "," | ||||
| 
 | ||||
|     if dialect is None: | ||||
|         dialect = csv.Sniffer().sniff(sniff_line, delimiters=delimiters) | ||||
|         workaround_csv_sniffer_bug_last_field(sniff_line=sniff_line, | ||||
|                                               dialect=dialect, | ||||
|                                               delimiters=delimiters) | ||||
| 
 | ||||
|     row = next(csv.reader([line], dialect)) | ||||
| 
 | ||||
|     return row, dialect | ||||
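A hedged sketch of split_data_line on a row from the iris data in this commit; passing the returned dialect back in avoids re-sniffing every subsequent line:

row, dialect = split_data_line('5.1,3.5,1.4,0.2,Iris-setosa\n')
# row == ['5.1', '3.5', '1.4', '0.2', 'Iris-setosa']
row2, dialect = split_data_line('4.9,3.0,1.4,0.2,Iris-setosa\n', dialect)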
| 
 | ||||
| 
 | ||||
| # -------------- | ||||
| # Parsing header | ||||
| # -------------- | ||||
| def tokenize_attribute(iterable, attribute): | ||||
|     """Parse a raw string in header (e.g., starts by @attribute). | ||||
| 
 | ||||
|     Given a raw string attribute, try to get the name and type of the | ||||
|     attribute. Constraints: | ||||
| 
 | ||||
|     * The first line must start with @attribute (case insensitive, and | ||||
|       whitespace before @attribute is allowed) | ||||
|     * Also works if the attribute is spread over multiple lines. | ||||
|     * Works if empty lines or comments are in between | ||||
| 
 | ||||
|     Parameters | ||||
|     ---------- | ||||
|     attribute : str | ||||
|        the attribute string. | ||||
| 
 | ||||
|     Returns | ||||
|     ------- | ||||
|     name : str | ||||
|        name of the attribute | ||||
|     value : str | ||||
|        value of the attribute | ||||
|     next : str | ||||
|        next line to be parsed | ||||
| 
 | ||||
|     Examples | ||||
|     -------- | ||||
|     If attribute is a string defined in python as r"floupi real", will | ||||
|     return floupi as name, and real as value. | ||||
| 
 | ||||
|     >>> iterable = iter([0] * 10) # dummy iterator | ||||
|     >>> tokenize_attribute(iterable, r"@attribute floupi real") | ||||
|     ('floupi', 'real', 0) | ||||
| 
 | ||||
|     If attribute is r"'floupi 2' real", will return 'floupi 2' as name, | ||||
|     and real as value. | ||||
| 
 | ||||
|     >>> tokenize_attribute(iterable, r"  @attribute 'floupi 2' real   ") | ||||
|     ('floupi 2', 'real', 0) | ||||
| 
 | ||||
|     """ | ||||
|     sattr = attribute.strip() | ||||
|     mattr = r_attribute.match(sattr) | ||||
|     if mattr: | ||||
|         # atrv is everything after @attribute | ||||
|         atrv = mattr.group(1) | ||||
|         if r_comattrval.match(atrv): | ||||
|             name, type = tokenize_single_comma(atrv) | ||||
|             next_item = next(iterable) | ||||
|         elif r_wcomattrval.match(atrv): | ||||
|             name, type = tokenize_single_wcomma(atrv) | ||||
|             next_item = next(iterable) | ||||
|         else: | ||||
|             # Not sure we should support this, as it does not seem supported by | ||||
|             # weka. | ||||
|             raise ValueError("multi line not supported yet") | ||||
|     else: | ||||
|         raise ValueError("First line unparsable: %s" % sattr) | ||||
| 
 | ||||
|     attribute = to_attribute(name, type) | ||||
| 
 | ||||
|     if type.lower() == 'relational': | ||||
|         next_item = read_relational_attribute(iterable, attribute, next_item) | ||||
|     #    raise ValueError("relational attributes not supported yet") | ||||
| 
 | ||||
|     return attribute, next_item | ||||
| 
 | ||||
| 
 | ||||
| def tokenize_single_comma(val): | ||||
|     # XXX we match the same string twice (here and at the caller level). It is | ||||
|     # stupid, but it is easier for now... | ||||
|     m = r_comattrval.match(val) | ||||
|     if m: | ||||
|         try: | ||||
|             name = m.group(1).strip() | ||||
|             type = m.group(2).strip() | ||||
|         except IndexError: | ||||
|             raise ValueError("Error while tokenizing attribute") | ||||
|     else: | ||||
|         raise ValueError("Error while tokenizing single %s" % val) | ||||
|     return name, type | ||||
| 
 | ||||
| 
 | ||||
| def tokenize_single_wcomma(val): | ||||
|     # XXX we match the same string twice (here and at the caller level). It is | ||||
|     # stupid, but it is easier for now... | ||||
|     m = r_wcomattrval.match(val) | ||||
|     if m: | ||||
|         try: | ||||
|             name = m.group(1).strip() | ||||
|             type = m.group(2).strip() | ||||
|         except IndexError: | ||||
|             raise ValueError("Error while tokenizing attribute") | ||||
|     else: | ||||
|         raise ValueError("Error while tokenizing single %s" % val) | ||||
|     return name, type | ||||
| 
 | ||||
| 
 | ||||
| def read_relational_attribute(ofile, relational_attribute, i): | ||||
|     """Read the nested attributes of a relational attribute""" | ||||
| 
 | ||||
|     r_end_relational = re.compile(r'^@[Ee][Nn][Dd]\s*' + | ||||
|                                   relational_attribute.name + r'\s*$') | ||||
| 
 | ||||
|     while not r_end_relational.match(i): | ||||
|         m = r_headerline.match(i) | ||||
|         if m: | ||||
|             isattr = r_attribute.match(i) | ||||
|             if isattr: | ||||
|                 attr, i = tokenize_attribute(ofile, i) | ||||
|                 relational_attribute.attributes.append(attr) | ||||
|             else: | ||||
|                 raise ValueError("Error parsing line %s" % i) | ||||
|         else: | ||||
|             i = next(ofile) | ||||
| 
 | ||||
|     i = next(ofile) | ||||
|     return i | ||||
| 
 | ||||
| 
 | ||||
| def read_header(ofile): | ||||
|     """Read the header of the iterable ofile.""" | ||||
|     i = next(ofile) | ||||
| 
 | ||||
|     # Pass first comments | ||||
|     while r_comment.match(i): | ||||
|         i = next(ofile) | ||||
| 
 | ||||
|     # Header is everything up to DATA attribute ? | ||||
|     relation = None | ||||
|     attributes = [] | ||||
|     while not r_datameta.match(i): | ||||
|         m = r_headerline.match(i) | ||||
|         if m: | ||||
|             isattr = r_attribute.match(i) | ||||
|             if isattr: | ||||
|                 attr, i = tokenize_attribute(ofile, i) | ||||
|                 attributes.append(attr) | ||||
|             else: | ||||
|                 isrel = r_relation.match(i) | ||||
|                 if isrel: | ||||
|                     relation = isrel.group(1) | ||||
|                 else: | ||||
|                     raise ValueError("Error parsing line %s" % i) | ||||
|                 i = next(ofile) | ||||
|         else: | ||||
|             i = next(ofile) | ||||
| 
 | ||||
|     return relation, attributes | ||||
| 
 | ||||
| 
 | ||||
| class MetaData(object): | ||||
|     """Small container to keep useful information on a ARFF dataset. | ||||
| 
 | ||||
|     Knows about attributes names and types. | ||||
| 
 | ||||
|     Examples | ||||
|     -------- | ||||
|     :: | ||||
| 
 | ||||
|         data, meta = loadarff('iris.arff') | ||||
|         # This will print the attributes names of the iris.arff dataset | ||||
|         for i in meta: | ||||
|             print(i) | ||||
|         # This works too | ||||
|         meta.names() | ||||
|         # Getting attribute type | ||||
|         types = meta.types() | ||||
| 
 | ||||
|     Methods | ||||
|     ------- | ||||
|     names | ||||
|     types | ||||
| 
 | ||||
|     Notes | ||||
|     ----- | ||||
|     Also maintains the list of attributes in order, i.e., doing for i in | ||||
|     meta, where meta is an instance of MetaData, will return the | ||||
|     different attribute names in the order they were defined. | ||||
|     """ | ||||
|     def __init__(self, rel, attr): | ||||
|         self.name = rel | ||||
| 
 | ||||
|         # We need the dictionary to be ordered | ||||
|         self._attributes = OrderedDict((a.name, a) for a in attr) | ||||
| 
 | ||||
|     def __repr__(self): | ||||
|         msg = "" | ||||
|         msg += "Dataset: %s\n" % self.name | ||||
|         for i in self._attributes: | ||||
|             msg += "\t%s's type is %s" % (i, self._attributes[i].type_name) | ||||
|             if self._attributes[i].range: | ||||
|                 msg += ", range is %s" % str(self._attributes[i].range) | ||||
|             msg += '\n' | ||||
|         return msg | ||||
| 
 | ||||
|     def __iter__(self): | ||||
|         return iter(self._attributes) | ||||
| 
 | ||||
|     def __getitem__(self, key): | ||||
|         attr = self._attributes[key] | ||||
| 
 | ||||
|         return (attr.type_name, attr.range) | ||||
| 
 | ||||
|     def names(self): | ||||
|         """Return the list of attribute names. | ||||
| 
 | ||||
|         Returns | ||||
|         ------- | ||||
|         attrnames : list of str | ||||
|             The attribute names. | ||||
|         """ | ||||
|         return list(self._attributes) | ||||
| 
 | ||||
|     def types(self): | ||||
|         """Return the list of attribute types. | ||||
| 
 | ||||
|         Returns | ||||
|         ------- | ||||
|         attr_types : list of str | ||||
|             The attribute types. | ||||
|         """ | ||||
|         attr_types = [self._attributes[name].type_name | ||||
|                       for name in self._attributes] | ||||
|         return attr_types | ||||
| 
 | ||||
| 
 | ||||
| def loadarff(f): | ||||
|     """ | ||||
|     Read an arff file. | ||||
| 
 | ||||
|     The data is returned as a record array, which can be accessed much like | ||||
|     a dictionary of NumPy arrays. For example, if one of the attributes is | ||||
|     called 'pressure', then its first 10 data points can be accessed from the | ||||
|     ``data`` record array like so: ``data['pressure'][0:10]`` | ||||
| 
 | ||||
| 
 | ||||
|     Parameters | ||||
|     ---------- | ||||
|     f : file-like or str | ||||
|        File-like object to read from, or filename to open. | ||||
| 
 | ||||
|     Returns | ||||
|     ------- | ||||
|     data : record array | ||||
|        The data of the arff file, accessible by attribute names. | ||||
|     meta : `MetaData` | ||||
|        Contains information about the arff file such as name and | ||||
|        type of attributes, the relation (name of the dataset), etc. | ||||
| 
 | ||||
|     Raises | ||||
|     ------ | ||||
|     ParseArffError | ||||
|         This is raised if the given file is not ARFF-formatted. | ||||
|     NotImplementedError | ||||
|         The ARFF file has an attribute which is not supported yet. | ||||
| 
 | ||||
|     Notes | ||||
|     ----- | ||||
| 
 | ||||
|     This function should be able to read most arff files. Functionality | ||||
|     that is not yet implemented includes: | ||||
| 
 | ||||
|     * date type attributes | ||||
|     * string type attributes | ||||
| 
 | ||||
|     It can read files with numeric and nominal attributes. It cannot read | ||||
|     files with sparse data ({} in the file). However, this function can | ||||
|     read files with missing data (? in the file), representing the data | ||||
|     points as NaNs. | ||||
| 
 | ||||
|     Examples | ||||
|     -------- | ||||
|     >>> from scipy.io import arff | ||||
|     >>> from io import StringIO | ||||
|     >>> content = \"\"\" | ||||
|     ... @relation foo | ||||
|     ... @attribute width  numeric | ||||
|     ... @attribute height numeric | ||||
|     ... @attribute color  {red,green,blue,yellow,black} | ||||
|     ... @data | ||||
|     ... 5.0,3.25,blue | ||||
|     ... 4.5,3.75,green | ||||
|     ... 3.0,4.00,red | ||||
|     ... \"\"\" | ||||
|     >>> f = StringIO(content) | ||||
|     >>> data, meta = arff.loadarff(f) | ||||
|     >>> data | ||||
|     array([(5.0, 3.25, 'blue'), (4.5, 3.75, 'green'), (3.0, 4.0, 'red')], | ||||
|           dtype=[('width', '<f8'), ('height', '<f8'), ('color', '|S6')]) | ||||
|     >>> meta | ||||
|     Dataset: foo | ||||
|     \twidth's type is numeric | ||||
|     \theight's type is numeric | ||||
|     \tcolor's type is nominal, range is ('red', 'green', 'blue', 'yellow', 'black') | ||||
| 
 | ||||
|     """ | ||||
|     if hasattr(f, 'read'): | ||||
|         ofile = f | ||||
|     else: | ||||
|         ofile = open(f, 'rt') | ||||
|     try: | ||||
|         return _loadarff(ofile) | ||||
|     finally: | ||||
|         if ofile is not f:  # only close what we opened | ||||
|             ofile.close() | ||||
| 
 | ||||
| 
 | ||||
| def _loadarff(ofile): | ||||
|     # Parse the header file | ||||
|     try: | ||||
|         rel, attr = read_header(ofile) | ||||
|     except ValueError as e: | ||||
|         msg = "Error while parsing header, error was: " + str(e) | ||||
|         raise ParseArffError(msg) | ||||
| 
 | ||||
|     # Check whether we have a string attribute (not supported yet) | ||||
|     hasstr = False | ||||
|     for a in attr: | ||||
|         if isinstance(a, StringAttribute): | ||||
|             hasstr = True | ||||
| 
 | ||||
|     meta = MetaData(rel, attr) | ||||
| 
 | ||||
|     # XXX The following code is not great | ||||
|     # Build the type descriptor descr and the list of convertors to convert | ||||
|     # each attribute to the suitable type (which should match the one in | ||||
|     # descr). | ||||
| 
 | ||||
|     # This can be used once we want to support integer as integer values and | ||||
|     # not as numeric anymore (using masked arrays ?). | ||||
| 
 | ||||
|     if hasstr: | ||||
|         # How to support string efficiently ? Ideally, we should know the max | ||||
|         # size of the string before allocating the numpy array. | ||||
|         raise NotImplementedError("String attributes not supported yet, sorry") | ||||
| 
 | ||||
|     ni = len(attr) | ||||
| 
 | ||||
|     def generator(row_iter, delim=','): | ||||
|         # TODO: this is where we are spending time (~80%). I think things | ||||
|         # could be made more efficient: | ||||
|         #   - We could for example "compile" the function, because some values | ||||
|         #   do not change here. | ||||
|         #   - The function to convert a line to dtyped values could also be | ||||
|         #   generated on the fly from a string and be executed instead of | ||||
|         #   looping. | ||||
|         #   - The regexes are overkill: for comments, checking that a line starts | ||||
|         #   with % should be enough and faster, and for empty lines, same thing | ||||
|         #   --> this does not seem to change anything. | ||||
| 
 | ||||
|         # 'compiling' the range since it does not change | ||||
|         # Note, I have already tried zipping the converters and | ||||
|         # row elements and got slightly worse performance. | ||||
|         elems = list(range(ni)) | ||||
| 
 | ||||
|         dialect = None | ||||
|         for raw in row_iter: | ||||
|             # We do not abstract skipping comments and empty lines for | ||||
|             # performance reasons. | ||||
|             if r_comment.match(raw) or r_empty.match(raw): | ||||
|                 continue | ||||
| 
 | ||||
|             row, dialect = split_data_line(raw, dialect) | ||||
| 
 | ||||
|             yield tuple([attr[i].parse_data(row[i]) for i in elems]) | ||||
| 
 | ||||
|     a = list(generator(ofile)) | ||||
|     # No error should happen here: it is a bug otherwise | ||||
|     data = np.array(a, [(a.name, a.dtype) for a in attr]) | ||||
|     return data, meta | ||||
| 
 | ||||
| 
 | ||||
| # ---- | ||||
| # Misc | ||||
| # ---- | ||||
| def basic_stats(data): | ||||
|     nbfac = data.size * 1. / (data.size - 1) | ||||
|     return np.nanmin(data), np.nanmax(data), np.mean(data), np.std(data) * nbfac | ||||
| 
 | ||||
| 
 | ||||
| def print_attribute(name, tp, data): | ||||
|     type = tp.type_name | ||||
|     if type == 'numeric' or type == 'real' or type == 'integer': | ||||
|         min, max, mean, std = basic_stats(data) | ||||
|         print("%s,%s,%f,%f,%f,%f" % (name, type, min, max, mean, std)) | ||||
|     else: | ||||
|         print(str(tp)) | ||||
| 
 | ||||
| 
 | ||||
| def test_weka(filename): | ||||
|     data, meta = loadarff(filename) | ||||
|     print(len(data.dtype)) | ||||
|     print(data.size) | ||||
|     for i in meta: | ||||
|         print_attribute(i, meta[i], data[i]) | ||||
| 
 | ||||
| 
 | ||||
| # make sure nose does not find this as a test | ||||
| test_weka.__test__ = False | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     import sys | ||||
|     filename = sys.argv[1] | ||||
|     test_weka(filename) | ||||
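To tie the pieces together, a hedged end-to-end sketch mirroring the missing-value test data added in this commit; '?' entries come back as NaN, as NumericAttribute.parse_data documents:

from io import StringIO
from scipy.io import arff

content = '\n'.join([
    '@relation missing',
    '@attribute yop real',
    '@attribute yap real',
    '@data',
    '1,5',
    '2,4',
    '?,?',
])
data, meta = arff.loadarff(StringIO(content))
# data['yop'] -> array([ 1.,  2., nan])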
							
								
								
									
11  venv/Lib/site-packages/scipy/io/arff/setup.py  Normal file
							|  | @ -0,0 +1,11 @@ | |||
| 
 | ||||
| def configuration(parent_package='io',top_path=None): | ||||
|     from numpy.distutils.misc_util import Configuration | ||||
|     config = Configuration('arff', parent_package, top_path) | ||||
|     config.add_data_dir('tests') | ||||
|     return config | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     from numpy.distutils.core import setup | ||||
|     setup(**configuration(top_path='').todict()) | ||||
							
								
								
									
0  venv/Lib/site-packages/scipy/io/arff/tests/__init__.py  Normal file
Binary file not shown.
Binary file not shown.
							
								
								
									
225  venv/Lib/site-packages/scipy/io/arff/tests/data/iris.arff  Normal file
							|  | @ -0,0 +1,225 @@ | |||
| % 1. Title: Iris Plants Database | ||||
| %  | ||||
| % 2. Sources: | ||||
| %      (a) Creator: R.A. Fisher | ||||
| %      (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov) | ||||
| %      (c) Date: July, 1988 | ||||
| %  | ||||
| % 3. Past Usage: | ||||
| %    - Publications: too many to mention!!!  Here are a few. | ||||
| %    1. Fisher,R.A. "The use of multiple measurements in taxonomic problems" | ||||
| %       Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions | ||||
| %       to Mathematical Statistics" (John Wiley, NY, 1950). | ||||
| %    2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis. | ||||
| %       (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218. | ||||
| %    3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System | ||||
| %       Structure and Classification Rule for Recognition in Partially Exposed | ||||
| %       Environments".  IEEE Transactions on Pattern Analysis and Machine | ||||
| %       Intelligence, Vol. PAMI-2, No. 1, 67-71. | ||||
| %       -- Results: | ||||
| %          -- very low misclassification rates (0% for the setosa class) | ||||
| %    4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule".  IEEE  | ||||
| %       Transactions on Information Theory, May 1972, 431-433. | ||||
| %       -- Results: | ||||
| %          -- very low misclassification rates again | ||||
| %    5. See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al's AUTOCLASS II | ||||
| %       conceptual clustering system finds 3 classes in the data. | ||||
| %  | ||||
| % 4. Relevant Information: | ||||
| %    --- This is perhaps the best known database to be found in the pattern | ||||
| %        recognition literature.  Fisher's paper is a classic in the field | ||||
| %        and is referenced frequently to this day.  (See Duda & Hart, for | ||||
| %        example.)  The data set contains 3 classes of 50 instances each, | ||||
| %        where each class refers to a type of iris plant.  One class is | ||||
| %        linearly separable from the other 2; the latter are NOT linearly | ||||
| %        separable from each other. | ||||
| %    --- Predicted attribute: class of iris plant. | ||||
| %    --- This is an exceedingly simple domain. | ||||
| %  | ||||
| % 5. Number of Instances: 150 (50 in each of three classes) | ||||
| %  | ||||
| % 6. Number of Attributes: 4 numeric, predictive attributes and the class | ||||
| %  | ||||
| % 7. Attribute Information: | ||||
| %    1. sepal length in cm | ||||
| %    2. sepal width in cm | ||||
| %    3. petal length in cm | ||||
| %    4. petal width in cm | ||||
| %    5. class:  | ||||
| %       -- Iris Setosa | ||||
| %       -- Iris Versicolour | ||||
| %       -- Iris Virginica | ||||
| %  | ||||
| % 8. Missing Attribute Values: None | ||||
| %  | ||||
| % Summary Statistics: | ||||
| %  	           Min  Max   Mean    SD   Class Correlation | ||||
| %    sepal length: 4.3  7.9   5.84  0.83    0.7826    | ||||
| %     sepal width: 2.0  4.4   3.05  0.43   -0.4194 | ||||
| %    petal length: 1.0  6.9   3.76  1.76    0.9490  (high!) | ||||
| %     petal width: 0.1  2.5   1.20  0.76    0.9565  (high!) | ||||
| %  | ||||
| % 9. Class Distribution: 33.3% for each of 3 classes. | ||||
| 
 | ||||
| @RELATION iris | ||||
| 
 | ||||
| @ATTRIBUTE sepallength	REAL | ||||
| @ATTRIBUTE sepalwidth 	REAL | ||||
| @ATTRIBUTE petallength 	REAL | ||||
| @ATTRIBUTE petalwidth	REAL | ||||
| @ATTRIBUTE class 	{Iris-setosa,Iris-versicolor,Iris-virginica} | ||||
| 
 | ||||
| @DATA | ||||
| 5.1,3.5,1.4,0.2,Iris-setosa | ||||
| 4.9,3.0,1.4,0.2,Iris-setosa | ||||
| 4.7,3.2,1.3,0.2,Iris-setosa | ||||
| 4.6,3.1,1.5,0.2,Iris-setosa | ||||
| 5.0,3.6,1.4,0.2,Iris-setosa | ||||
| 5.4,3.9,1.7,0.4,Iris-setosa | ||||
| 4.6,3.4,1.4,0.3,Iris-setosa | ||||
| 5.0,3.4,1.5,0.2,Iris-setosa | ||||
| 4.4,2.9,1.4,0.2,Iris-setosa | ||||
| 4.9,3.1,1.5,0.1,Iris-setosa | ||||
| 5.4,3.7,1.5,0.2,Iris-setosa | ||||
| 4.8,3.4,1.6,0.2,Iris-setosa | ||||
| 4.8,3.0,1.4,0.1,Iris-setosa | ||||
| 4.3,3.0,1.1,0.1,Iris-setosa | ||||
| 5.8,4.0,1.2,0.2,Iris-setosa | ||||
| 5.7,4.4,1.5,0.4,Iris-setosa | ||||
| 5.4,3.9,1.3,0.4,Iris-setosa | ||||
| 5.1,3.5,1.4,0.3,Iris-setosa | ||||
| 5.7,3.8,1.7,0.3,Iris-setosa | ||||
| 5.1,3.8,1.5,0.3,Iris-setosa | ||||
| 5.4,3.4,1.7,0.2,Iris-setosa | ||||
| 5.1,3.7,1.5,0.4,Iris-setosa | ||||
| 4.6,3.6,1.0,0.2,Iris-setosa | ||||
| 5.1,3.3,1.7,0.5,Iris-setosa | ||||
| 4.8,3.4,1.9,0.2,Iris-setosa | ||||
| 5.0,3.0,1.6,0.2,Iris-setosa | ||||
| 5.0,3.4,1.6,0.4,Iris-setosa | ||||
| 5.2,3.5,1.5,0.2,Iris-setosa | ||||
| 5.2,3.4,1.4,0.2,Iris-setosa | ||||
| 4.7,3.2,1.6,0.2,Iris-setosa | ||||
| 4.8,3.1,1.6,0.2,Iris-setosa | ||||
| 5.4,3.4,1.5,0.4,Iris-setosa | ||||
| 5.2,4.1,1.5,0.1,Iris-setosa | ||||
| 5.5,4.2,1.4,0.2,Iris-setosa | ||||
| 4.9,3.1,1.5,0.1,Iris-setosa | ||||
| 5.0,3.2,1.2,0.2,Iris-setosa | ||||
| 5.5,3.5,1.3,0.2,Iris-setosa | ||||
| 4.9,3.1,1.5,0.1,Iris-setosa | ||||
| 4.4,3.0,1.3,0.2,Iris-setosa | ||||
| 5.1,3.4,1.5,0.2,Iris-setosa | ||||
| 5.0,3.5,1.3,0.3,Iris-setosa | ||||
| 4.5,2.3,1.3,0.3,Iris-setosa | ||||
| 4.4,3.2,1.3,0.2,Iris-setosa | ||||
| 5.0,3.5,1.6,0.6,Iris-setosa | ||||
| 5.1,3.8,1.9,0.4,Iris-setosa | ||||
| 4.8,3.0,1.4,0.3,Iris-setosa | ||||
| 5.1,3.8,1.6,0.2,Iris-setosa | ||||
| 4.6,3.2,1.4,0.2,Iris-setosa | ||||
| 5.3,3.7,1.5,0.2,Iris-setosa | ||||
| 5.0,3.3,1.4,0.2,Iris-setosa | ||||
| 7.0,3.2,4.7,1.4,Iris-versicolor | ||||
| 6.4,3.2,4.5,1.5,Iris-versicolor | ||||
| 6.9,3.1,4.9,1.5,Iris-versicolor | ||||
| 5.5,2.3,4.0,1.3,Iris-versicolor | ||||
| 6.5,2.8,4.6,1.5,Iris-versicolor | ||||
| 5.7,2.8,4.5,1.3,Iris-versicolor | ||||
| 6.3,3.3,4.7,1.6,Iris-versicolor | ||||
| 4.9,2.4,3.3,1.0,Iris-versicolor | ||||
| 6.6,2.9,4.6,1.3,Iris-versicolor | ||||
| 5.2,2.7,3.9,1.4,Iris-versicolor | ||||
| 5.0,2.0,3.5,1.0,Iris-versicolor | ||||
| 5.9,3.0,4.2,1.5,Iris-versicolor | ||||
| 6.0,2.2,4.0,1.0,Iris-versicolor | ||||
| 6.1,2.9,4.7,1.4,Iris-versicolor | ||||
| 5.6,2.9,3.6,1.3,Iris-versicolor | ||||
| 6.7,3.1,4.4,1.4,Iris-versicolor | ||||
| 5.6,3.0,4.5,1.5,Iris-versicolor | ||||
| 5.8,2.7,4.1,1.0,Iris-versicolor | ||||
| 6.2,2.2,4.5,1.5,Iris-versicolor | ||||
| 5.6,2.5,3.9,1.1,Iris-versicolor | ||||
| 5.9,3.2,4.8,1.8,Iris-versicolor | ||||
| 6.1,2.8,4.0,1.3,Iris-versicolor | ||||
| 6.3,2.5,4.9,1.5,Iris-versicolor | ||||
| 6.1,2.8,4.7,1.2,Iris-versicolor | ||||
| 6.4,2.9,4.3,1.3,Iris-versicolor | ||||
| 6.6,3.0,4.4,1.4,Iris-versicolor | ||||
| 6.8,2.8,4.8,1.4,Iris-versicolor | ||||
| 6.7,3.0,5.0,1.7,Iris-versicolor | ||||
| 6.0,2.9,4.5,1.5,Iris-versicolor | ||||
| 5.7,2.6,3.5,1.0,Iris-versicolor | ||||
| 5.5,2.4,3.8,1.1,Iris-versicolor | ||||
| 5.5,2.4,3.7,1.0,Iris-versicolor | ||||
| 5.8,2.7,3.9,1.2,Iris-versicolor | ||||
| 6.0,2.7,5.1,1.6,Iris-versicolor | ||||
| 5.4,3.0,4.5,1.5,Iris-versicolor | ||||
| 6.0,3.4,4.5,1.6,Iris-versicolor | ||||
| 6.7,3.1,4.7,1.5,Iris-versicolor | ||||
| 6.3,2.3,4.4,1.3,Iris-versicolor | ||||
| 5.6,3.0,4.1,1.3,Iris-versicolor | ||||
| 5.5,2.5,4.0,1.3,Iris-versicolor | ||||
| 5.5,2.6,4.4,1.2,Iris-versicolor | ||||
| 6.1,3.0,4.6,1.4,Iris-versicolor | ||||
| 5.8,2.6,4.0,1.2,Iris-versicolor | ||||
| 5.0,2.3,3.3,1.0,Iris-versicolor | ||||
| 5.6,2.7,4.2,1.3,Iris-versicolor | ||||
| 5.7,3.0,4.2,1.2,Iris-versicolor | ||||
| 5.7,2.9,4.2,1.3,Iris-versicolor | ||||
| 6.2,2.9,4.3,1.3,Iris-versicolor | ||||
| 5.1,2.5,3.0,1.1,Iris-versicolor | ||||
| 5.7,2.8,4.1,1.3,Iris-versicolor | ||||
| 6.3,3.3,6.0,2.5,Iris-virginica | ||||
| 5.8,2.7,5.1,1.9,Iris-virginica | ||||
| 7.1,3.0,5.9,2.1,Iris-virginica | ||||
| 6.3,2.9,5.6,1.8,Iris-virginica | ||||
| 6.5,3.0,5.8,2.2,Iris-virginica | ||||
| 7.6,3.0,6.6,2.1,Iris-virginica | ||||
| 4.9,2.5,4.5,1.7,Iris-virginica | ||||
| 7.3,2.9,6.3,1.8,Iris-virginica | ||||
| 6.7,2.5,5.8,1.8,Iris-virginica | ||||
| 7.2,3.6,6.1,2.5,Iris-virginica | ||||
| 6.5,3.2,5.1,2.0,Iris-virginica | ||||
| 6.4,2.7,5.3,1.9,Iris-virginica | ||||
| 6.8,3.0,5.5,2.1,Iris-virginica | ||||
| 5.7,2.5,5.0,2.0,Iris-virginica | ||||
| 5.8,2.8,5.1,2.4,Iris-virginica | ||||
| 6.4,3.2,5.3,2.3,Iris-virginica | ||||
| 6.5,3.0,5.5,1.8,Iris-virginica | ||||
| 7.7,3.8,6.7,2.2,Iris-virginica | ||||
| 7.7,2.6,6.9,2.3,Iris-virginica | ||||
| 6.0,2.2,5.0,1.5,Iris-virginica | ||||
| 6.9,3.2,5.7,2.3,Iris-virginica | ||||
| 5.6,2.8,4.9,2.0,Iris-virginica | ||||
| 7.7,2.8,6.7,2.0,Iris-virginica | ||||
| 6.3,2.7,4.9,1.8,Iris-virginica | ||||
| 6.7,3.3,5.7,2.1,Iris-virginica | ||||
| 7.2,3.2,6.0,1.8,Iris-virginica | ||||
| 6.2,2.8,4.8,1.8,Iris-virginica | ||||
| 6.1,3.0,4.9,1.8,Iris-virginica | ||||
| 6.4,2.8,5.6,2.1,Iris-virginica | ||||
| 7.2,3.0,5.8,1.6,Iris-virginica | ||||
| 7.4,2.8,6.1,1.9,Iris-virginica | ||||
| 7.9,3.8,6.4,2.0,Iris-virginica | ||||
| 6.4,2.8,5.6,2.2,Iris-virginica | ||||
| 6.3,2.8,5.1,1.5,Iris-virginica | ||||
| 6.1,2.6,5.6,1.4,Iris-virginica | ||||
| 7.7,3.0,6.1,2.3,Iris-virginica | ||||
| 6.3,3.4,5.6,2.4,Iris-virginica | ||||
| 6.4,3.1,5.5,1.8,Iris-virginica | ||||
| 6.0,3.0,4.8,1.8,Iris-virginica | ||||
| 6.9,3.1,5.4,2.1,Iris-virginica | ||||
| 6.7,3.1,5.6,2.4,Iris-virginica | ||||
| 6.9,3.1,5.1,2.3,Iris-virginica | ||||
| 5.8,2.7,5.1,1.9,Iris-virginica | ||||
| 6.8,3.2,5.9,2.3,Iris-virginica | ||||
| 6.7,3.3,5.7,2.5,Iris-virginica | ||||
| 6.7,3.0,5.2,2.3,Iris-virginica | ||||
| 6.3,2.5,5.0,1.9,Iris-virginica | ||||
| 6.5,3.0,5.2,2.0,Iris-virginica | ||||
| 6.2,3.4,5.4,2.3,Iris-virginica | ||||
| 5.9,3.0,5.1,1.8,Iris-virginica | ||||
| % | ||||
| % | ||||
| % | ||||
|  | @ -0,0 +1,8 @@ | |||
| % This arff file contains some missing data | ||||
| @relation missing | ||||
| @attribute yop real | ||||
| @attribute yap real | ||||
| @data | ||||
| 1,5 | ||||
| 2,4 | ||||
| ?,? | ||||
							
								
								
									
11  venv/Lib/site-packages/scipy/io/arff/tests/data/nodata.arff  Normal file
							|  | @ -0,0 +1,11 @@ | |||
| @RELATION iris | ||||
| 
 | ||||
| @ATTRIBUTE sepallength  REAL | ||||
| @ATTRIBUTE sepalwidth   REAL | ||||
| @ATTRIBUTE petallength  REAL | ||||
| @ATTRIBUTE petalwidth   REAL | ||||
| @ATTRIBUTE class    {Iris-setosa,Iris-versicolor,Iris-virginica} | ||||
| 
 | ||||
| @DATA | ||||
| 
 | ||||
| % This file has no data | ||||
|  | @ -0,0 +1,13 @@ | |||
| % Regression test for issue #10232 : Exception in loadarff with quoted nominal attributes | ||||
| % Spaces between elements are stripped by the parser | ||||
| 
 | ||||
| @relation SOME_DATA | ||||
| @attribute age numeric | ||||
| @attribute smoker {'yes', 'no'} | ||||
| @data | ||||
| 18,  'no' | ||||
| 24, 'yes' | ||||
| 44,     'no' | ||||
| 56, 'no' | ||||
| 89,'yes' | ||||
| 11,  'no' | ||||
|  | @ -0,0 +1,13 @@ | |||
| % Regression test for issue #10232 : Exception in loadarff with quoted nominal attributes | ||||
| % Spaces inside quotes are NOT stripped by the parser | ||||
| 
 | ||||
| @relation SOME_DATA | ||||
| @attribute age numeric | ||||
| @attribute smoker {'  yes', 'no  '} | ||||
| @data | ||||
| 18,'no  ' | ||||
| 24,'  yes' | ||||
| 44,'no  ' | ||||
| 56,'no  ' | ||||
| 89,'  yes' | ||||
| 11,'no  ' | ||||
							
								
								
									
10  venv/Lib/site-packages/scipy/io/arff/tests/data/test1.arff  Normal file
							|  | @ -0,0 +1,10 @@ | |||
| @RELATION test1 | ||||
| 
 | ||||
| @ATTRIBUTE attr0	REAL | ||||
| @ATTRIBUTE attr1 	REAL | ||||
| @ATTRIBUTE attr2 	REAL | ||||
| @ATTRIBUTE attr3	REAL | ||||
| @ATTRIBUTE class 	{class0, class1, class2, class3} | ||||
| 
 | ||||
| @DATA | ||||
| 0.1, 0.2, 0.3, 0.4,class1 | ||||
										
											
File diff suppressed because one or more lines are too long
							
								
								
									
15  venv/Lib/site-packages/scipy/io/arff/tests/data/test2.arff  Normal file
							|  | @ -0,0 +1,15 @@ | |||
| @RELATION test2 | ||||
| 
 | ||||
| @ATTRIBUTE attr0	REAL | ||||
| @ATTRIBUTE attr1 	real | ||||
| @ATTRIBUTE attr2 	integer | ||||
| @ATTRIBUTE attr3	Integer | ||||
| @ATTRIBUTE attr4 	Numeric | ||||
| @ATTRIBUTE attr5	numeric | ||||
| @ATTRIBUTE attr6 	string | ||||
| @ATTRIBUTE attr7 	STRING | ||||
| @ATTRIBUTE attr8 	{bla} | ||||
| @ATTRIBUTE attr9 	{bla, bla} | ||||
| 
 | ||||
| @DATA | ||||
| 0.1, 0.2, 0.3, 0.4,class1 | ||||
|  | @ -0,0 +1,6 @@ | |||
| @RELATION test3 | ||||
| 
 | ||||
| @ATTRIBUTE attr0	crap | ||||
| 
 | ||||
| @DATA | ||||
| 0.1, 0.2, 0.3, 0.4,class1 | ||||
							
								
								
									
11  venv/Lib/site-packages/scipy/io/arff/tests/data/test4.arff  Normal file
							|  | @ -0,0 +1,11 @@ | |||
| @RELATION test5 | ||||
| 
 | ||||
| @ATTRIBUTE attr0	REAL | ||||
| @ATTRIBUTE attr1 	REAL | ||||
| @ATTRIBUTE attr2 	REAL | ||||
| @ATTRIBUTE attr3	REAL | ||||
| @ATTRIBUTE class 	{class0, class1, class2, class3} | ||||
| @DATA | ||||
| 0.1, 0.2, 0.3, 0.4,class1 | ||||
| -0.1, -0.2, -0.3, -0.4,class2 | ||||
| 1, 2, 3, 4,class3 | ||||
							
								
								
									
26  venv/Lib/site-packages/scipy/io/arff/tests/data/test5.arff  Normal file
							|  | @ -0,0 +1,26 @@ | |||
| @RELATION test4 | ||||
| 
 | ||||
| @ATTRIBUTE attr0	REAL | ||||
| @ATTRIBUTE attr1 	REAL | ||||
| @ATTRIBUTE attr2 	REAL | ||||
| @ATTRIBUTE attr3	REAL | ||||
| @ATTRIBUTE class 	{class0, class1, class2, class3} | ||||
| 
 | ||||
| @DATA | ||||
| 
 | ||||
| % lsdflkjhaksjdhf | ||||
| 
 | ||||
| % lsdflkjhaksjdhf | ||||
| 
 | ||||
| 0.1, 0.2, 0.3, 0.4,class1 | ||||
| % laksjdhf | ||||
| 
 | ||||
| % lsdflkjhaksjdhf | ||||
| -0.1, -0.2, -0.3, -0.4,class2 | ||||
| 
 | ||||
| % lsdflkjhaksjdhf | ||||
| % lsdflkjhaksjdhf | ||||
| 
 | ||||
| % lsdflkjhaksjdhf | ||||
| 
 | ||||
| 1, 2, 3, 4,class3 | ||||
							
								
								
									
12  venv/Lib/site-packages/scipy/io/arff/tests/data/test6.arff  Normal file
							|  | @ -0,0 +1,12 @@ | |||
| @RELATION test6 | ||||
| 
 | ||||
| @ATTRIBUTE attr0	REAL | ||||
| @ATTRIBUTE attr1 	REAL | ||||
| @ATTRIBUTE attr2 	REAL | ||||
| @ATTRIBUTE attr3	REAL | ||||
| @ATTRIBUTE class 	{C} | ||||
| 
 | ||||
| @DATA | ||||
| 0.1, 0.2, 0.3, 0.4,C | ||||
| -0.1, -0.2, -0.3, -0.4,C | ||||
| 1, 2, 3, 4,C | ||||
							
								
								
									
15  venv/Lib/site-packages/scipy/io/arff/tests/data/test7.arff  Normal file
							|  | @ -0,0 +1,15 @@ | |||
| @RELATION test7 | ||||
| 
 | ||||
| @ATTRIBUTE attr_year	DATE yyyy | ||||
| @ATTRIBUTE attr_month	DATE yyyy-MM | ||||
| @ATTRIBUTE attr_date	DATE yyyy-MM-dd | ||||
| @ATTRIBUTE attr_datetime_local	DATE "yyyy-MM-dd HH:mm" | ||||
| @ATTRIBUTE attr_datetime_missing	DATE "yyyy-MM-dd HH:mm" | ||||
| 
 | ||||
| @DATA | ||||
| 1999,1999-01,1999-01-31,"1999-01-31 00:01",? | ||||
| 2004,2004-12,2004-12-01,"2004-12-01 23:59","2004-12-01 23:59" | ||||
| 1817,1817-04,1817-04-28,"1817-04-28 13:00",? | ||||
| 2100,2100-09,2100-09-10,"2100-09-10 12:00",? | ||||
| 2013,2013-11,2013-11-30,"2013-11-30 04:55","2013-11-30 04:55" | ||||
| 1631,1631-10,1631-10-15,"1631-10-15 20:04","1631-10-15 20:04" | ||||
							
								
								
									
12  venv/Lib/site-packages/scipy/io/arff/tests/data/test8.arff  Normal file
							|  | @ -0,0 +1,12 @@ | |||
| @RELATION test8 | ||||
| 
 | ||||
| @ATTRIBUTE attr_datetime_utc	DATE "yyyy-MM-dd HH:mm Z" | ||||
| @ATTRIBUTE attr_datetime_full	DATE "yy-MM-dd HH:mm:ss z" | ||||
| 
 | ||||
| @DATA | ||||
| "1999-01-31 00:01 UTC","99-01-31 00:01:08 +0430" | ||||
| "2004-12-01 23:59 UTC","04-12-01 23:59:59 -0800" | ||||
| "1817-04-28 13:00 UTC","17-04-28 13:00:33 +1000" | ||||
| "2100-09-10 12:00 UTC","21-09-10 12:00:21 -0300" | ||||
| "2013-11-30 04:55 UTC","13-11-30 04:55:48 -1100" | ||||
| "1631-10-15 20:04 UTC","31-10-15 20:04:10 +0000" | ||||
							
								
								
									
14	venv/Lib/site-packages/scipy/io/arff/tests/data/test9.arff	Normal file
|  | @ -0,0 +1,14 @@ | |||
| @RELATION test9 | ||||
| 
 | ||||
| @ATTRIBUTE attr_date_number	    RELATIONAL | ||||
| 	@ATTRIBUTE attr_date	DATE "yyyy-MM-dd" | ||||
| 	@ATTRIBUTE attr_number	INTEGER | ||||
| @END attr_date_number | ||||
| 
 | ||||
| @DATA | ||||
| "1999-01-31	1\n1935-11-27	10" | ||||
| "2004-12-01	2\n1942-08-13	20" | ||||
| "1817-04-28	3" | ||||
| "2100-09-10	4\n1957-04-17	40\n1721-01-14	400" | ||||
| "2013-11-30	5" | ||||
| "1631-10-15	6" | ||||
							
								
								
									
412	venv/Lib/site-packages/scipy/io/arff/tests/test_arffread.py	Normal file
|  | @ -0,0 +1,412 @@ | |||
| import datetime | ||||
| import os | ||||
| import sys | ||||
| from os.path import join as pjoin | ||||
| 
 | ||||
| from io import StringIO | ||||
| 
 | ||||
| import numpy as np | ||||
| 
 | ||||
| from numpy.testing import (assert_array_almost_equal, | ||||
|                            assert_array_equal, assert_equal, assert_) | ||||
| import pytest | ||||
| from pytest import raises as assert_raises | ||||
| 
 | ||||
| from scipy.io.arff.arffread import loadarff | ||||
| from scipy.io.arff.arffread import read_header, ParseArffError | ||||
| 
 | ||||
| 
 | ||||
| data_path = pjoin(os.path.dirname(__file__), 'data') | ||||
| 
 | ||||
| test1 = pjoin(data_path, 'test1.arff') | ||||
| test2 = pjoin(data_path, 'test2.arff') | ||||
| test3 = pjoin(data_path, 'test3.arff') | ||||
| 
 | ||||
| test4 = pjoin(data_path, 'test4.arff') | ||||
| test5 = pjoin(data_path, 'test5.arff') | ||||
| test6 = pjoin(data_path, 'test6.arff') | ||||
| test7 = pjoin(data_path, 'test7.arff') | ||||
| test8 = pjoin(data_path, 'test8.arff') | ||||
| test9 = pjoin(data_path, 'test9.arff') | ||||
| test10 = pjoin(data_path, 'test10.arff') | ||||
| test11 = pjoin(data_path, 'test11.arff') | ||||
| test_quoted_nominal = pjoin(data_path, 'quoted_nominal.arff') | ||||
| test_quoted_nominal_spaces = pjoin(data_path, 'quoted_nominal_spaces.arff') | ||||
| 
 | ||||
| expect4_data = [(0.1, 0.2, 0.3, 0.4, 'class1'), | ||||
|                 (-0.1, -0.2, -0.3, -0.4, 'class2'), | ||||
|                 (1, 2, 3, 4, 'class3')] | ||||
| expected_types = ['numeric', 'numeric', 'numeric', 'numeric', 'nominal'] | ||||
| 
 | ||||
| missing = pjoin(data_path, 'missing.arff') | ||||
| expect_missing_raw = np.array([[1, 5], [2, 4], [np.nan, np.nan]]) | ||||
| expect_missing = np.empty(3, [('yop', float), ('yap', float)]) | ||||
| expect_missing['yop'] = expect_missing_raw[:, 0] | ||||
| expect_missing['yap'] = expect_missing_raw[:, 1] | ||||
| 
 | ||||
| 
 | ||||
| class TestData(object): | ||||
|     def test1(self): | ||||
|         # Parsing trivial file with nothing. | ||||
|         self._test(test4) | ||||
| 
 | ||||
|     def test2(self): | ||||
|         # Parsing trivial file with some comments in the data section. | ||||
|         self._test(test5) | ||||
| 
 | ||||
|     def test3(self): | ||||
|         # Parsing trivial file with nominal attribute of 1 character. | ||||
|         self._test(test6) | ||||
| 
 | ||||
|     def _test(self, test_file): | ||||
|         data, meta = loadarff(test_file) | ||||
|         for i in range(len(data)): | ||||
|             for j in range(4): | ||||
|                 assert_array_almost_equal(expect4_data[i][j], data[i][j]) | ||||
|         assert_equal(meta.types(), expected_types) | ||||
| 
 | ||||
|     def test_filelike(self): | ||||
|         # Test reading from file-like object (StringIO) | ||||
|         with open(test1) as f1: | ||||
|             data1, meta1 = loadarff(f1) | ||||
|         with open(test1) as f2: | ||||
|             data2, meta2 = loadarff(StringIO(f2.read())) | ||||
|         assert_(data1 == data2) | ||||
|         assert_(repr(meta1) == repr(meta2)) | ||||
| 
 | ||||
|     @pytest.mark.skipif(sys.version_info < (3, 6), | ||||
|                         reason='Passing path-like objects to IO functions requires Python >= 3.6') | ||||
|     def test_path(self): | ||||
|         # Test reading from `pathlib.Path` object | ||||
|         from pathlib import Path | ||||
| 
 | ||||
|         with open(test1) as f1: | ||||
|             data1, meta1 = loadarff(f1) | ||||
| 
 | ||||
|         data2, meta2 = loadarff(Path(test1)) | ||||
| 
 | ||||
|         assert_(data1 == data2) | ||||
|         assert_(repr(meta1) == repr(meta2)) | ||||
| 
 | ||||
| 
 | ||||
| class TestMissingData(object): | ||||
|     def test_missing(self): | ||||
|         data, meta = loadarff(missing) | ||||
|         for i in ['yop', 'yap']: | ||||
|             assert_array_almost_equal(data[i], expect_missing[i]) | ||||
| 
 | ||||
| 
 | ||||
| class TestNoData(object): | ||||
|     def test_nodata(self): | ||||
|         # The file nodata.arff has no data in the @DATA section. | ||||
|         # Reading it should result in an array with length 0. | ||||
|         nodata_filename = os.path.join(data_path, 'nodata.arff') | ||||
|         data, meta = loadarff(nodata_filename) | ||||
|         expected_dtype = np.dtype([('sepallength', '<f8'), | ||||
|                                    ('sepalwidth', '<f8'), | ||||
|                                    ('petallength', '<f8'), | ||||
|                                    ('petalwidth', '<f8'), | ||||
|                                    ('class', 'S15')]) | ||||
|         assert_equal(data.dtype, expected_dtype) | ||||
|         assert_equal(data.size, 0) | ||||
| 
 | ||||
| 
 | ||||
| class TestHeader(object): | ||||
|     def test_type_parsing(self): | ||||
|         # Test parsing type of attribute from their value. | ||||
|         with open(test2) as ofile: | ||||
|             rel, attrs = read_header(ofile) | ||||
| 
 | ||||
|         expected = ['numeric', 'numeric', 'numeric', 'numeric', 'numeric', | ||||
|                     'numeric', 'string', 'string', 'nominal', 'nominal'] | ||||
| 
 | ||||
|         for i in range(len(attrs)): | ||||
|             assert_(attrs[i].type_name == expected[i]) | ||||
| 
 | ||||
|     def test_badtype_parsing(self): | ||||
|         # Test parsing wrong type of attribute from their value. | ||||
|         def badtype_read(): | ||||
|             with open(test3) as ofile: | ||||
|                 _, _ = read_header(ofile) | ||||
| 
 | ||||
|         assert_raises(ParseArffError, badtype_read) | ||||
| 
 | ||||
|     def test_fullheader1(self): | ||||
|         # Parsing trivial header with nothing. | ||||
|         with open(test1) as ofile: | ||||
|             rel, attrs = read_header(ofile) | ||||
| 
 | ||||
|         # Test relation | ||||
|         assert_(rel == 'test1') | ||||
| 
 | ||||
|         # Test numerical attributes | ||||
|         assert_(len(attrs) == 5) | ||||
|         for i in range(4): | ||||
|             assert_(attrs[i].name == 'attr%d' % i) | ||||
|             assert_(attrs[i].type_name == 'numeric') | ||||
| 
 | ||||
|         # Test nominal attribute | ||||
|         assert_(attrs[4].name == 'class') | ||||
|         assert_(attrs[4].values == ('class0', 'class1', 'class2', 'class3')) | ||||
| 
 | ||||
|     def test_dateheader(self): | ||||
|         with open(test7) as ofile: | ||||
|             rel, attrs = read_header(ofile) | ||||
| 
 | ||||
|         assert_(rel == 'test7') | ||||
| 
 | ||||
|         assert_(len(attrs) == 5) | ||||
| 
 | ||||
|         assert_(attrs[0].name == 'attr_year') | ||||
|         assert_(attrs[0].date_format == '%Y') | ||||
| 
 | ||||
|         assert_(attrs[1].name == 'attr_month') | ||||
|         assert_(attrs[1].date_format == '%Y-%m') | ||||
| 
 | ||||
|         assert_(attrs[2].name == 'attr_date') | ||||
|         assert_(attrs[2].date_format == '%Y-%m-%d') | ||||
| 
 | ||||
|         assert_(attrs[3].name == 'attr_datetime_local') | ||||
|         assert_(attrs[3].date_format == '%Y-%m-%d %H:%M') | ||||
| 
 | ||||
|         assert_(attrs[4].name == 'attr_datetime_missing') | ||||
|         assert_(attrs[4].date_format == '%Y-%m-%d %H:%M') | ||||
| 
 | ||||
|     def test_dateheader_unsupported(self): | ||||
|         def read_dateheader_unsupported(): | ||||
|             with open(test8) as ofile: | ||||
|                 _, _ = read_header(ofile) | ||||
| 
 | ||||
|         assert_raises(ValueError, read_dateheader_unsupported) | ||||
| 
 | ||||
| 
 | ||||
| class TestDateAttribute(object): | ||||
|     def setup_method(self): | ||||
|         self.data, self.meta = loadarff(test7) | ||||
| 
 | ||||
|     def test_year_attribute(self): | ||||
|         expected = np.array([ | ||||
|             '1999', | ||||
|             '2004', | ||||
|             '1817', | ||||
|             '2100', | ||||
|             '2013', | ||||
|             '1631' | ||||
|         ], dtype='datetime64[Y]') | ||||
| 
 | ||||
|         assert_array_equal(self.data["attr_year"], expected) | ||||
| 
 | ||||
|     def test_month_attribute(self): | ||||
|         expected = np.array([ | ||||
|             '1999-01', | ||||
|             '2004-12', | ||||
|             '1817-04', | ||||
|             '2100-09', | ||||
|             '2013-11', | ||||
|             '1631-10' | ||||
|         ], dtype='datetime64[M]') | ||||
| 
 | ||||
|         assert_array_equal(self.data["attr_month"], expected) | ||||
| 
 | ||||
|     def test_date_attribute(self): | ||||
|         expected = np.array([ | ||||
|             '1999-01-31', | ||||
|             '2004-12-01', | ||||
|             '1817-04-28', | ||||
|             '2100-09-10', | ||||
|             '2013-11-30', | ||||
|             '1631-10-15' | ||||
|         ], dtype='datetime64[D]') | ||||
| 
 | ||||
|         assert_array_equal(self.data["attr_date"], expected) | ||||
| 
 | ||||
|     def test_datetime_local_attribute(self): | ||||
|         expected = np.array([ | ||||
|             datetime.datetime(year=1999, month=1, day=31, hour=0, minute=1), | ||||
|             datetime.datetime(year=2004, month=12, day=1, hour=23, minute=59), | ||||
|             datetime.datetime(year=1817, month=4, day=28, hour=13, minute=0), | ||||
|             datetime.datetime(year=2100, month=9, day=10, hour=12, minute=0), | ||||
|             datetime.datetime(year=2013, month=11, day=30, hour=4, minute=55), | ||||
|             datetime.datetime(year=1631, month=10, day=15, hour=20, minute=4) | ||||
|         ], dtype='datetime64[m]') | ||||
| 
 | ||||
|         assert_array_equal(self.data["attr_datetime_local"], expected) | ||||
| 
 | ||||
|     def test_datetime_missing(self): | ||||
|         expected = np.array([ | ||||
|             'nat', | ||||
|             '2004-12-01T23:59', | ||||
|             'nat', | ||||
|             'nat', | ||||
|             '2013-11-30T04:55', | ||||
|             '1631-10-15T20:04' | ||||
|         ], dtype='datetime64[m]') | ||||
| 
 | ||||
|         assert_array_equal(self.data["attr_datetime_missing"], expected) | ||||
| 
 | ||||
|     def test_datetime_timezone(self): | ||||
|         assert_raises(ParseArffError, loadarff, test8) | ||||
| 
 | ||||
| 
 | ||||
| class TestRelationalAttribute(object): | ||||
|     def setup_method(self): | ||||
|         self.data, self.meta = loadarff(test9) | ||||
| 
 | ||||
|     def test_attributes(self): | ||||
|         assert_equal(len(self.meta._attributes), 1) | ||||
| 
 | ||||
|         relational = list(self.meta._attributes.values())[0] | ||||
| 
 | ||||
|         assert_equal(relational.name, 'attr_date_number') | ||||
|         assert_equal(relational.type_name, 'relational') | ||||
|         assert_equal(len(relational.attributes), 2) | ||||
|         assert_equal(relational.attributes[0].name, | ||||
|                      'attr_date') | ||||
|         assert_equal(relational.attributes[0].type_name, | ||||
|                      'date') | ||||
|         assert_equal(relational.attributes[1].name, | ||||
|                      'attr_number') | ||||
|         assert_equal(relational.attributes[1].type_name, | ||||
|                      'numeric') | ||||
| 
 | ||||
|     def test_data(self): | ||||
|         dtype_instance = [('attr_date', 'datetime64[D]'), | ||||
|                           ('attr_number', np.float_)] | ||||
| 
 | ||||
|         expected = [ | ||||
|             np.array([('1999-01-31', 1), ('1935-11-27', 10)], | ||||
|                      dtype=dtype_instance), | ||||
|             np.array([('2004-12-01', 2), ('1942-08-13', 20)], | ||||
|                      dtype=dtype_instance), | ||||
|             np.array([('1817-04-28', 3)], | ||||
|                      dtype=dtype_instance), | ||||
|             np.array([('2100-09-10', 4), ('1957-04-17', 40), | ||||
|                       ('1721-01-14', 400)], | ||||
|                      dtype=dtype_instance), | ||||
|             np.array([('2013-11-30', 5)], | ||||
|                      dtype=dtype_instance), | ||||
|             np.array([('1631-10-15', 6)], | ||||
|                      dtype=dtype_instance) | ||||
|         ] | ||||
| 
 | ||||
|         for i in range(len(self.data["attr_date_number"])): | ||||
|             assert_array_equal(self.data["attr_date_number"][i], | ||||
|                                expected[i]) | ||||
| 
 | ||||
| 
 | ||||
| class TestRelationalAttributeLong(object): | ||||
|     def setup_method(self): | ||||
|         self.data, self.meta = loadarff(test10) | ||||
| 
 | ||||
|     def test_attributes(self): | ||||
|         assert_equal(len(self.meta._attributes), 1) | ||||
| 
 | ||||
|         relational = list(self.meta._attributes.values())[0] | ||||
| 
 | ||||
|         assert_equal(relational.name, 'attr_relational') | ||||
|         assert_equal(relational.type_name, 'relational') | ||||
|         assert_equal(len(relational.attributes), 1) | ||||
|         assert_equal(relational.attributes[0].name, | ||||
|                      'attr_number') | ||||
|         assert_equal(relational.attributes[0].type_name, 'numeric') | ||||
| 
 | ||||
|     def test_data(self): | ||||
|         dtype_instance = [('attr_number', np.float_)] | ||||
| 
 | ||||
|         expected = np.array([(n,) for n in range(30000)], | ||||
|                             dtype=dtype_instance) | ||||
| 
 | ||||
|         assert_array_equal(self.data["attr_relational"][0], | ||||
|                            expected) | ||||
| 
 | ||||
| 
 | ||||
| class TestQuotedNominal(object): | ||||
|     """ | ||||
|     Regression test for issue #10232 : Exception in loadarff with quoted nominal attributes. | ||||
|     """ | ||||
| 
 | ||||
|     def setup_method(self): | ||||
|         self.data, self.meta = loadarff(test_quoted_nominal) | ||||
| 
 | ||||
|     def test_attributes(self): | ||||
|         assert_equal(len(self.meta._attributes), 2) | ||||
| 
 | ||||
|         age, smoker = self.meta._attributes.values() | ||||
| 
 | ||||
|         assert_equal(age.name, 'age') | ||||
|         assert_equal(age.type_name, 'numeric') | ||||
|         assert_equal(smoker.name, 'smoker') | ||||
|         assert_equal(smoker.type_name, 'nominal') | ||||
|         assert_equal(smoker.values, ['yes', 'no']) | ||||
| 
 | ||||
|     def test_data(self): | ||||
| 
 | ||||
|         age_dtype_instance = np.float_ | ||||
|         smoker_dtype_instance = '<S3' | ||||
| 
 | ||||
|         age_expected = np.array([ | ||||
|             18, | ||||
|             24, | ||||
|             44, | ||||
|             56, | ||||
|             89, | ||||
|             11, | ||||
|         ], dtype=age_dtype_instance) | ||||
| 
 | ||||
|         smoker_expected = np.array([ | ||||
|             'no', | ||||
|             'yes', | ||||
|             'no', | ||||
|             'no', | ||||
|             'yes', | ||||
|             'no', | ||||
|         ], dtype=smoker_dtype_instance) | ||||
| 
 | ||||
|         assert_array_equal(self.data["age"], age_expected) | ||||
|         assert_array_equal(self.data["smoker"], smoker_expected) | ||||
| 
 | ||||
| 
 | ||||
| class TestQuotedNominalSpaces(object): | ||||
|     """ | ||||
|     Regression test for issue #10232 : Exception in loadarff with quoted nominal attributes. | ||||
|     """ | ||||
| 
 | ||||
|     def setup_method(self): | ||||
|         self.data, self.meta = loadarff(test_quoted_nominal_spaces) | ||||
| 
 | ||||
|     def test_attributes(self): | ||||
|         assert_equal(len(self.meta._attributes), 2) | ||||
| 
 | ||||
|         age, smoker = self.meta._attributes.values() | ||||
| 
 | ||||
|         assert_equal(age.name, 'age') | ||||
|         assert_equal(age.type_name, 'numeric') | ||||
|         assert_equal(smoker.name, 'smoker') | ||||
|         assert_equal(smoker.type_name, 'nominal') | ||||
|         assert_equal(smoker.values, ['  yes', 'no  ']) | ||||
| 
 | ||||
|     def test_data(self): | ||||
| 
 | ||||
|         age_dtype_instance = np.float_ | ||||
|         smoker_dtype_instance = '<S5' | ||||
| 
 | ||||
|         age_expected = np.array([ | ||||
|             18, | ||||
|             24, | ||||
|             44, | ||||
|             56, | ||||
|             89, | ||||
|             11, | ||||
|         ], dtype=age_dtype_instance) | ||||
| 
 | ||||
|         smoker_expected = np.array([ | ||||
|             'no  ', | ||||
|             '  yes', | ||||
|             'no  ', | ||||
|             'no  ', | ||||
|             '  yes', | ||||
|             'no  ', | ||||
|         ], dtype=smoker_dtype_instance) | ||||
| 
 | ||||
|         assert_array_equal(self.data["age"], age_expected) | ||||
|         assert_array_equal(self.data["smoker"], smoker_expected) | ||||
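The test module above can be run directly with pytest; a sketch, assuming pytest is installed in the same venv and using the vendored path added in this commit:

```python
# Sketch: execute the ARFF reader tests added in this commit.
import pytest

raise SystemExit(pytest.main([
    "venv/Lib/site-packages/scipy/io/arff/tests/test_arffread.py", "-v",
]))
```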