import datetime import os import sys from os.path import join as pjoin from io import StringIO import numpy as np from numpy.testing import (assert_array_almost_equal, assert_array_equal, assert_equal, assert_) import pytest from pytest import raises as assert_raises from scipy.io.arff.arffread import loadarff from scipy.io.arff.arffread import read_header, ParseArffError data_path = pjoin(os.path.dirname(__file__), 'data') test1 = pjoin(data_path, 'test1.arff') test2 = pjoin(data_path, 'test2.arff') test3 = pjoin(data_path, 'test3.arff') test4 = pjoin(data_path, 'test4.arff') test5 = pjoin(data_path, 'test5.arff') test6 = pjoin(data_path, 'test6.arff') test7 = pjoin(data_path, 'test7.arff') test8 = pjoin(data_path, 'test8.arff') test9 = pjoin(data_path, 'test9.arff') test10 = pjoin(data_path, 'test10.arff') test11 = pjoin(data_path, 'test11.arff') test_quoted_nominal = pjoin(data_path, 'quoted_nominal.arff') test_quoted_nominal_spaces = pjoin(data_path, 'quoted_nominal_spaces.arff') expect4_data = [(0.1, 0.2, 0.3, 0.4, 'class1'), (-0.1, -0.2, -0.3, -0.4, 'class2'), (1, 2, 3, 4, 'class3')] expected_types = ['numeric', 'numeric', 'numeric', 'numeric', 'nominal'] missing = pjoin(data_path, 'missing.arff') expect_missing_raw = np.array([[1, 5], [2, 4], [np.nan, np.nan]]) expect_missing = np.empty(3, [('yop', float), ('yap', float)]) expect_missing['yop'] = expect_missing_raw[:, 0] expect_missing['yap'] = expect_missing_raw[:, 1] class TestData(object): def test1(self): # Parsing trivial file with nothing. self._test(test4) def test2(self): # Parsing trivial file with some comments in the data section. self._test(test5) def test3(self): # Parsing trivial file with nominal attribute of 1 character. self._test(test6) def _test(self, test_file): data, meta = loadarff(test_file) for i in range(len(data)): for j in range(4): assert_array_almost_equal(expect4_data[i][j], data[i][j]) assert_equal(meta.types(), expected_types) def test_filelike(self): # Test reading from file-like object (StringIO) with open(test1) as f1: data1, meta1 = loadarff(f1) with open(test1) as f2: data2, meta2 = loadarff(StringIO(f2.read())) assert_(data1 == data2) assert_(repr(meta1) == repr(meta2)) @pytest.mark.skipif(sys.version_info < (3, 6), reason='Passing path-like objects to IO functions requires Python >= 3.6') def test_path(self): # Test reading from `pathlib.Path` object from pathlib import Path with open(test1) as f1: data1, meta1 = loadarff(f1) data2, meta2 = loadarff(Path(test1)) assert_(data1 == data2) assert_(repr(meta1) == repr(meta2)) class TestMissingData(object): def test_missing(self): data, meta = loadarff(missing) for i in ['yop', 'yap']: assert_array_almost_equal(data[i], expect_missing[i]) class TestNoData(object): def test_nodata(self): # The file nodata.arff has no data in the @DATA section. # Reading it should result in an array with length 0. nodata_filename = os.path.join(data_path, 'nodata.arff') data, meta = loadarff(nodata_filename) expected_dtype = np.dtype([('sepallength', '