46 lines
1.4 KiB
Python
46 lines
1.4 KiB
Python
"""Test kddcup99 loader, if the data is available,
|
|
or if specifically requested via environment variable
|
|
(e.g. for travis cron job).
|
|
|
|
Only 'percent10' mode is tested, as the full data
|
|
is too big to use in unit-testing.
|
|
"""
|
|
|
|
from sklearn.datasets.tests.test_common import check_return_X_y
|
|
from functools import partial
|
|
|
|
|
|
def test_percent10(fetch_kddcup99_fxt):
|
|
data = fetch_kddcup99_fxt()
|
|
|
|
assert data.data.shape == (494021, 41)
|
|
assert data.target.shape == (494021,)
|
|
|
|
data_shuffled = fetch_kddcup99_fxt(shuffle=True, random_state=0)
|
|
assert data.data.shape == data_shuffled.data.shape
|
|
assert data.target.shape == data_shuffled.target.shape
|
|
|
|
data = fetch_kddcup99_fxt('SA')
|
|
assert data.data.shape == (100655, 41)
|
|
assert data.target.shape == (100655,)
|
|
|
|
data = fetch_kddcup99_fxt('SF')
|
|
assert data.data.shape == (73237, 4)
|
|
assert data.target.shape == (73237,)
|
|
|
|
data = fetch_kddcup99_fxt('http')
|
|
assert data.data.shape == (58725, 3)
|
|
assert data.target.shape == (58725,)
|
|
|
|
data = fetch_kddcup99_fxt('smtp')
|
|
assert data.data.shape == (9571, 3)
|
|
assert data.target.shape == (9571,)
|
|
|
|
fetch_func = partial(fetch_kddcup99_fxt, 'smtp')
|
|
check_return_X_y(data, fetch_func)
|
|
|
|
|
|
def test_shuffle(fetch_kddcup99_fxt):
|
|
dataset = fetch_kddcup99_fxt(random_state=0, subset='SA', shuffle=True,
|
|
percent10=True)
|
|
assert(any(dataset.target[-100:] == b'normal.'))
|