Uploaded Test files
This commit is contained in:
parent
f584ad9d97
commit
2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,121 @@
|
|||
# Authors: Lars Buitinck
|
||||
# Dan Blanchard <dblanchard@ets.org>
|
||||
# License: BSD 3 clause
|
||||
|
||||
from random import Random
|
||||
import numpy as np
|
||||
import scipy.sparse as sp
|
||||
from numpy.testing import assert_array_equal
|
||||
|
||||
import pytest
|
||||
|
||||
from sklearn.feature_extraction import DictVectorizer
|
||||
from sklearn.feature_selection import SelectKBest, chi2
|
||||
|
||||
|
||||
@pytest.mark.parametrize('sparse', (True, False))
@pytest.mark.parametrize('dtype', (int, np.float32, np.int16))
@pytest.mark.parametrize('sort', (True, False))
@pytest.mark.parametrize('iterable', (True, False))
def test_dictvectorizer(sparse, dtype, sort, iterable):
    """Fit/transform a three-sample corpus for every option combination."""
    samples = [{"foo": 1, "bar": 3},
               {"bar": 4, "baz": 2},
               {"bar": 1, "quux": 1, "quuux": 2}]

    def feed():
        # A fresh one-shot iterator per call when testing iterable input.
        return iter(samples) if iterable else samples

    vectorizer = DictVectorizer(sparse=sparse, dtype=dtype, sort=sort)
    X = vectorizer.fit_transform(feed())

    assert sp.issparse(X) == sparse
    assert X.shape == (3, 5)
    assert X.sum() == 14
    assert vectorizer.inverse_transform(X) == samples

    # transform() on the fitted vectorizer must reproduce fit_transform().
    X_again = vectorizer.transform(feed())
    if sparse:
        # CSR matrices can't be compared for equality
        assert_array_equal(X.A, X_again.A)
    else:
        assert_array_equal(X, X_again)

    if sort:
        names = vectorizer.feature_names_
        assert names == sorted(names)
|
||||
|
||||
|
||||
def test_feature_selection():
    """restrict() keeps only the chi2-selected features, by mask or index."""
    # Two feature dicts sharing a bunch of useless (constant) features and
    # differing only in the two useful ones, in terms of chi2.
    useless = {"useless%d" % i: 10 for i in range(20)}
    d1 = dict(useless, useful1=1, useful2=20)
    d2 = dict(useless, useful1=20, useful2=1)
    samples = [d1, d2]

    for indices in (True, False):
        v = DictVectorizer().fit(samples)
        X = v.transform(samples)
        selector = SelectKBest(chi2, k=2).fit(X, [0, 1])

        support = selector.get_support(indices=indices)
        v.restrict(support, indices=indices)
        assert v.get_feature_names() == ["useful1", "useful2"]
|
||||
|
||||
|
||||
def test_one_of_k():
    """String-valued features show up as "name=value" columns."""
    D_in = [
        {"version": "1", "ham": 2},
        {"version": "2", "spam": .3},
        {"version=3": True, "spam": -1},
    ]
    vectorizer = DictVectorizer()
    X = vectorizer.fit_transform(D_in)
    assert X.shape == (3, 5)

    first_row = vectorizer.inverse_transform(X)[0]
    assert first_row == {"version=1": 1, "ham": 2}

    names = vectorizer.get_feature_names()
    assert "version=2" in names
    assert "version" not in names
|
||||
|
||||
|
||||
def test_unseen_or_no_features():
    """Unseen or missing features map to an all-zero row."""
    D = [{"camelot": 0, "spamalot": 1}]
    all_zero = np.zeros((1, 2))

    for sparse in [True, False]:
        v = DictVectorizer(sparse=sparse).fit(D)

        # First a sample with only an unseen feature, then an empty sample.
        for sample in ({"push the pram a lot": 2}, {}):
            X = v.transform(sample)
            if sparse:
                X = X.toarray()
            assert_array_equal(X, all_zero)

        # An empty sample sequence is not required to raise here; when it
        # does raise ValueError, the message has to mention "empty".
        try:
            v.transform([])
        except ValueError as e:
            assert "empty" in str(e)
|
||||
|
||||
|
||||
def test_deterministic_vocabulary():
    """The fitted vocabulary must not depend on dict insertion order."""
    # Generate equal dictionaries with different memory layouts
    items = [("%03d" % i, i) for i in range(1000)]
    d_sorted = dict(items)

    shuffled_items = list(items)
    Random(42).shuffle(shuffled_items)
    d_shuffled = dict(shuffled_items)

    # check that the memory layout does not impact the resulting vocabulary
    vocab_sorted = DictVectorizer().fit([d_sorted]).vocabulary_
    vocab_shuffled = DictVectorizer().fit([d_shuffled]).vocabulary_

    assert vocab_sorted == vocab_shuffled
|
||||
|
||||
|
||||
def test_n_features_in():
    # For vectorizers, n_features_in_ does not make sense and does not exist.
    attribute = 'n_features_in_'
    dv = DictVectorizer()
    assert not hasattr(dv, attribute)

    dv.fit([{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}])
    assert not hasattr(dv, attribute)
|
|
@ -0,0 +1,171 @@
|
|||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_array_equal
|
||||
import pytest
|
||||
|
||||
from sklearn.feature_extraction import FeatureHasher
|
||||
from sklearn.utils._testing import (ignore_warnings,
|
||||
fails_if_pypy)
|
||||
|
||||
pytestmark = fails_if_pypy
|
||||
|
||||
|
||||
def test_feature_hasher_dicts():
    """Dict input and the equivalent (key, value) pair input hash alike."""
    default_hasher = FeatureHasher(n_features=16)
    assert "dict" == default_hasher.input_type

    raw_X = [{"foo": "bar", "dada": 42, "tzara": 37},
             {"foo": "baz", "gaga": "string1"}]

    from_dicts = FeatureHasher(n_features=16).transform(raw_X)

    pair_stream = (iter(d.items()) for d in raw_X)
    pair_hasher = FeatureHasher(n_features=16, input_type="pair")
    from_pairs = pair_hasher.transform(pair_stream)

    assert_array_equal(from_dicts.toarray(), from_pairs.toarray())
|
||||
|
||||
|
||||
def test_feature_hasher_strings():
    """Hash string tokens at several widths and check shape and counts."""
    # mix byte and Unicode strings; note that "foo" is a duplicate in row 0
    raw_X = [["foo", "bar", "baz", b"foo"],
             [b"bar", "baz", "quux"]]

    for lg_n_features in (7, 9, 11, 16, 22):
        n_features = 2 ** lg_n_features

        hasher = FeatureHasher(n_features=n_features, input_type="string",
                               alternate_sign=False)
        # Feed a one-shot iterable rather than the list itself.
        X = hasher.transform(iter(raw_X))

        assert X.shape == (len(raw_X), n_features)

        assert X[0].sum() == 4
        assert X[1].sum() == 3

        assert X.nnz == 6
|
||||
|
||||
|
||||
def test_hashing_transform_seed():
    # check the influence of the seed when computing the hashes
    # import is here to avoid importing on pypy
    from sklearn.feature_extraction._hashing_fast import (
        transform as _hashing_transform)

    raw_X = [["foo", "bar", "baz", "foo".encode("ascii")],
             ["bar".encode("ascii"), "baz", "quux"]]

    def token_pairs():
        # Fresh nested generator of (token, 1) pairs; each one is consumed
        # by a single _hashing_transform call.
        return (((f, 1) for f in x) for x in raw_X)

    indices, indptr, _ = _hashing_transform(token_pairs(), 2 ** 7, str,
                                            False)

    # Passing seed=0 must give the same result as omitting the seed.
    indices_0, indptr_0, _ = _hashing_transform(token_pairs(), 2 ** 7, str,
                                                False, seed=0)
    assert_array_equal(indices, indices_0)
    assert_array_equal(indptr, indptr_0)

    # A different seed must change the hashed indices.
    indices_1, _, _ = _hashing_transform(token_pairs(), 2 ** 7, str,
                                         False, seed=1)
    with pytest.raises(AssertionError):
        assert_array_equal(indices, indices_1)
|
||||
|
||||
|
||||
def test_feature_hasher_pairs():
    """Numeric pair input keeps the magnitude of every value."""
    rows = [{"foo": 1, "bar": 2},
            {"baz": 3, "quux": 4, "foo": -1}]
    raw_X = (iter(row.items()) for row in rows)

    hasher = FeatureHasher(n_features=16, input_type="pair")
    x1, x2 = hasher.transform(raw_X).toarray()

    def magnitudes(hashed_row):
        # Sorted absolute values of the non-zero entries of one hashed row.
        return sorted(np.abs(hashed_row[hashed_row != 0]))

    assert [1, 2] == magnitudes(x1)
    assert [1, 3, 4] == magnitudes(x2)
|
||||
|
||||
|
||||
def test_feature_hasher_pairs_with_string_values():
    """Pair input with string values: each string value counts as 1."""
    raw_X = (iter(d.items()) for d in [{"foo": 1, "bar": "a"},
                                       {"baz": "abc", "quux": 4, "foo": -1}])
    h = FeatureHasher(n_features=16, input_type="pair")
    x1, x2 = h.transform(raw_X).toarray()
    x1_nz = sorted(np.abs(x1[x1 != 0]))
    x2_nz = sorted(np.abs(x2[x2 != 0]))
    assert [1, 1] == x1_nz
    assert [1, 1, 4] == x2_nz

    # The same string-valued pair in two rows must hash identically.
    raw_X = (iter(d.items()) for d in [{"bax": "abc"},
                                       {"bax": "abc"}])
    x1, x2 = h.transform(raw_X).toarray()
    x1_nz = np.abs(x1[x1 != 0])
    x2_nz = np.abs(x2[x2 != 0])
    # Compare with assert_array_equal: asserting on ``[1] == ndarray`` tests
    # the truth value of an element-wise comparison, which does not give a
    # clean failure when the number of non-zero entries is not one.
    assert_array_equal(x1_nz, [1])
    assert_array_equal(x2_nz, [1])
    assert_array_equal(x1, x2)
|
||||
|
||||
|
||||
def test_hash_empty_input():
    """Empty samples of any iterable type hash to all-zero rows."""
    n_features = 16
    raw_X = [[], (), iter(range(0))]

    hasher = FeatureHasher(n_features=n_features, input_type="string")
    hashed = hasher.transform(raw_X)

    expected = np.zeros((len(raw_X), n_features))
    assert_array_equal(hashed.A, expected)
|
||||
|
||||
|
||||
def test_hasher_invalid_input():
    """Invalid constructor arguments and invalid samples must raise."""
    bad_constructor_args = (
        (ValueError, {"input_type": "gobbledygook"}),
        (ValueError, {"n_features": -1}),
        (ValueError, {"n_features": 0}),
        (TypeError, {"n_features": 'ham'}),
    )
    for expected_error, kwargs in bad_constructor_args:
        with pytest.raises(expected_error):
            FeatureHasher(**kwargs)

    h = FeatureHasher(n_features=np.uint16(2 ** 6))
    with pytest.raises(ValueError):
        h.transform([])
    # Only "some exception" is required for these malformed samples.
    for bad_sample in ([[5.5]], [[None]]):
        with pytest.raises(Exception):
            h.transform(bad_sample)
|
||||
|
||||
|
||||
def test_hasher_set_params():
    # Test delayed input validation in fit (useful for grid search).
    delayed = FeatureHasher()
    # set_params accepts the bad value; the error only surfaces in fit().
    delayed.set_params(n_features=np.inf)
    with pytest.raises(TypeError):
        delayed.fit()
|
||||
|
||||
|
||||
def test_hasher_zeros():
    # Assert that no zeros are materialized in the output.
    hashed = FeatureHasher().transform([{'foo': 0}])
    stored_values = hashed.data
    assert stored_values.shape == (0,)
|
||||
|
||||
|
||||
@ignore_warnings(category=FutureWarning)
def test_hasher_alternate_sign():
    """alternate_sign=True yields mixed signs; False yields only positives."""
    X = [list("Thequickbrownfoxjumped")]

    signed_hasher = FeatureHasher(alternate_sign=True, input_type='string')
    signed_values = signed_hasher.fit_transform(X).data
    assert signed_values.min() < 0 and signed_values.max() > 0

    unsigned_hasher = FeatureHasher(alternate_sign=False, input_type='string')
    unsigned_values = unsigned_hasher.fit_transform(X).data
    assert unsigned_values.min() > 0
|
||||
|
||||
|
||||
def test_hash_collisions():
    """With one output feature every token collides into the same slot."""
    X = [list("Thequickbrownfoxjumped")]
    n_tokens = len(X[0])

    signed = FeatureHasher(alternate_sign=True, n_features=1,
                           input_type='string').fit_transform(X)
    # check that some of the hashed tokens are added
    # with an opposite sign and cancel out
    assert abs(signed.data[0]) < n_tokens

    unsigned = FeatureHasher(alternate_sign=False, n_features=1,
                             input_type='string').fit_transform(X)
    assert unsigned.data[0] == n_tokens
|
|
@ -0,0 +1,344 @@
|
|||
# Authors: Emmanuelle Gouillart <emmanuelle.gouillart@normalesup.org>
|
||||
# Gael Varoquaux <gael.varoquaux@normalesup.org>
|
||||
# License: BSD 3 clause
|
||||
|
||||
import numpy as np
|
||||
import scipy as sp
|
||||
from scipy import ndimage
|
||||
from scipy.sparse.csgraph import connected_components
|
||||
import pytest
|
||||
|
||||
from sklearn.feature_extraction.image import (
|
||||
img_to_graph, grid_to_graph, extract_patches_2d,
|
||||
reconstruct_from_patches_2d, PatchExtractor, _extract_patches,
|
||||
extract_patches)
|
||||
from sklearn.utils._testing import ignore_warnings
|
||||
|
||||
|
||||
def test_img_to_graph():
    """Gradient graphs of the two mgrid axes have matching positive entries."""
    x, y = np.mgrid[:4, :4] - 10
    grad_x = img_to_graph(x)
    grad_y = img_to_graph(y)
    assert grad_x.nnz == grad_y.nnz

    # Negative elements are the diagonal: the elements of the original
    # image. Positive elements are the values of the gradient, they
    # should all be equal on grad_x and grad_y
    positive_x = grad_x.data[grad_x.data > 0]
    positive_y = grad_y.data[grad_y.data > 0]
    np.testing.assert_array_equal(positive_x, positive_y)
|
||||
|
||||
|
||||
def test_grid_to_graph():
    """Check grid_to_graph with an edge-less mask, mask dtypes and dtypes.

    The builtin ``bool`` and ``int`` are used instead of the ``np.bool`` and
    ``np.int`` aliases, which were deprecated in NumPy 1.20 and later
    removed; the aliases referred to the very same builtin types.
    """
    # Checking that the function works with graphs containing no edges
    size = 2
    roi_size = 1
    # Generating two convex parts with one vertex
    # Thus, edges will be empty in _to_graph
    mask = np.zeros((size, size), dtype=bool)
    mask[0:roi_size, 0:roi_size] = True
    mask[-roi_size:, -roi_size:] = True
    mask = mask.reshape(size ** 2)
    A = grid_to_graph(n_x=size, n_y=size, mask=mask, return_as=np.ndarray)
    assert connected_components(A)[0] == 2

    # Checking that the function works whatever the type of mask is
    mask = np.ones((size, size), dtype=np.int16)
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask)
    assert connected_components(A)[0] == 1

    # Checking dtype of the graph
    mask = np.ones((size, size))
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=bool)
    assert A.dtype == bool
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask, dtype=int)
    assert A.dtype == int
    A = grid_to_graph(n_x=size, n_y=size, n_z=size, mask=mask,
                      dtype=np.float64)
    assert A.dtype == np.float64
|
||||
|
||||
|
||||
@ignore_warnings(category=DeprecationWarning)  # scipy deprecation inside face
def test_connect_regions():
    """img_to_graph components match ndimage labels on a thresholded face."""
    try:
        image = sp.face(gray=True)
    except AttributeError:
        # Newer versions of scipy have face in misc
        from scipy import misc
        image = misc.face(gray=True)

    # subsample by 4 to reduce run time
    image = image[::4, ::4]

    for thr in (50, 150):
        mask = image > thr
        graph = img_to_graph(image, mask=mask)
        n_labels = ndimage.label(mask)[1]
        n_components = connected_components(graph)[0]
        assert n_labels == n_components
|
||||
|
||||
|
||||
@ignore_warnings(category=DeprecationWarning)  # scipy deprecation inside face
def test_connect_regions_with_grid():
    """grid_to_graph components match ndimage labels on a thresholded face."""
    try:
        image = sp.face(gray=True)
    except AttributeError:
        # Newer versions of scipy have face in misc
        from scipy import misc
        image = misc.face(gray=True)

    # subsample by 4 to reduce run time
    image = image[::4, ::4]

    # The second threshold additionally passes an explicit dtype=None.
    for thr, extra_kwargs in ((50, {}), (150, {"dtype": None})):
        mask = image > thr
        graph = grid_to_graph(*image.shape, mask=mask, **extra_kwargs)
        n_labels = ndimage.label(mask)[1]
        n_components = connected_components(graph)[0]
        assert n_labels == n_components
|
||||
|
||||
|
||||
def _downsampled_face():
    """Return the grayscale face image reduced by 4 along each axis.

    The image is summed over 2x2 blocks twice and divided by 16, so every
    output pixel is the float32 mean of a 4x4 block of the input.
    """
    try:
        raw = sp.face(gray=True)
    except AttributeError:
        # Newer versions of scipy have face in misc
        from scipy import misc
        raw = misc.face(gray=True)

    img = raw.astype(np.float32)
    # Two rounds of 2x2 block summation.
    for _ in range(2):
        img = (img[::2, ::2] + img[1::2, ::2] + img[::2, 1::2]
               + img[1::2, 1::2])
    img = img.astype(np.float32)
    img /= 16.0
    return img
|
||||
|
||||
|
||||
def _orange_face(face=None):
    """Build a 3-channel image from a 2-D face image.

    The channels are ``256 - face``, ``256 - face / 2`` and
    ``256 - face / 4``. When ``face`` is None the downsampled face is used.
    """
    if face is None:
        face = _downsampled_face()

    tinted = np.zeros(face.shape + (3,))
    tinted[:, :, 0] = 256 - face
    for channel, divisor in ((1, 2), (2, 4)):
        tinted[:, :, channel] = 256 - face / divisor
    return tinted
|
||||
|
||||
|
||||
def _make_images(face=None):
    """Stack three copies of ``face``, offset by 0, 1 and 2.

    When ``face`` is None the downsampled face is used.
    """
    if face is None:
        face = _downsampled_face()

    # make a collection of faces
    stack = np.zeros((3,) + face.shape)
    stack[0] = face
    for offset in (1, 2):
        stack[offset] = face + offset
    return stack
|
||||
|
||||
# Module-level fixtures, built once at import time and shared by the tests.
downsampled_face = _downsampled_face()
orange_face, face_collection = (_orange_face(downsampled_face),
                                _make_images(downsampled_face))
|
||||
|
||||
|
||||
def test_extract_patches_all():
    """Extracting every 16x16 patch gives one patch per valid position."""
    face = downsampled_face
    height, width = face.shape
    patch_size = (16, 16)
    n_rows = height - patch_size[0] + 1
    n_cols = width - patch_size[1] + 1

    patches = extract_patches_2d(face, patch_size)
    assert patches.shape == (n_rows * n_cols,) + patch_size
|
||||
|
||||
|
||||
def test_extract_patches_all_color():
    """Patches from a 3-channel image keep the trailing channel axis."""
    face = orange_face
    height, width = face.shape[:2]
    patch_size = (16, 16)
    n_rows = height - patch_size[0] + 1
    n_cols = width - patch_size[1] + 1

    patches = extract_patches_2d(face, patch_size)
    assert patches.shape == (n_rows * n_cols,) + patch_size + (3,)
|
||||
|
||||
|
||||
def test_extract_patches_all_rect():
    """Non-square patches taken from a column slice of the face."""
    face = downsampled_face[:, 32:97]
    height, width = face.shape
    patch_size = (16, 12)
    n_rows = height - patch_size[0] + 1
    n_cols = width - patch_size[1] + 1

    patches = extract_patches_2d(face, patch_size)
    assert patches.shape == (n_rows * n_cols,) + patch_size
|
||||
|
||||
|
||||
def test_extract_patches_max_patches():
    """max_patches as a count, as a fraction, and with invalid floats."""
    face = downsampled_face
    i_h, i_w = face.shape
    p_h, p_w = 16, 16
    patch_size = (p_h, p_w)

    # Integer max_patches: exactly that many patches.
    patches = extract_patches_2d(face, patch_size, max_patches=100)
    assert patches.shape == (100, p_h, p_w)

    # Float max_patches: that fraction of all possible patches.
    expected_n_patches = int(0.5 * (i_h - p_h + 1) * (i_w - p_w + 1))
    patches = extract_patches_2d(face, patch_size, max_patches=0.5)
    assert patches.shape == (expected_n_patches, p_h, p_w)

    for invalid in (2.0, -1.0):
        with pytest.raises(ValueError):
            extract_patches_2d(face, patch_size, max_patches=invalid)
|
||||
|
||||
|
||||
def test_extract_patch_same_size_image():
    """A patch as large as the image yields exactly one patch."""
    face = downsampled_face
    # Request patches of the same size as image
    # Should return just the single patch a.k.a. the image
    patches = extract_patches_2d(face, face.shape, max_patches=2)
    n_patches = patches.shape[0]
    assert n_patches == 1
|
||||
|
||||
|
||||
def test_extract_patches_less_than_max_patches():
    """max_patches above the number of possible patches returns them all."""
    face = downsampled_face
    i_h, i_w = face.shape
    p_h = 3 * i_h // 4
    p_w = 3 * i_w // 4
    # this is 3185
    expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1)

    patches = extract_patches_2d(face, (p_h, p_w), max_patches=4000)
    assert patches.shape == (expected_n_patches, p_h, p_w)
|
||||
|
||||
|
||||
def test_reconstruct_patches_perfect():
    """Reconstructing from all 16x16 patches recovers the grayscale face."""
    face = downsampled_face
    patch_size = (16, 16)

    patches = extract_patches_2d(face, patch_size)
    rebuilt = reconstruct_from_patches_2d(patches, face.shape)
    np.testing.assert_array_almost_equal(face, rebuilt)
|
||||
|
||||
|
||||
def test_reconstruct_patches_perfect_color():
    """Reconstructing from all 16x16 patches recovers the 3-channel face."""
    face = orange_face
    patch_size = (16, 16)

    patches = extract_patches_2d(face, patch_size)
    rebuilt = reconstruct_from_patches_2d(patches, face.shape)
    np.testing.assert_array_almost_equal(face, rebuilt)
|
||||
|
||||
|
||||
def test_patch_extractor_fit():
    """fit() must return a value equal to the extractor it was called on."""
    extractor = PatchExtractor(patch_size=(8, 8), max_patches=100,
                               random_state=0)
    fitted = extractor.fit(face_collection)
    assert extractor == fitted
|
||||
|
||||
|
||||
def test_patch_extractor_max_patches():
    """max_patches applies per image, as a count or as a fraction."""
    faces = face_collection
    i_h, i_w = faces.shape[1:3]
    p_h, p_w = 8, 8

    # (max_patches, expected number of patches per image)
    cases = (
        (100, 100),
        (0.5, int((i_h - p_h + 1) * (i_w - p_w + 1) * 0.5)),
    )
    for max_patches, per_image in cases:
        expected_n_patches = len(faces) * per_image
        extr = PatchExtractor(patch_size=(p_h, p_w),
                              max_patches=max_patches, random_state=0)
        patches = extr.transform(faces)
        assert patches.shape == (expected_n_patches, p_h, p_w)
|
||||
|
||||
|
||||
def test_patch_extractor_max_patches_default():
    """Without patch_size, patches of the face collection are 19 by 25."""
    faces = face_collection
    extractor = PatchExtractor(max_patches=100, random_state=0)
    patches = extractor.transform(faces)
    expected_shape = (len(faces) * 100, 19, 25)
    assert patches.shape == expected_shape
|
||||
|
||||
|
||||
def test_patch_extractor_all_patches():
    """Without max_patches every 8x8 patch of every image is returned."""
    faces = face_collection
    height, width = faces.shape[1:3]
    patch_size = (8, 8)
    per_image = (height - patch_size[0] + 1) * (width - patch_size[1] + 1)

    extractor = PatchExtractor(patch_size=patch_size, random_state=0)
    patches = extractor.transform(faces)
    assert patches.shape == (len(faces) * per_image,) + patch_size
|
||||
|
||||
|
||||
def test_patch_extractor_color():
    """PatchExtractor keeps the channel axis of 3-channel images."""
    faces = _make_images(orange_face)
    height, width = faces.shape[1:3]
    patch_size = (8, 8)
    per_image = (height - patch_size[0] + 1) * (width - patch_size[1] + 1)

    extractor = PatchExtractor(patch_size=patch_size, random_state=0)
    patches = extractor.transform(faces)
    assert patches.shape == (len(faces) * per_image,) + patch_size + (3,)
|
||||
|
||||
|
||||
def test_extract_patches_strided():
    """Check _extract_patches view shapes and last patch in 1-D, 2-D and 3-D.

    The parallel lists below describe one case per position: image shape,
    patch shape, extraction step, expected leading shape of the patch view,
    and the start index of the last patch inside the image.
    """
    image_shapes_1D = [(10,), (10,), (11,), (10,)]
    patch_sizes_1D = [(1,), (2,), (3,), (8,)]
    patch_steps_1D = [(1,), (1,), (4,), (2,)]

    expected_views_1D = [(10,), (9,), (3,), (2,)]
    # The first entry was (10,), one past the end of the length-10 image:
    # the slice image[10:11] is empty, so the content comparison below
    # passed without checking anything. The last size-1 patch starts at 9.
    last_patch_1D = [(9,), (8,), (8,), (2,)]

    image_shapes_2D = [(10, 20), (10, 20), (10, 20), (11, 20)]
    patch_sizes_2D = [(2, 2), (10, 10), (10, 11), (6, 6)]
    patch_steps_2D = [(5, 5), (3, 10), (3, 4), (4, 2)]

    expected_views_2D = [(2, 4), (1, 2), (1, 3), (2, 8)]
    last_patch_2D = [(5, 15), (0, 10), (0, 8), (4, 14)]

    image_shapes_3D = [(5, 4, 3), (3, 3, 3), (7, 8, 9), (7, 8, 9)]
    patch_sizes_3D = [(2, 2, 3), (2, 2, 2), (1, 7, 3), (1, 3, 3)]
    patch_steps_3D = [(1, 2, 10), (1, 1, 1), (2, 1, 3), (3, 3, 4)]

    expected_views_3D = [(4, 2, 1), (2, 2, 2), (4, 2, 3), (3, 2, 2)]
    last_patch_3D = [(3, 2, 0), (1, 1, 1), (6, 1, 6), (6, 3, 4)]

    image_shapes = image_shapes_1D + image_shapes_2D + image_shapes_3D
    patch_sizes = patch_sizes_1D + patch_sizes_2D + patch_sizes_3D
    patch_steps = patch_steps_1D + patch_steps_2D + patch_steps_3D
    expected_views = expected_views_1D + expected_views_2D + expected_views_3D
    last_patches = last_patch_1D + last_patch_2D + last_patch_3D

    for (image_shape, patch_size, patch_step, expected_view,
         last_patch) in zip(image_shapes, patch_sizes, patch_steps,
                            expected_views, last_patches):
        # Distinct values everywhere so patch contents identify positions.
        image = np.arange(np.prod(image_shape)).reshape(image_shape)
        patches = _extract_patches(image, patch_shape=patch_size,
                                   extraction_step=patch_step)

        ndim = len(image_shape)

        # The leading ndim axes of the view index the patch positions.
        assert patches.shape[:ndim] == expected_view
        last_patch_slices = tuple(slice(i, i + j, None) for i, j in
                                  zip(last_patch, patch_size))
        assert (patches[(-1, None, None) * ndim] ==
                image[last_patch_slices].squeeze()).all()
|
||||
|
||||
|
||||
def test_extract_patches_square():
    # test same patch size for all dimensions
    face = downsampled_face
    height, width = face.shape
    p = 8
    n_rows = height - p + 1
    n_cols = width - p + 1

    patches = _extract_patches(face, patch_shape=p)
    assert patches.shape == (n_rows, n_cols, p, p)
|
||||
|
||||
|
||||
def test_width_patch():
    # width and height of the patch should be less than the image
    x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    # One patch too tall, one too wide, for the 3x3 image.
    for oversized in ((4, 1), (1, 4)):
        with pytest.raises(ValueError):
            extract_patches_2d(x, oversized)
|
||||
|
||||
|
||||
# TODO: Remove in 0.24
def test_extract_patches_deprecated():
    """The public extract_patches alias must emit its FutureWarning."""
    expected_message = (
        "The function feature_extraction.image.extract_patches has been "
        "deprecated in 0.22 and will be removed in 0.24."
    )
    with pytest.warns(FutureWarning, match=expected_message):
        extract_patches(downsampled_face)
|
1374
venv/Lib/site-packages/sklearn/feature_extraction/tests/test_text.py
Normal file
1374
venv/Lib/site-packages/sklearn/feature_extraction/tests/test_text.py
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue