189 lines
6.4 KiB
Python
189 lines
6.4 KiB
Python
|
from itertools import product
|
||
|
import numpy as np
|
||
|
from numpy.testing import assert_almost_equal, assert_array_almost_equal
|
||
|
import pytest
|
||
|
|
||
|
from sklearn import datasets
|
||
|
from sklearn import manifold
|
||
|
from sklearn import neighbors
|
||
|
from sklearn import pipeline
|
||
|
from sklearn import preprocessing
|
||
|
|
||
|
from scipy.sparse import rand as sparse_rand
|
||
|
|
||
|
# Values passed as Isomap's ``eigen_solver`` argument by the tests below.
eigen_solvers = [
    'auto',
    'dense',
    'arpack',
]

# Values passed as Isomap's ``path_method`` argument by the tests below
# (per the scikit-learn docs: 'FW' is Floyd-Warshall, 'D' is Dijkstra).
path_methods = [
    'auto',
    'FW',
    'D',
]
|
||
|
|
||
|
|
||
|
def test_isomap_simple_grid():
    # When every other point is used as a neighbor, the Isomap embedding of
    # a regular 2D grid should reproduce the input's pairwise distances.
    side = 5
    n_points = side ** 2
    n_neighbors = n_points - 1

    # side x side integer lattice in 2D; the embedding keeps 2 components
    X = np.array(list(product(range(side), repeat=2)))

    # dense matrix of distances from each input point to all the others
    dist_input = neighbors.kneighbors_graph(
        X, n_neighbors, mode='distance').toarray()

    # exercise every (eigen solver, shortest-path method) combination
    for eigen_solver, path_method in product(eigen_solvers, path_methods):
        iso = manifold.Isomap(
            n_neighbors=n_neighbors,
            n_components=2,
            eigen_solver=eigen_solver,
            path_method=path_method,
        )
        iso.fit(X)

        # same distance matrix, computed on the embedded points
        dist_embedded = neighbors.kneighbors_graph(
            iso.embedding_, n_neighbors, mode='distance').toarray()
        assert_array_almost_equal(dist_input, dist_embedded)
|
||
|
|
||
|
|
||
|
def test_isomap_reconstruction_error():
    # Same 2D grid as test_isomap_simple_grid, plus a noisy third coordinate.
    # The reconstruction error reported by Isomap must agree with the value
    # computed by hand from the centered input and output kernels.
    side = 5
    n_points = side ** 2
    n_neighbors = n_points - 1

    # side x side integer lattice in 2D
    grid = np.array(list(product(range(side), repeat=2)))

    # append a small-amplitude Gaussian coordinate as a third dimension
    rng = np.random.RandomState(0)
    jitter = 0.1 * rng.randn(n_points, 1)
    X = np.concatenate((grid, jitter), 1)

    # input kernel: centered version of -0.5 * (pairwise distances) ** 2
    dist_input = neighbors.kneighbors_graph(
        X, n_neighbors, mode='distance').toarray()
    centerer = preprocessing.KernelCenterer()
    kernel_input = centerer.fit_transform(-0.5 * dist_input ** 2)

    for eigen_solver, path_method in product(eigen_solvers, path_methods):
        iso = manifold.Isomap(
            n_neighbors=n_neighbors,
            n_components=2,
            eigen_solver=eigen_solver,
            path_method=path_method,
        )
        iso.fit(X)

        # output kernel, built the same way from the embedded points
        dist_embedded = neighbors.kneighbors_graph(
            iso.embedding_, n_neighbors, mode='distance').toarray()
        kernel_embedded = centerer.fit_transform(-0.5 * dist_embedded ** 2)

        # the hand-computed error must match the estimator's own value
        expected_error = (
            np.linalg.norm(kernel_input - kernel_embedded) / n_points
        )
        assert_almost_equal(expected_error, iso.reconstruction_error())
|
||
|
|
||
|
|
||
|
def test_transform():
    # Embedding a slightly perturbed copy of the training data with
    # ``transform`` should land close to the original embedding.
    n_samples = 200
    n_components = 10
    noise_scale = 0.01

    # S-curve dataset; the second return value is not needed here
    X, _ = datasets.make_s_curve(n_samples, random_state=0)

    # fit Isomap and keep the embedding of the training points
    iso = manifold.Isomap(n_components=n_components, n_neighbors=2)
    embedded = iso.fit_transform(X)

    # embed a copy of the points perturbed by Gaussian noise
    rng = np.random.RandomState(0)
    perturbation = noise_scale * rng.randn(*X.shape)
    embedded_noisy = iso.transform(X + perturbation)

    # the RMS gap between the two embeddings must stay below 2 * noise_scale
    rms_gap = np.sqrt(np.mean((embedded - embedded_noisy) ** 2))
    assert rms_gap < 2 * noise_scale
|
||
|
|
||
|
|
||
|
def test_pipeline():
    # Isomap should work as a transformer step in a Pipeline: fit the
    # pipeline on blob data and require a training accuracy above 0.9 from
    # the downstream k-nearest-neighbors classifier.
    # TODO check that it actually does something useful
    X, y = datasets.make_blobs(random_state=0)
    steps = [
        ('isomap', manifold.Isomap()),
        ('clf', neighbors.KNeighborsClassifier()),
    ]
    clf = pipeline.Pipeline(steps)
    clf.fit(X, y)
    assert clf.score(X, y) > .9
|
||
|
|
||
|
|
||
|
def test_pipeline_with_nearest_neighbors_transformer():
    # Chaining KNeighborsTransformer into Isomap(metric='precomputed') must
    # give the same embedding as a single Isomap that runs its own neighbor
    # search, both on the training data and on unseen data.
    algorithm = 'auto'
    n_neighbors = 10

    X_train, _ = datasets.make_blobs(random_state=0)
    X_new, _ = datasets.make_blobs(random_state=1)

    # chained version: explicit distance graph fed to a precomputed Isomap
    graph_step = neighbors.KNeighborsTransformer(
        n_neighbors=n_neighbors, algorithm=algorithm, mode='distance')
    isomap_step = manifold.Isomap(
        n_neighbors=n_neighbors, metric='precomputed')
    est_chain = pipeline.make_pipeline(graph_step, isomap_step)

    # compact version: one estimator doing both steps internally
    est_compact = manifold.Isomap(
        n_neighbors=n_neighbors, neighbors_algorithm=algorithm)

    # embeddings of the training data must agree
    chain_out = est_chain.fit_transform(X_train)
    compact_out = est_compact.fit_transform(X_train)
    assert_array_almost_equal(chain_out, compact_out)

    # and so must the embeddings of data not seen during fit
    chain_out = est_chain.transform(X_new)
    compact_out = est_compact.transform(X_new)
    assert_array_almost_equal(chain_out, compact_out)
|
||
|
|
||
|
|
||
|
def test_different_metric():
    # The ``metric`` and ``p`` parameters must be honoured, and the default
    # configuration must behave like the euclidean metric.
    def norm_of_both(x1, x2):
        # callable metric used for the last case below
        return np.sqrt(np.sum(x1 ** 2 + x2 ** 2))

    # each case: (metric, p, expected to match the default embedding?)
    cases = [
        ('euclidean', 2, True),
        ('manhattan', 1, False),
        ('minkowski', 1, False),
        ('minkowski', 2, True),
        (norm_of_both, 2, False),
    ]

    X, _ = datasets.make_blobs(random_state=0)
    # embedding produced by an Isomap with all-default parameters
    reference = manifold.Isomap().fit_transform(X)

    for metric, p, matches_default in cases:
        embedding = manifold.Isomap(metric=metric, p=p).fit_transform(X)

        if not matches_default:
            # the comparison itself has to fail for non-euclidean settings
            with pytest.raises(AssertionError, match='not almost equal'):
                assert_array_almost_equal(embedding, reference)
            continue

        assert_array_almost_equal(embedding, reference)
|
||
|
|
||
|
|
||
|
def test_isomap_clone_bug():
    # regression test for bug reported in #6062: after ``set_params``
    # changes ``n_neighbors`` and the model is refitted, the fitted
    # neighbors object (``nbrs_``) must carry the new value.

    # Seeded generator instead of the global ``np.random`` state, so the
    # data fitted here is the same on every run and a failure can be
    # reproduced.
    rng = np.random.RandomState(0)

    model = manifold.Isomap()
    for n_neighbors in [10, 15, 20]:
        model.set_params(n_neighbors=n_neighbors)
        model.fit(rng.rand(50, 2))
        assert model.nbrs_.n_neighbors == n_neighbors
|
||
|
|
||
|
|
||
|
def test_sparse_input():
    # Smoke test: fitting Isomap on a CSR sparse matrix should not raise for
    # any combination of eigen solver and shortest-path method.

    # Fixed seed so every run fits the same matrix; without ``random_state``
    # the entries are drawn from NumPy's global random state, which makes a
    # failure impossible to reproduce.
    X = sparse_rand(100, 3, density=0.1, format='csr', random_state=0)

    # Should not error
    for eigen_solver, path_method in product(eigen_solvers, path_methods):
        clf = manifold.Isomap(
            n_components=2,
            eigen_solver=eigen_solver,
            path_method=path_method,
        )
        clf.fit(X)
|