Uploaded Test files
This commit is contained in:
parent
f584ad9d97
commit
2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
|
@ -0,0 +1,246 @@
|
|||
"""
|
||||
Testing for Clustering methods
|
||||
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from scipy.sparse import csr_matrix
|
||||
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
from sklearn.utils._testing import (
|
||||
assert_array_equal, assert_warns,
|
||||
assert_warns_message, assert_no_warnings)
|
||||
|
||||
from sklearn.cluster import AffinityPropagation
|
||||
from sklearn.cluster._affinity_propagation import (
|
||||
_equal_similarities_and_preferences
|
||||
)
|
||||
from sklearn.cluster import affinity_propagation
|
||||
from sklearn.datasets import make_blobs
|
||||
from sklearn.metrics import euclidean_distances
|
||||
|
||||
# Shared fixture: 60 two-dimensional samples generated around three centers.
n_clusters = 3
centers = 10 + np.array([[1, 1], [-1, -1], [1, -1]])
X, _ = make_blobs(
    n_samples=60,
    n_features=2,
    centers=centers,
    cluster_std=0.4,
    shuffle=True,
    random_state=0,
)
|
||||
|
||||
|
||||
def test_affinity_propagation():
    """Exercise the function and estimator APIs on the shared blob data.

    Both entry points are expected to find ``n_clusters`` exemplars, to
    agree on the labels, and to reject invalid input.
    """
    # Similarities are the negated squared euclidean distances.
    similarities = -euclidean_distances(X, squared=True)
    pref = 10 * np.median(similarities)

    # Function API.
    exemplar_idx, _ = affinity_propagation(
        similarities, preference=pref, random_state=39)
    assert len(exemplar_idx) == n_clusters

    # Estimator API on the precomputed similarities ...
    precomputed_model = AffinityPropagation(
        preference=pref, affinity="precomputed", random_state=28)
    precomputed_labels = precomputed_model.fit(similarities).labels_

    # ... and on the raw samples; both must give the same labels.
    feature_model = AffinityPropagation(
        preference=pref, verbose=True, random_state=37)
    feature_labels = feature_model.fit(X).labels_
    assert_array_equal(feature_labels, precomputed_labels)

    found = len(feature_model.cluster_centers_indices_)
    assert np.unique(feature_labels).size == found
    assert found == n_clusters

    # Test also with no copy
    _, no_copy_labels = affinity_propagation(
        similarities, preference=pref, copy=False, random_state=74)
    assert_array_equal(feature_labels, no_copy_labels)

    # Test input validation
    with pytest.raises(ValueError):
        # Similarity matrix with its last column dropped.
        affinity_propagation(similarities[:, :-1])
    with pytest.raises(ValueError):
        affinity_propagation(similarities, damping=0)

    unknown_affinity = AffinityPropagation(affinity="unknown",
                                           random_state=78)
    with pytest.raises(ValueError):
        unknown_affinity.fit(X)

    sparse_precomputed = AffinityPropagation(affinity='precomputed',
                                             random_state=21)
    with pytest.raises(TypeError):
        sparse_precomputed.fit(csr_matrix((3, 3)))
|
||||
|
||||
def test_affinity_propagation_predict():
    """predict(X) on the training data must match fit_predict(X)."""
    model = AffinityPropagation(affinity="euclidean", random_state=63)
    fitted_labels = model.fit_predict(X)
    predicted_labels = model.predict(X)
    assert_array_equal(fitted_labels, predicted_labels)
|
||||
|
||||
|
||||
def test_affinity_propagation_predict_error():
    """predict is expected to raise ValueError in two situations."""
    # Case 1: the estimator has not been fitted.
    unfitted = AffinityPropagation(affinity="euclidean")
    with pytest.raises(ValueError):
        unfitted.predict(X)

    # Case 2: predict is not supported when affinity="precomputed".
    gram = X.dot(X.T)
    precomputed = AffinityPropagation(affinity="precomputed",
                                      random_state=57)
    precomputed.fit(gram)
    with pytest.raises(ValueError):
        precomputed.predict(X)
|
||||
|
||||
|
||||
def test_affinity_propagation_fit_non_convergence():
    """A non-converged fit must expose no centers and label all samples -1.

    In case of non-convergence of affinity_propagation(), the cluster
    centers should be an empty array and training samples should be
    labelled as noise (-1).
    """
    samples = np.array([[0, 0], [1, 1], [-2, -2]])

    # A single iteration is not enough to converge.
    model = AffinityPropagation(preference=-10, max_iter=1, random_state=82)
    assert_warns(ConvergenceWarning, model.fit, samples)

    no_centers = np.empty((0, 2))
    all_noise = np.array([-1, -1, -1])
    assert_array_equal(no_centers, model.cluster_centers_)
    assert_array_equal(all_noise, model.labels_)
|
||||
|
||||
|
||||
def test_affinity_propagation_equal_mutual_similarities():
    """Check the results returned for two mutually equidistant samples."""
    points = np.array([[-1, 1], [1, -1]])
    similarities = -euclidean_distances(points, squared=True)

    # (preference, expected exemplar indices, expected labels)
    warning_cases = (
        # preference > similarity: every sample becomes an exemplar
        (0, [0, 1], [0, 1]),
        # preference < similarity: one cluster, with arbitrary (first)
        # sample as exemplar
        (-10, [0], [0, 0]),
    )
    for pref, expected_centers, expected_labels in warning_cases:
        found_centers, found_labels = assert_warns_message(
            UserWarning, "mutually equal",
            affinity_propagation, similarities, preference=pref)
        assert_array_equal(expected_centers, found_centers)
        assert_array_equal(expected_labels, found_labels)

    # Different preferences: no warning is expected, and the result is one
    # cluster with the highest-preference sample as exemplar.
    found_centers, found_labels = assert_no_warnings(
        affinity_propagation, similarities,
        preference=[-20, -10], random_state=37)
    assert_array_equal([1], found_centers)
    assert_array_equal([0, 0], found_labels)
|
||||
|
||||
|
||||
def test_affinity_propagation_predict_non_convergence():
    """After a non-converged fit, predict must warn and return only -1.

    In case of non-convergence of affinity_propagation(), the cluster
    centers should be an empty array.
    """
    train = np.array([[0, 0], [1, 1], [-2, -2]])

    # A single iteration is not enough to converge.
    estimator = AffinityPropagation(preference=-10, max_iter=1,
                                    random_state=75)
    fitted = assert_warns(ConvergenceWarning, estimator.fit, train)

    # At prediction time, consider new samples as noise since there are no
    # clusters
    new_samples = np.array([[2, 2], [3, 3], [4, 4]])
    predicted = assert_warns(ConvergenceWarning, fitted.predict, new_samples)
    assert_array_equal(np.array([-1, -1, -1]), predicted)
|
||||
|
||||
|
||||
def test_affinity_propagation_non_convergence_regressiontest():
    """With max_iter=2 on this data every sample must be labelled -1."""
    data = np.array([
        [1, 0, 0, 0, 0, 0],
        [0, 1, 1, 1, 0, 0],
        [0, 0, 1, 0, 0, 1],
    ])
    model = AffinityPropagation(affinity='euclidean', max_iter=2,
                                random_state=34)
    model.fit(data)
    expected_labels = np.array([-1, -1, -1])
    assert_array_equal(expected_labels, model.labels_)
|
||||
|
||||
|
||||
def test_equal_similarities_and_preferences():
    """Check _equal_similarities_and_preferences on small hand-made inputs."""
    # Unequal distances: the helper must be falsy whatever the preference.
    unequal = -euclidean_distances(
        np.array([[0, 0], [1, 1], [-2, -2]]), squared=True)
    for pref in (np.array(0), np.array([0, 0]), np.array([0, 1])):
        assert not _equal_similarities_and_preferences(unequal, pref)

    # Equal distances
    equal = -euclidean_distances(np.array([[0, 0], [1, 1]]), squared=True)

    # Different preferences
    assert not _equal_similarities_and_preferences(equal, np.array([0, 1]))

    # Same preferences
    for pref in (np.array([0, 0]), np.array(0)):
        assert _equal_similarities_and_preferences(equal, pref)
|
||||
|
||||
|
||||
def test_affinity_propagation_random_state():
    """Different random_state values must give clearly different centers."""
    # Generate sample data
    blob_centers = [[1, 1], [-1, -1], [1, -1]]
    data, _ = make_blobs(n_samples=300, centers=blob_centers,
                         cluster_std=0.5, random_state=0)

    def fitted_centers(seed):
        # Fit with a very small iteration budget and the given seed.
        model = AffinityPropagation(convergence_iter=1, max_iter=2,
                                    random_state=seed)
        model.fit(data)
        return model.cluster_centers_

    centers_seed_0 = fitted_centers(0)
    centers_seed_76 = fitted_centers(76)

    assert np.mean((centers_seed_0 - centers_seed_76) ** 2) > 1
|
||||
|
||||
|
||||
# FIXME: to be removed in 0.25
def test_affinity_propagation_random_state_warning():
    """Fitting without an explicit random_state must emit a FutureWarning."""
    samples = np.array([[0, 0], [1, 1], [-2, -2]])
    expected_message = (
        "'random_state' has been introduced in 0.23. It will be set to None "
        "starting from 0.25 which means that results will differ at every "
        "function call. Set 'random_state' to None to silence this warning, "
        "or to 0 to keep the behavior of versions <0.23."
    )
    estimator = AffinityPropagation()
    with pytest.warns(FutureWarning, match=expected_message):
        estimator.fit(samples)
|
||||
|
||||
@pytest.mark.parametrize('centers', [csr_matrix(np.zeros((1, 10))),
                                     np.zeros((1, 10))])
def test_affinity_propagation_convergence_warning_dense_sparse(centers):
    """Non-regression, see #13334

    With a single cluster center assigned by hand (sparse or dense),
    ``predict`` must label every sample 0 and emit no warning at all.

    Parameters
    ----------
    centers : scipy.sparse.csr_matrix or numpy.ndarray
        All-zero array of shape (1, 10) used to overwrite the fitted
        ``cluster_centers_``.
    """
    # Local import so this test does not rely on a module-level import.
    import warnings

    rng = np.random.RandomState(42)
    X = rng.rand(40, 10)
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from recent NumPy releases; the builtin gives the same dtype.
    y = (4 * rng.rand(40)).astype(int)
    ap = AffinityPropagation(random_state=46)
    ap.fit(X, y)
    ap.cluster_centers_ = centers
    # ``pytest.warns(None)`` is no longer accepted by recent pytest; record
    # every warning with the standard library instead.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        assert_array_equal(ap.predict(X),
                           np.zeros(X.shape[0], dtype=int))
    assert len(record) == 0, [str(w.message) for w in record]
|
||||
|
||||
|
||||
def test_affinity_propagation_float32():
    """Non-regression test for issue #10832.

    Test to fix incorrect clusters due to dtype change: a float32
    precomputed matrix must yield the expected labels.
    """
    similarities = np.array(
        [[1, 0, 0, 0],
         [0, 1, 1, 0],
         [0, 1, 1, 0],
         [0, 0, 0, 1]],
        dtype='float32',
    )
    model = AffinityPropagation(preference=1, affinity='precomputed',
                                random_state=0)
    fitted = model.fit(similarities)
    assert_array_equal(fitted.labels_, np.array([0, 1, 1, 2]))
|
Loading…
Add table
Add a link
Reference in a new issue