""" Testing for Clustering methods """ import numpy as np import pytest from scipy.sparse import csr_matrix from sklearn.exceptions import ConvergenceWarning from sklearn.utils._testing import ( assert_array_equal, assert_warns, assert_warns_message, assert_no_warnings) from sklearn.cluster import AffinityPropagation from sklearn.cluster._affinity_propagation import ( _equal_similarities_and_preferences ) from sklearn.cluster import affinity_propagation from sklearn.datasets import make_blobs from sklearn.metrics import euclidean_distances n_clusters = 3 centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10 X, _ = make_blobs(n_samples=60, n_features=2, centers=centers, cluster_std=0.4, shuffle=True, random_state=0) def test_affinity_propagation(): # Affinity Propagation algorithm # Compute similarities S = -euclidean_distances(X, squared=True) preference = np.median(S) * 10 # Compute Affinity Propagation cluster_centers_indices, labels = affinity_propagation( S, preference=preference, random_state=39) n_clusters_ = len(cluster_centers_indices) assert n_clusters == n_clusters_ af = AffinityPropagation(preference=preference, affinity="precomputed", random_state=28) labels_precomputed = af.fit(S).labels_ af = AffinityPropagation(preference=preference, verbose=True, random_state=37) labels = af.fit(X).labels_ assert_array_equal(labels, labels_precomputed) cluster_centers_indices = af.cluster_centers_indices_ n_clusters_ = len(cluster_centers_indices) assert np.unique(labels).size == n_clusters_ assert n_clusters == n_clusters_ # Test also with no copy _, labels_no_copy = affinity_propagation(S, preference=preference, copy=False, random_state=74) assert_array_equal(labels, labels_no_copy) # Test input validation with pytest.raises(ValueError): affinity_propagation(S[:, :-1]) with pytest.raises(ValueError): affinity_propagation(S, damping=0) af = AffinityPropagation(affinity="unknown", random_state=78) with pytest.raises(ValueError): af.fit(X) af_2 = AffinityPropagation(affinity='precomputed', random_state=21) with pytest.raises(TypeError): af_2.fit(csr_matrix((3, 3))) def test_affinity_propagation_predict(): # Test AffinityPropagation.predict af = AffinityPropagation(affinity="euclidean", random_state=63) labels = af.fit_predict(X) labels2 = af.predict(X) assert_array_equal(labels, labels2) def test_affinity_propagation_predict_error(): # Test exception in AffinityPropagation.predict # Not fitted. af = AffinityPropagation(affinity="euclidean") with pytest.raises(ValueError): af.predict(X) # Predict not supported when affinity="precomputed". S = np.dot(X, X.T) af = AffinityPropagation(affinity="precomputed", random_state=57) af.fit(S) with pytest.raises(ValueError): af.predict(X) def test_affinity_propagation_fit_non_convergence(): # In case of non-convergence of affinity_propagation(), the cluster # centers should be an empty array and training samples should be labelled # as noise (-1) X = np.array([[0, 0], [1, 1], [-2, -2]]) # Force non-convergence by allowing only a single iteration af = AffinityPropagation(preference=-10, max_iter=1, random_state=82) assert_warns(ConvergenceWarning, af.fit, X) assert_array_equal(np.empty((0, 2)), af.cluster_centers_) assert_array_equal(np.array([-1, -1, -1]), af.labels_) def test_affinity_propagation_equal_mutual_similarities(): X = np.array([[-1, 1], [1, -1]]) S = -euclidean_distances(X, squared=True) # setting preference > similarity cluster_center_indices, labels = assert_warns_message( UserWarning, "mutually equal", affinity_propagation, S, preference=0) # expect every sample to become an exemplar assert_array_equal([0, 1], cluster_center_indices) assert_array_equal([0, 1], labels) # setting preference < similarity cluster_center_indices, labels = assert_warns_message( UserWarning, "mutually equal", affinity_propagation, S, preference=-10) # expect one cluster, with arbitrary (first) sample as exemplar assert_array_equal([0], cluster_center_indices) assert_array_equal([0, 0], labels) # setting different preferences cluster_center_indices, labels = assert_no_warnings( affinity_propagation, S, preference=[-20, -10], random_state=37) # expect one cluster, with highest-preference sample as exemplar assert_array_equal([1], cluster_center_indices) assert_array_equal([0, 0], labels) def test_affinity_propagation_predict_non_convergence(): # In case of non-convergence of affinity_propagation(), the cluster # centers should be an empty array X = np.array([[0, 0], [1, 1], [-2, -2]]) # Force non-convergence by allowing only a single iteration af = assert_warns(ConvergenceWarning, AffinityPropagation(preference=-10, max_iter=1, random_state=75).fit, X) # At prediction time, consider new samples as noise since there are no # clusters to_predict = np.array([[2, 2], [3, 3], [4, 4]]) y = assert_warns(ConvergenceWarning, af.predict, to_predict) assert_array_equal(np.array([-1, -1, -1]), y) def test_affinity_propagation_non_convergence_regressiontest(): X = np.array([[1, 0, 0, 0, 0, 0], [0, 1, 1, 1, 0, 0], [0, 0, 1, 0, 0, 1]]) af = AffinityPropagation(affinity='euclidean', max_iter=2, random_state=34).fit(X) assert_array_equal(np.array([-1, -1, -1]), af.labels_) def test_equal_similarities_and_preferences(): # Unequal distances X = np.array([[0, 0], [1, 1], [-2, -2]]) S = -euclidean_distances(X, squared=True) assert not _equal_similarities_and_preferences(S, np.array(0)) assert not _equal_similarities_and_preferences(S, np.array([0, 0])) assert not _equal_similarities_and_preferences(S, np.array([0, 1])) # Equal distances X = np.array([[0, 0], [1, 1]]) S = -euclidean_distances(X, squared=True) # Different preferences assert not _equal_similarities_and_preferences(S, np.array([0, 1])) # Same preferences assert _equal_similarities_and_preferences(S, np.array([0, 0])) assert _equal_similarities_and_preferences(S, np.array(0)) def test_affinity_propagation_random_state(): # Significance of random_state parameter # Generate sample data centers = [[1, 1], [-1, -1], [1, -1]] X, labels_true = make_blobs(n_samples=300, centers=centers, cluster_std=0.5, random_state=0) # random_state = 0 ap = AffinityPropagation(convergence_iter=1, max_iter=2, random_state=0) ap.fit(X) centers0 = ap.cluster_centers_ # random_state = 76 ap = AffinityPropagation(convergence_iter=1, max_iter=2, random_state=76) ap.fit(X) centers76 = ap.cluster_centers_ assert np.mean((centers0 - centers76) ** 2) > 1 # FIXME: to be removed in 0.25 def test_affinity_propagation_random_state_warning(): # test that a warning is raised when random_state is not defined. X = np.array([[0, 0], [1, 1], [-2, -2]]) match = ("'random_state' has been introduced in 0.23. " "It will be set to None starting from 0.25 which " "means that results will differ at every function " "call. Set 'random_state' to None to silence this " "warning, or to 0 to keep the behavior of versions " "<0.23.") with pytest.warns(FutureWarning, match=match): AffinityPropagation().fit(X) @pytest.mark.parametrize('centers', [csr_matrix(np.zeros((1, 10))), np.zeros((1, 10))]) def test_affinity_propagation_convergence_warning_dense_sparse(centers): """Non-regression, see #13334""" rng = np.random.RandomState(42) X = rng.rand(40, 10) y = (4 * rng.rand(40)).astype(np.int) ap = AffinityPropagation(random_state=46) ap.fit(X, y) ap.cluster_centers_ = centers with pytest.warns(None) as record: assert_array_equal(ap.predict(X), np.zeros(X.shape[0], dtype=int)) assert len(record) == 0 def test_affinity_propagation_float32(): # Test to fix incorrect clusters due to dtype change # (non-regression test for issue #10832) X = np.array([[1, 0, 0, 0], [0, 1, 1, 0], [0, 1, 1, 0], [0, 0, 0, 1]], dtype='float32') afp = AffinityPropagation(preference=1, affinity='precomputed', random_state=0).fit(X) expected = np.array([0, 1, 1, 2]) assert_array_equal(afp.labels_, expected)