Uploaded Test files

2020-11-12 11:05:57 -05:00 · 2020-11-12 11:05:57 -05:00 · 2e81cb7d99
commit 2e81cb7d99
parent f584ad9d97
16627 changed files with 2065359 additions and 102444 deletions
--- a/venv/Lib/site-packages/sklearn/cluster/tests/__init__.py
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/__init__.py
--- a/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/__init__.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/__init__.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/common.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/common.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_affinity_propagation.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_affinity_propagation.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_bicluster.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_bicluster.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_birch.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_birch.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_dbscan.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_dbscan.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_feature_agglomeration.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_feature_agglomeration.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_hierarchical.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_hierarchical.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_k_means.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_k_means.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_mean_shift.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_mean_shift.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_optics.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_optics.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_spectral.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/__pycache__/test_spectral.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/cluster/tests/common.py
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/common.py
@@ -0,0 +1,28 @@
+"""
+Common utilities for testing clustering.
+
+"""
+
+import numpy as np
+
+
+###############################################################################
+# Generate sample data
+
+def generate_clustered_data(seed=0, n_clusters=3, n_features=2,
+                            n_samples_per_cluster=20, std=.4):
+    """Draw Gaussian blobs around fixed centers located away from zero.
+
+    Parameters
+    ----------
+    seed : int
+        Seed of the ``np.random.RandomState`` used to draw the noise.
+    n_clusters : int
+        Number of blobs; rows ``0 .. n_clusters - 1`` of the 4-row table
+        of centers are used.
+    n_features : int
+        Number of leading coordinates taken from each center (the table
+        has 4 columns).
+    n_samples_per_cluster : int
+        Number of samples drawn for each blob.
+    std : float
+        Scale applied to the standard normal noise added to each center.
+
+    Returns
+    -------
+    X : ndarray of shape (n_clusters * n_samples_per_cluster, n_features)
+        The samples, stacked one cluster after the other.
+    """
+    rng = np.random.RandomState(seed)
+
+    # the data is voluntarily shifted away from zero to check clustering
+    # algorithm robustness with regards to non centered data
+    centers = 10 + np.array([[1, 1, 1, 0],
+                             [-1, -1, 0, 1],
+                             [1, -1, 1, 1],
+                             [-1, 1, 1, 0]])
+
+    # Start from an empty float block so that n_clusters=0 still returns
+    # an array of shape (0, n_features).
+    blocks = [np.empty((0, n_features))]
+    for i in range(n_clusters):
+        noise = std * rng.randn(n_samples_per_cluster, n_features)
+        blocks.append(centers[i][:n_features] + noise)
+    return np.vstack(blocks)
--- a/venv/Lib/site-packages/sklearn/cluster/tests/test_affinity_propagation.py
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/test_affinity_propagation.py
@@ -0,0 +1,246 @@
+"""
+Testing for Clustering methods
+
+"""
+
+import numpy as np
+import pytest
+from scipy.sparse import csr_matrix
+
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.utils._testing import (
+    assert_array_equal, assert_warns,
+    assert_warns_message, assert_no_warnings)
+
+from sklearn.cluster import AffinityPropagation
+from sklearn.cluster._affinity_propagation import (
+    _equal_similarities_and_preferences
+)
+from sklearn.cluster import affinity_propagation
+from sklearn.datasets import make_blobs
+from sklearn.metrics import euclidean_distances
+
+# Module-level data shared by the tests below: 60 two-dimensional samples
+# drawn around three centers that are shifted away from the origin.
+# n_clusters is the number of clusters the tests compare against.
+n_clusters = 3
+centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10
+X, _ = make_blobs(n_samples=60, n_features=2, centers=centers,
+                  cluster_std=0.4, shuffle=True, random_state=0)
+
+
+def test_affinity_propagation():
+    """Check the function and estimator APIs of affinity propagation.
+
+    Both must find ``n_clusters`` clusters on the module-level data and
+    agree on the labels; invalid inputs must raise.
+    """
+    # Similarities are the negated squared euclidean distances.
+    similarities = -euclidean_distances(X, squared=True)
+    preference = 10 * np.median(similarities)
+
+    # Function API on the similarity matrix.
+    exemplars, labels = affinity_propagation(
+        similarities, preference=preference, random_state=39)
+    assert n_clusters == len(exemplars)
+
+    # Estimator fitted on the precomputed similarities ...
+    af = AffinityPropagation(preference=preference, affinity="precomputed",
+                             random_state=28)
+    labels_precomputed = af.fit(similarities).labels_
+
+    # ... and estimator fitted on the raw features.
+    af = AffinityPropagation(preference=preference, verbose=True,
+                             random_state=37)
+    labels = af.fit(X).labels_
+    assert_array_equal(labels, labels_precomputed)
+
+    n_exemplars = len(af.cluster_centers_indices_)
+    assert np.unique(labels).size == n_exemplars
+    assert n_clusters == n_exemplars
+
+    # Test also with no copy
+    _, labels_no_copy = affinity_propagation(
+        similarities, preference=preference, copy=False, random_state=74)
+    assert_array_equal(labels, labels_no_copy)
+
+    # Test input validation
+    with pytest.raises(ValueError):
+        affinity_propagation(similarities[:, :-1])
+    with pytest.raises(ValueError):
+        affinity_propagation(similarities, damping=0)
+    unknown_affinity = AffinityPropagation(affinity="unknown",
+                                           random_state=78)
+    with pytest.raises(ValueError):
+        unknown_affinity.fit(X)
+    precomputed = AffinityPropagation(affinity='precomputed',
+                                      random_state=21)
+    with pytest.raises(TypeError):
+        precomputed.fit(csr_matrix((3, 3)))
+
+def test_affinity_propagation_predict():
+    """predict() on the training data must reproduce fit_predict()."""
+    model = AffinityPropagation(affinity="euclidean", random_state=63)
+    fitted_labels = model.fit_predict(X)
+    predicted_labels = model.predict(X)
+    assert_array_equal(fitted_labels, predicted_labels)
+
+
+def test_affinity_propagation_predict_error():
+    """predict() must raise ValueError in the two cases checked below."""
+    # Case 1: the estimator is not fitted.
+    unfitted = AffinityPropagation(affinity="euclidean")
+    with pytest.raises(ValueError):
+        unfitted.predict(X)
+
+    # Case 2: predict is not supported when affinity="precomputed"
+    # (the estimator is fitted here on the matrix X @ X.T).
+    gram = np.dot(X, X.T)
+    precomputed = AffinityPropagation(affinity="precomputed",
+                                      random_state=57)
+    precomputed.fit(gram)
+    with pytest.raises(ValueError):
+        precomputed.predict(X)
+
+
+def test_affinity_propagation_fit_non_convergence():
+    """A fit that does not converge gives no centers and noise labels.
+
+    The cluster centers are expected to be an empty array and every
+    training sample is expected to be labelled as noise (-1).
+    """
+    samples = np.array([[0, 0], [1, 1], [-2, -2]])
+
+    # Force non-convergence by allowing only a single iteration
+    model = AffinityPropagation(preference=-10, max_iter=1, random_state=82)
+    assert_warns(ConvergenceWarning, model.fit, samples)
+
+    assert_array_equal(np.empty((0, 2)), model.cluster_centers_)
+    assert_array_equal(np.array([-1, -1, -1]), model.labels_)
+
+
+def test_affinity_propagation_equal_mutual_similarities():
+    """Check exemplars, labels and warnings for two mutually equal samples."""
+    points = np.array([[-1, 1], [1, -1]])
+    similarities = -euclidean_distances(points, squared=True)
+
+    # setting preference > similarity:
+    # expect a warning and every sample to become an exemplar
+    centers_idx, labels = assert_warns_message(
+        UserWarning, "mutually equal", affinity_propagation, similarities,
+        preference=0)
+    assert_array_equal([0, 1], centers_idx)
+    assert_array_equal([0, 1], labels)
+
+    # setting preference < similarity:
+    # expect a warning and one cluster, with arbitrary (first) sample as
+    # exemplar
+    centers_idx, labels = assert_warns_message(
+        UserWarning, "mutually equal", affinity_propagation, similarities,
+        preference=-10)
+    assert_array_equal([0], centers_idx)
+    assert_array_equal([0, 0], labels)
+
+    # setting different preferences:
+    # expect no warning and one cluster, with highest-preference sample as
+    # exemplar
+    centers_idx, labels = assert_no_warnings(
+        affinity_propagation, similarities, preference=[-20, -10],
+        random_state=37)
+    assert_array_equal([1], centers_idx)
+    assert_array_equal([0, 0], labels)
+
+
+def test_affinity_propagation_predict_non_convergence():
+    """predict() after a non-converged fit must label samples as noise.
+
+    In case of non-convergence the cluster centers should be an empty
+    array, so new samples are expected to get the label -1.
+    """
+    train = np.array([[0, 0], [1, 1], [-2, -2]])
+
+    # Force non-convergence by allowing only a single iteration
+    estimator = AffinityPropagation(preference=-10, max_iter=1,
+                                    random_state=75)
+    af = assert_warns(ConvergenceWarning, estimator.fit, train)
+
+    # At prediction time, consider new samples as noise since there are no
+    # clusters
+    new_samples = np.array([[2, 2], [3, 3], [4, 4]])
+    predicted = assert_warns(ConvergenceWarning, af.predict, new_samples)
+    assert_array_equal(np.array([-1, -1, -1]), predicted)
+
+
+def test_affinity_propagation_non_convergence_regressiontest():
+    """With max_iter=2 on this input, every label is expected to be -1."""
+    data = np.array([[1, 0, 0, 0, 0, 0],
+                     [0, 1, 1, 1, 0, 0],
+                     [0, 0, 1, 0, 0, 1]])
+    model = AffinityPropagation(affinity='euclidean', max_iter=2,
+                                random_state=34)
+    fitted = model.fit(data)
+    assert_array_equal(np.array([-1, -1, -1]), fitted.labels_)
+
+
+def test_equal_similarities_and_preferences():
+    """Check _equal_similarities_and_preferences on small inputs."""
+    # Unequal distances: the check must fail whatever the preference.
+    unequal = -euclidean_distances(np.array([[0, 0], [1, 1], [-2, -2]]),
+                                   squared=True)
+    for preference in (np.array(0), np.array([0, 0]), np.array([0, 1])):
+        assert not _equal_similarities_and_preferences(unequal, preference)
+
+    # Equal distances
+    equal = -euclidean_distances(np.array([[0, 0], [1, 1]]), squared=True)
+
+    # Different preferences
+    assert not _equal_similarities_and_preferences(equal, np.array([0, 1]))
+
+    # Same preferences
+    for preference in (np.array([0, 0]), np.array(0)):
+        assert _equal_similarities_and_preferences(equal, preference)
+
+
+def test_affinity_propagation_random_state():
+    """Two different random_state values must give different centers."""
+    # Generate sample data
+    blob_centers = [[1, 1], [-1, -1], [1, -1]]
+    data, _ = make_blobs(n_samples=300, centers=blob_centers,
+                         cluster_std=0.5, random_state=0)
+
+    def fitted_centers(seed):
+        # Fit with the given seed and return the resulting cluster centers.
+        model = AffinityPropagation(convergence_iter=1, max_iter=2,
+                                    random_state=seed)
+        model.fit(data)
+        return model.cluster_centers_
+
+    centers0 = fitted_centers(0)
+    centers76 = fitted_centers(76)
+
+    assert np.mean((centers0 - centers76) ** 2) > 1
+
+
+# FIXME: to be removed in 0.25
+def test_affinity_propagation_random_state_warning():
+    """Fitting without setting random_state must raise a FutureWarning."""
+    data = np.array([[0, 0], [1, 1], [-2, -2]])
+    expected_message = (
+        "'random_state' has been introduced in 0.23. "
+        "It will be set to None starting from 0.25 which "
+        "means that results will differ at every function "
+        "call. Set 'random_state' to None to silence this "
+        "warning, or to 0 to keep the behavior of versions "
+        "<0.23."
+    )
+    with pytest.warns(FutureWarning, match=expected_message):
+        AffinityPropagation().fit(data)
+
+@pytest.mark.parametrize('centers', [csr_matrix(np.zeros((1, 10))),
+                                     np.zeros((1, 10))])
+def test_affinity_propagation_convergence_warning_dense_sparse(centers):
+    """Non-regression, see #13334
+
+    With ``cluster_centers_`` replaced by a single center, given either as
+    a sparse or as a dense array, ``predict`` must assign every sample to
+    cluster 0 and must not emit any warning.
+    """
+    # Function-scope import: only this test needs the stdlib recorder.
+    import warnings
+
+    rng = np.random.RandomState(42)
+    X = rng.rand(40, 10)
+    # ``np.int`` was only an alias of the builtin ``int``; it has been
+    # removed from NumPy, so use the builtin directly.
+    y = (4 * rng.rand(40)).astype(int)
+    ap = AffinityPropagation(random_state=46)
+    ap.fit(X, y)
+    ap.cluster_centers_ = centers
+    # ``pytest.warns(None)`` is deprecated; record warnings with the
+    # standard library instead and make sure none is filtered out.
+    with warnings.catch_warnings(record=True) as record:
+        warnings.simplefilter("always")
+        assert_array_equal(ap.predict(X),
+                           np.zeros(X.shape[0], dtype=int))
+    assert len(record) == 0
+
+
+def test_affinity_propagation_float32():
+    """Non-regression test for issue #10832.
+
+    Test to fix incorrect clusters due to dtype change: a float32
+    precomputed matrix must give the expected labels.
+    """
+    similarities = np.array([[1, 0, 0, 0],
+                             [0, 1, 1, 0],
+                             [0, 1, 1, 0],
+                             [0, 0, 0, 1]], dtype='float32')
+    model = AffinityPropagation(preference=1, affinity='precomputed',
+                                random_state=0)
+    fitted = model.fit(similarities)
+    assert_array_equal(fitted.labels_, np.array([0, 1, 1, 2]))
--- a/venv/Lib/site-packages/sklearn/cluster/tests/test_bicluster.py
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/test_bicluster.py
@@ -0,0 +1,277 @@
+"""Testing for Spectral Biclustering methods"""
+
+import numpy as np
+import pytest
+from scipy.sparse import csr_matrix, issparse
+
+from sklearn.model_selection import ParameterGrid
+
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+
+from sklearn.base import BaseEstimator, BiclusterMixin
+
+from sklearn.cluster import SpectralCoclustering
+from sklearn.cluster import SpectralBiclustering
+from sklearn.cluster._bicluster import _scale_normalize
+from sklearn.cluster._bicluster import _bistochastic_normalize
+from sklearn.cluster._bicluster import _log_normalize
+
+from sklearn.metrics import (consensus_score, v_measure_score)
+
+from sklearn.datasets import make_biclusters, make_checkerboard
+
+
+class MockBiclustering(BiclusterMixin, BaseEstimator):
+    """Mock object for testing get_submatrix."""
+
+    def __init__(self):
+        pass
+
+    def get_indices(self, i):
+        """Return the same (row, column) index arrays whatever ``i`` is.
+
+        Overridden to reproduce old get_submatrix test.
+        """
+        row_mask = [True, True, False, False, True]
+        col_mask = [False, False, True, True]
+        return np.where(row_mask)[0], np.where(col_mask)[0]
+
+
+def test_get_submatrix():
+    """get_submatrix must select the right cells and not alias the input."""
+    data = np.arange(20).reshape(5, 4)
+    model = MockBiclustering()
+    expected = [[2, 3],
+                [6, 7],
+                [18, 19]]
+
+    for container in (data, csr_matrix(data), data.tolist()):
+        submatrix = model.get_submatrix(0, container)
+        if issparse(submatrix):
+            submatrix = submatrix.toarray()
+        assert_array_equal(submatrix, expected)
+
+        # Overwrite the result; the input must be left untouched.
+        submatrix[:] = -1
+        dense = container.toarray() if issparse(container) else container
+        assert np.all(dense != -1)
+
+
+def _test_shape_indices(model):
+    """Check that get_shape and get_indices agree on a fitted model.
+
+    For each of the ``model.n_clusters`` biclusters, the numbers of row and
+    column indices must match the reported shape.
+    """
+    for cluster in range(model.n_clusters):
+        n_rows, n_cols = model.get_shape(cluster)
+        row_idx, col_idx = model.get_indices(cluster)
+        assert len(row_idx) == n_rows
+        assert len(col_idx) == n_cols
+
+
+def test_spectral_coclustering():
+    """Test Dhillon's Spectral CoClustering on a simple problem.
+
+    Every parameter combination of the grid is fitted on a dense and on a
+    sparse version of the same data.
+    """
+    seed = 0
+    grid_spec = {'svd_method': ['randomized', 'arpack'],
+                 'n_svd_vecs': [None, 20],
+                 'mini_batch': [False, True],
+                 'init': ['k-means++'],
+                 'n_init': [10]}
+    S, rows, cols = make_biclusters((30, 30), 3, noise=0.5,
+                                    random_state=seed)
+    S -= S.min()  # needs to be nonnegative before making it sparse
+    S = np.where(S < 1, 0, S)  # threshold some values
+    all_ones = np.ones(30)
+    for mat in (S, csr_matrix(S)):
+        for kwargs in ParameterGrid(grid_spec):
+            model = SpectralCoclustering(n_clusters=3, random_state=seed,
+                                         **kwargs)
+            model.fit(mat)
+
+            assert model.rows_.shape == (3, 30)
+            # The indicator matrices must sum to one along the cluster
+            # axis for each of the 30 rows and 30 columns.
+            assert_array_equal(model.rows_.sum(axis=0), all_ones)
+            assert_array_equal(model.columns_.sum(axis=0), all_ones)
+            assert consensus_score(model.biclusters_, (rows, cols)) == 1
+
+            _test_shape_indices(model)
+
+
+def test_spectral_biclustering():
+    """Test Kluger methods on a checkerboard dataset.
+
+    Starting from a fixed configuration, one parameter at a time is set to
+    a non-default value, on dense and on sparse input.
+    """
+    S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5,
+                                      random_state=0)
+
+    non_default_params = {'method': ['scale', 'log'],
+                          'svd_method': ['arpack'],
+                          'n_svd_vecs': [20],
+                          'mini_batch': [True]}
+    # Flatten the mapping into (name, value) pairs, one per model to fit.
+    overrides = [(name, value)
+                 for name, values in non_default_params.items()
+                 for value in values]
+    threes = np.repeat(3, 30)
+
+    for mat in (S, csr_matrix(S)):
+        for param_name, param_value in overrides:
+            model = SpectralBiclustering(n_clusters=3, n_init=3,
+                                         init='k-means++', random_state=0)
+            model.set_params(**{param_name: param_value})
+
+            if issparse(mat) and model.get_params().get('method') == 'log':
+                # cannot take log of sparse matrix
+                with pytest.raises(ValueError):
+                    model.fit(mat)
+                continue
+
+            model.fit(mat)
+
+            assert model.rows_.shape == (9, 30)
+            assert model.columns_.shape == (9, 30)
+            assert_array_equal(model.rows_.sum(axis=0), threes)
+            assert_array_equal(model.columns_.sum(axis=0), threes)
+            assert consensus_score(model.biclusters_, (rows, cols)) == 1
+
+            _test_shape_indices(model)
+
+
+def _do_scale_test(scaled):
+    """Check that rows sum to one constant, and columns to another.
+
+    Parameters
+    ----------
+    scaled : ndarray or sparse matrix
+        The normalized matrix to check. Row and column sums are compared,
+        to one decimal place, with their respective means.
+    """
+    row_sum = scaled.sum(axis=1)
+    col_sum = scaled.sum(axis=0)
+    if issparse(scaled):
+        # Sums of a sparse matrix are flattened to 1-d arrays here.
+        row_sum = np.asarray(row_sum).squeeze()
+        col_sum = np.asarray(col_sum).squeeze()
+    # Build the reference vectors from the shape of the sums instead of a
+    # hard-coded length of 100, so that any matrix size can be checked.
+    assert_array_almost_equal(row_sum,
+                              np.full(row_sum.shape, row_sum.mean()),
+                              decimal=1)
+    assert_array_almost_equal(col_sum,
+                              np.full(col_sum.shape, col_sum.mean()),
+                              decimal=1)
+
+
+def _do_bistochastic_test(scaled):
+    """Check that rows and columns sum to the same constant.
+
+    Runs the scale check first, then compares the mean column sum with the
+    mean row sum to one decimal place.
+    """
+    _do_scale_test(scaled)
+    mean_col_sum = scaled.sum(axis=0).mean()
+    mean_row_sum = scaled.sum(axis=1).mean()
+    assert_almost_equal(mean_col_sum, mean_row_sum, decimal=1)
+
+
+def test_scale_normalize():
+    """_scale_normalize must pass the scale check and keep sparsity."""
+    rng = np.random.RandomState(0)
+    dense = rng.rand(100, 100)
+    for mat in (dense, csr_matrix(dense)):
+        scaled, _, _ = _scale_normalize(mat)
+        _do_scale_test(scaled)
+        # sparse input must give sparse output
+        assert not issparse(mat) or issparse(scaled)
+
+
+def test_bistochastic_normalize():
+    """_bistochastic_normalize must pass the check and keep sparsity."""
+    rng = np.random.RandomState(0)
+    dense = rng.rand(100, 100)
+    for mat in (dense, csr_matrix(dense)):
+        scaled = _bistochastic_normalize(mat)
+        _do_bistochastic_test(scaled)
+        # sparse input must give sparse output
+        assert not issparse(mat) or issparse(scaled)
+
+
+def test_log_normalize():
+    """Check _log_normalize output, shifted by a constant, is bistochastic."""
+    # adding any constant to a log-scaled matrix should make it
+    # bistochastic
+    rng = np.random.RandomState(0)
+    shifted = _log_normalize(rng.rand(100, 100)) + 1
+    _do_bistochastic_test(shifted)
+
+
+def test_fit_best_piecewise():
+    """_fit_best_piecewise must select the first two of the three vectors."""
+    vectors = np.array([[0, 0, 0, 1, 1, 1],
+                        [2, 2, 2, 3, 3, 3],
+                        [0, 1, 2, 3, 4, 5]])
+    estimator = SpectralBiclustering(random_state=0)
+    selected = estimator._fit_best_piecewise(vectors, n_best=2,
+                                             n_clusters=2)
+    assert_array_equal(selected, vectors[:2])
+
+
+def test_project_and_cluster():
+    """_project_and_cluster must split the rows into the two expected groups.
+
+    Checked on dense and on sparse input.
+    """
+    data = np.array([[1, 1, 1],
+                     [1, 1, 1],
+                     [3, 6, 3],
+                     [3, 6, 3]])
+    vectors = np.array([[1, 0],
+                        [0, 1],
+                        [0, 0]])
+    expected_labels = [0, 0, 1, 1]
+    model = SpectralBiclustering(random_state=0)
+    for mat in (data, csr_matrix(data)):
+        labels = model._project_and_cluster(mat, vectors, n_clusters=2)
+        assert_almost_equal(v_measure_score(labels, expected_labels), 1.0)
+
+
+def test_perfect_checkerboard():
+    """Noise-free checkerboards of several shapes must be recovered exactly."""
+    # XXX Previously failed on build bot (not reproducible)
+    model = SpectralBiclustering(3, svd_method="arpack", random_state=0)
+
+    # The same estimator instance is refitted on each shape in turn.
+    for shape in ((30, 30), (40, 30), (30, 40)):
+        S, rows, cols = make_checkerboard(shape, 3, noise=0,
+                                          random_state=0)
+        model.fit(S)
+        assert consensus_score(model.biclusters_, (rows, cols)) == 1
+
+
+@pytest.mark.parametrize(
+    "args",
+    [{'n_clusters': (3, 3, 3)},
+     {'n_clusters': 'abc'},
+     {'n_clusters': (3, 'abc')},
+     {'method': 'unknown'},
+     {'n_components': 0},
+     {'n_best': 0},
+     {'svd_method': 'unknown'},
+     {'n_components': 3, 'n_best': 4}]
+)
+def test_errors(args):
+    """Each parameter set above must make fit raise ValueError."""
+    square = np.arange(25).reshape((5, 5))
+    estimator = SpectralBiclustering(**args)
+
+    with pytest.raises(ValueError):
+        estimator.fit(square)
+
+
+def test_wrong_shape():
+    """Fitting a 3-dimensional array must raise ValueError."""
+    estimator = SpectralBiclustering()
+    cube = np.arange(27).reshape((3, 3, 3))
+    with pytest.raises(ValueError):
+        estimator.fit(cube)
+
+
+@pytest.mark.parametrize('est',
+                         (SpectralBiclustering(), SpectralCoclustering()))
+def test_n_features_in_(est):
+    """n_features_in_ must be absent before fit and equal 3 on 3x3 data."""
+    data, _, _ = make_biclusters((3, 3), 3, random_state=0)
+
+    assert not hasattr(est, 'n_features_in_')
+    est.fit(data)
+    assert est.n_features_in_ == 3
+
+
+@pytest.mark.parametrize("klass", [SpectralBiclustering, SpectralCoclustering])
+@pytest.mark.parametrize("n_jobs", [None, 1])
+def test_n_jobs_deprecated(klass, n_jobs):
+    """Fitting with n_jobs given (None or 1) must raise a FutureWarning."""
+    # FIXME: remove in 0.25
+    expected_message = ("'n_jobs' was deprecated in version 0.23 and will be "
+                        "removed in 0.25.")
+    data, _, _ = make_biclusters((30, 30), 3, noise=0.5, random_state=0)
+    estimator = klass(random_state=0, n_jobs=n_jobs)
+
+    with pytest.warns(FutureWarning, match=expected_message):
+        estimator.fit(data)
--- a/venv/Lib/site-packages/sklearn/cluster/tests/test_birch.py
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/test_birch.py
@@ -0,0 +1,169 @@
+"""
+Tests for the birch clustering algorithm.
+"""
+
+from scipy import sparse
+import numpy as np
+import pytest
+
+from sklearn.cluster.tests.common import generate_clustered_data
+from sklearn.cluster import Birch
+from sklearn.cluster import AgglomerativeClustering
+from sklearn.datasets import make_blobs
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.linear_model import ElasticNet
+from sklearn.metrics import pairwise_distances_argmin, v_measure_score
+
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_warns
+
+
+def test_n_samples_leaves_roots():
+    """Sanity check for the number of samples in leaves and roots."""
+    X, _ = make_blobs(n_samples=10)
+    brc = Birch()
+    brc.fit(X)
+
+    root_total = sum(sc.n_samples_ for sc in brc.root_.subclusters_)
+    leaf_total = 0
+    for leaf in brc._get_leaves():
+        for sc in leaf.subclusters_:
+            leaf_total += sc.n_samples_
+
+    # Both totals must account for every input sample.
+    assert leaf_total == X.shape[0]
+    assert root_total == X.shape[0]
+
+
+def test_partial_fit():
+    """Test that fit is equivalent to calling partial_fit multiple times."""
+    X, _ = make_blobs(n_samples=100)
+    reference = Birch(n_clusters=3)
+    reference.fit(X)
+
+    incremental = Birch(n_clusters=None)
+    for batch in (X[:50], X[50:]):
+        incremental.partial_fit(batch)
+    assert_array_almost_equal(incremental.subcluster_centers_,
+                              reference.subcluster_centers_)
+
+    # Test that same global labels are obtained after calling partial_fit
+    # with None
+    incremental.set_params(n_clusters=3)
+    incremental.partial_fit(None)
+    assert_array_equal(incremental.subcluster_labels_,
+                       reference.subcluster_labels_)
+
+
+def test_birch_predict():
+    """Test the predict method predicts the nearest centroid.
+
+    The training labels must equal ``predict`` on the training data, and
+    must match the assignment of each sample to its nearest subcluster
+    center (compared with the V-measure).
+    """
+    rng = np.random.RandomState(0)
+    X = generate_clustered_data(n_clusters=3, n_features=3,
+                                n_samples_per_cluster=10)
+
+    # One index per generated sample, i.e. n_clusters *
+    # n_samples_per_cluster of them; derived from X rather than hard-coded.
+    shuffle_indices = np.arange(X.shape[0])
+    rng.shuffle(shuffle_indices)
+    X_shuffle = X[shuffle_indices, :]
+    brc = Birch(n_clusters=4, threshold=1.)
+    brc.fit(X_shuffle)
+    centroids = brc.subcluster_centers_
+    assert_array_equal(brc.labels_, brc.predict(X_shuffle))
+    nearest_centroid = pairwise_distances_argmin(X_shuffle, centroids)
+    assert_almost_equal(v_measure_score(nearest_centroid, brc.labels_), 1.0)
+
+
+def test_n_clusters():
+    """Test that the n_clusters param works properly."""
+    X, _ = make_blobs(n_samples=100, centers=10)
+    by_count = Birch(n_clusters=10)
+    by_count.fit(X)
+    assert len(by_count.subcluster_centers_) > 10
+    assert len(np.unique(by_count.labels_)) == 10
+
+    # Test that n_clusters = Agglomerative Clustering gives
+    # the same results.
+    by_estimator = Birch(n_clusters=AgglomerativeClustering(n_clusters=10))
+    by_estimator.fit(X)
+    assert_array_equal(by_count.subcluster_labels_,
+                       by_estimator.subcluster_labels_)
+    assert_array_equal(by_count.labels_, by_estimator.labels_)
+
+    # Test that the wrong global clustering step raises an Error.
+    invalid = Birch(n_clusters=ElasticNet())
+    with pytest.raises(ValueError):
+        invalid.fit(X)
+
+    # Test that a small number of clusters raises a warning.
+    coarse = Birch(threshold=10000.)
+    assert_warns(ConvergenceWarning, coarse.fit, X)
+
+
+def test_sparse_X():
+    """Test that sparse and dense data give same results."""
+    X, _ = make_blobs(n_samples=100, centers=10)
+    dense_model = Birch(n_clusters=10)
+    dense_model.fit(X)
+
+    sparse_model = Birch(n_clusters=10)
+    sparse_model.fit(sparse.csr_matrix(X))
+
+    assert_array_equal(dense_model.labels_, sparse_model.labels_)
+    assert_array_almost_equal(dense_model.subcluster_centers_,
+                              sparse_model.subcluster_centers_)
+
+
+def check_branching_factor(node, branching_factor):
+    """Assert that no node of the tree exceeds ``branching_factor``.
+
+    Checks the number of subclusters of ``node``, then recurses into the
+    ``child_`` of every subcluster that has one.
+    """
+    children = node.subclusters_
+    assert len(children) <= branching_factor
+    for subcluster in children:
+        child = subcluster.child_
+        if child:
+            check_branching_factor(child, branching_factor)
+
+
+def test_branching_factor():
+    """Test that nodes have at max branching_factor number of subclusters."""
+    X, _ = make_blobs()
+    branching_factor = 9
+
+    # Purposefully set a low threshold to maximize the subclusters.
+    for n_clusters in (None, 3):
+        brc = Birch(n_clusters=n_clusters,
+                    branching_factor=branching_factor, threshold=0.01)
+        brc.fit(X)
+        check_branching_factor(brc.root_, branching_factor)
+
+    # Raises error when branching_factor is set to one.
+    too_small = Birch(n_clusters=None, branching_factor=1, threshold=0.01)
+    with pytest.raises(ValueError):
+        too_small.fit(X)
+
+
+def check_threshold(birch_instance, threshold):
+    """Assert every leaf subcluster has a radius of at most ``threshold``.
+
+    Use the leaf linked list for traversal, starting after the dummy leaf.
+    """
+    leaf = birch_instance.dummy_leaf_.next_leaf_
+    while leaf:
+        for sc in leaf.subclusters_:
+            assert sc.radius <= threshold
+        leaf = leaf.next_leaf_
+
+
+def test_threshold():
+    """Leaf subclusters must have a radius no greater than the threshold."""
+    X, _ = make_blobs(n_samples=80, centers=4)
+    for threshold in (0.5, 5.0):
+        brc = Birch(threshold=threshold, n_clusters=None)
+        brc.fit(X)
+        check_threshold(brc, threshold)
+
+
+def test_birch_n_clusters_long_int():
+    """Check that birch supports n_clusters with np.int64 dtype.
+
+    Such a value can for instance come from np.arange. #16484
+    """
+    X, _ = make_blobs(random_state=0)
+    Birch(n_clusters=np.int64(5)).fit(X)
--- a/venv/Lib/site-packages/sklearn/cluster/tests/test_dbscan.py
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/test_dbscan.py
@ -0,0 +1,395 @@
+"""
+Tests for DBSCAN clustering algorithm
+"""
+
+import pickle
+
+import numpy as np
+
+from scipy.spatial import distance
+from scipy import sparse
+
+import pytest
+
+from sklearn.utils._testing import assert_array_equal
+from sklearn.neighbors import NearestNeighbors
+from sklearn.cluster import DBSCAN
+from sklearn.cluster import dbscan
+from sklearn.cluster.tests.common import generate_clustered_data
+from sklearn.metrics.pairwise import pairwise_distances
+
+
+# Module-level fixtures shared by the tests below: the number of clusters
+# requested from the data generator, and the samples it returns.
+n_clusters = 3
+X = generate_clustered_data(n_clusters=n_clusters)
+
+
+def test_dbscan_similarity():
+    """Run DBSCAN on a precomputed, max-normalised distance matrix."""
+    # Parameters chosen specifically for this task.
+    params = dict(metric="precomputed", eps=0.15, min_samples=10)
+
+    # Pairwise distances, divided by their maximum.
+    D = distance.squareform(distance.pdist(X))
+    D /= D.max()
+
+    # Function interface; the noise label (-1) is not counted as a cluster.
+    _, labels = dbscan(D, **params)
+    found_by_function = len(set(labels)) - int(-1 in labels)
+    assert found_by_function == n_clusters
+
+    # Estimator interface.
+    labels = DBSCAN(**params).fit(D).labels_
+    found_by_estimator = len(set(labels)) - int(-1 in labels)
+    assert found_by_estimator == n_clusters
+
+
+def test_dbscan_feature():
+    """Run DBSCAN directly on the feature array with a euclidean metric."""
+    # Parameters chosen specifically for this task.
+    # Different eps to other test, because distance is not normalised.
+    params = dict(metric='euclidean', eps=0.8, min_samples=10)
+
+    # Function interface; the noise label (-1) is not counted as a cluster.
+    _, labels = dbscan(X, **params)
+    found_by_function = len(set(labels)) - int(-1 in labels)
+    assert found_by_function == n_clusters
+
+    # Estimator interface.
+    labels = DBSCAN(**params).fit(X).labels_
+    found_by_estimator = len(set(labels)) - int(-1 in labels)
+    assert found_by_estimator == n_clusters
+
+
+def test_dbscan_sparse():
+    """A LIL sparse input must give the same result as the dense input."""
+    params = dict(eps=.8, min_samples=10)
+    sparse_core, sparse_labels = dbscan(sparse.lil_matrix(X), **params)
+    dense_core, dense_labels = dbscan(X, **params)
+    assert_array_equal(dense_core, sparse_core)
+    assert_array_equal(dense_labels, sparse_labels)
+
+
+@pytest.mark.parametrize('include_self', [False, True])
+def test_dbscan_sparse_precomputed(include_self):
+    """A sparse radius-neighbors graph must match the dense distances."""
+    dense_dist = pairwise_distances(X)
+    neighbors = NearestNeighbors(radius=.9).fit(X)
+    # The graph is queried either with X itself or with None, depending on
+    # the include_self parameter.
+    query = X if include_self else None
+    sparse_dist = neighbors.radius_neighbors_graph(X=query, mode='distance')
+    # Ensure it is sparse not merely on diagonals:
+    n_rows = dense_dist.shape[0]
+    assert sparse_dist.nnz < n_rows * (n_rows - 1)
+
+    params = dict(eps=.8, min_samples=10, metric='precomputed')
+    sparse_core, sparse_labels = dbscan(sparse_dist, **params)
+    dense_core, dense_labels = dbscan(dense_dist, **params)
+    assert_array_equal(dense_core, sparse_core)
+    assert_array_equal(dense_labels, sparse_labels)
+
+
+def test_dbscan_sparse_precomputed_different_eps():
+    """Graphs built with a radius above eps must be filtered by DBSCAN."""
+    lower_eps = 0.2
+    higher_eps = lower_eps + 0.7
+
+    def run_with_graph_radius(radius):
+        # Build the sparse distance graph with the given radius, but always
+        # cluster with eps=lower_eps.
+        nn = NearestNeighbors(radius=radius).fit(X)
+        graph = nn.radius_neighbors_graph(X, mode='distance')
+        return dbscan(graph, eps=lower_eps, metric='precomputed')
+
+    dbscan_lower = run_with_graph_radius(lower_eps)
+    dbscan_higher = run_with_graph_radius(higher_eps)
+
+    assert_array_equal(dbscan_lower[0], dbscan_higher[0])
+    assert_array_equal(dbscan_lower[1], dbscan_higher[1])
+
+
+@pytest.mark.parametrize('use_sparse', [True, False])
+@pytest.mark.parametrize('metric', ['precomputed', 'minkowski'])
+def test_dbscan_input_not_modified(use_sparse, metric):
+    """``dbscan`` must leave its input array or matrix unchanged."""
+    data = np.random.RandomState(0).rand(10, 10)
+    if use_sparse:
+        data = sparse.csr_matrix(data)
+    snapshot = data.copy()
+
+    dbscan(data, metric=metric)
+
+    if use_sparse:
+        assert_array_equal(data.toarray(), snapshot.toarray())
+    else:
+        assert_array_equal(data, snapshot)
+
+
+def test_dbscan_no_core_samples():
+    """Without core samples every label is -1 and the outputs are empty."""
+    rng = np.random.RandomState(0)
+    data = rng.rand(40, 10)
+    # Zero out every entry below 0.8.
+    data[data < .8] = 0
+
+    for variant in (data, sparse.csr_matrix(data)):
+        db = DBSCAN(min_samples=6).fit(variant)
+        assert_array_equal(db.components_, np.empty((0, variant.shape[1])))
+        assert_array_equal(db.labels_, -1)
+        assert db.core_sample_indices_.shape == (0,)
+
+
+def test_dbscan_callable():
+    """Run DBSCAN with a callable metric instead of a metric name."""
+    # Parameters chosen specifically for this task.
+    # Different eps to other test, because distance is not normalised.
+    params = dict(
+        # metric is the function reference, not the string key.
+        metric=distance.euclidean,
+        eps=0.8,
+        min_samples=10,
+        algorithm='ball_tree',
+    )
+
+    # Function interface; the noise label (-1) is not counted as a cluster.
+    _, labels = dbscan(X, **params)
+    found_by_function = len(set(labels)) - int(-1 in labels)
+    assert found_by_function == n_clusters
+
+    # Estimator interface.
+    labels = DBSCAN(**params).fit(X).labels_
+    found_by_estimator = len(set(labels)) - int(-1 in labels)
+    assert found_by_estimator == n_clusters
+
+
+def test_dbscan_metric_params():
+    """DBSCAN must honour the ``metric_params`` argument."""
+    common = dict(eps=0.8, min_samples=10, algorithm='ball_tree')
+    p = 1
+
+    def fit_and_collect(**metric_kwargs):
+        # Fit on the shared data and return (core sample indices, labels).
+        est = DBSCAN(**metric_kwargs, **common).fit(X)
+        return est.core_sample_indices_, est.labels_
+
+    # Minkowski with p supplied through metric_params ...
+    core_sample_1, labels_1 = fit_and_collect(metric='minkowski',
+                                              metric_params={'p': p})
+
+    # ... must match passing Minkowski 'p' directly ...
+    core_sample_2, labels_2 = fit_and_collect(metric='minkowski', p=p)
+    assert_array_equal(core_sample_1, core_sample_2)
+    assert_array_equal(labels_1, labels_2)
+
+    # ... and, since p=1, must be equivalent to the Manhattan distance.
+    core_sample_3, labels_3 = fit_and_collect(metric='manhattan')
+    assert_array_equal(core_sample_1, core_sample_3)
+    assert_array_equal(labels_1, labels_3)
+
+
+def test_dbscan_balltree():
+    """DBSCAN must find the clusters with tree-based neighbor searches."""
+    eps = 0.8
+    min_samples = 10
+
+    # Reference run on a precomputed distance matrix.
+    D = pairwise_distances(X)
+    _, labels = dbscan(D, metric="precomputed", eps=eps,
+                       min_samples=min_samples)
+    # number of clusters, ignoring noise if present
+    assert len(set(labels)) - int(-1 in labels) == n_clusters
+
+    # Estimator configurations that must all recover the same cluster count.
+    tree_configs = [
+        dict(p=2.0, algorithm='ball_tree'),
+        dict(p=2.0, algorithm='kd_tree'),
+        dict(p=1.0, algorithm='ball_tree'),
+        dict(leaf_size=20, algorithm='ball_tree'),
+    ]
+    for config in tree_configs:
+        db = DBSCAN(eps=eps, min_samples=min_samples, **config)
+        labels = db.fit(X).labels_
+        assert len(set(labels)) - int(-1 in labels) == n_clusters
+
+
+def test_input_validation():
+    """``DBSCAN.fit`` must accept a plain list of lists."""
+    rows = [[1., 2.], [3., 4.]]
+    estimator = DBSCAN()
+    estimator.fit(rows)  # must not raise exception
+
+
+@pytest.mark.parametrize(
+    "args",
+    [
+        {'eps': -1.0},
+        {'algorithm': 'blah'},
+        {'metric': 'blah'},
+        {'leaf_size': -1},
+        {'p': -1},
+    ]
+)
+def test_dbscan_badargs(args):
+    """Each invalid keyword argument must make ``dbscan`` raise ValueError."""
+    with pytest.raises(ValueError):
+        dbscan(X, **args)
+
+
+def test_pickle():
+    """A default DBSCAN estimator must keep its class through pickling."""
+    original = DBSCAN()
+    restored = pickle.loads(pickle.dumps(original))
+    assert type(restored) == original.__class__
+
+
+def test_boundaries():
+    """Check the inclusive boundaries of ``min_samples`` and ``eps``."""
+    # min_samples counts the core point itself.
+    core_indices, _ = dbscan([[0], [1]], eps=2, min_samples=2)
+    assert 0 in core_indices
+
+    # Points lying exactly at distance eps are inside the neighbourhood ...
+    core_indices, _ = dbscan([[0], [1], [1]], eps=1, min_samples=2)
+    assert 0 in core_indices
+
+    # ... but not when eps is slightly smaller than that distance.
+    core_indices, _ = dbscan([[0], [1], [1]], eps=.99, min_samples=2)
+    assert 0 not in core_indices
+
+
+def test_weighted_dbscan():
+    """Check validation and effect of ``sample_weight`` in DBSCAN.
+
+    Covers: rejection of weights whose length differs from the number of
+    samples, the influence of weights on which samples are core samples,
+    equivalence with physically repeating the samples, and agreement
+    between the function, precomputed-metric and estimator interfaces.
+    """
+    # ensure sample_weight is validated
+    with pytest.raises(ValueError):
+        dbscan([[0], [1]], sample_weight=[2])
+    with pytest.raises(ValueError):
+        dbscan([[0], [1]], sample_weight=[2, 3, 4])
+
+    # ensure sample_weight has an effect
+    # (index [0] of the dbscan result is the array of core sample indices)
+    assert_array_equal([], dbscan([[0], [1]], sample_weight=None,
+                                  min_samples=6)[0])
+    assert_array_equal([], dbscan([[0], [1]], sample_weight=[5, 5],
+                                  min_samples=6)[0])
+    assert_array_equal([0], dbscan([[0], [1]], sample_weight=[6, 5],
+                                   min_samples=6)[0])
+    assert_array_equal([0, 1], dbscan([[0], [1]], sample_weight=[6, 6],
+                                      min_samples=6)[0])
+
+    # points within eps of each other:
+    assert_array_equal([0, 1], dbscan([[0], [1]], eps=1.5,
+                                      sample_weight=[5, 1], min_samples=6)[0])
+    # and effect of non-positive and non-integer sample_weight:
+    assert_array_equal([], dbscan([[0], [1]], sample_weight=[5, 0],
+                                  eps=1.5, min_samples=6)[0])
+    assert_array_equal([0, 1], dbscan([[0], [1]], sample_weight=[5.9, 0.1],
+                                      eps=1.5, min_samples=6)[0])
+    assert_array_equal([0, 1], dbscan([[0], [1]], sample_weight=[6, 0],
+                                      eps=1.5, min_samples=6)[0])
+    assert_array_equal([], dbscan([[0], [1]], sample_weight=[6, -1],
+                                  eps=1.5, min_samples=6)[0])
+
+    # for non-negative sample_weight, cores should be identical to repetition
+    rng = np.random.RandomState(42)
+    sample_weight = rng.randint(0, 5, X.shape[0])
+    core1, label1 = dbscan(X, sample_weight=sample_weight)
+    assert len(label1) == len(X)
+
+    # Repeat each row of X as many times as its integer weight, then compare
+    # core membership through boolean masks expanded the same way.
+    X_repeated = np.repeat(X, sample_weight, axis=0)
+    core_repeated, label_repeated = dbscan(X_repeated)
+    core_repeated_mask = np.zeros(X_repeated.shape[0], dtype=bool)
+    core_repeated_mask[core_repeated] = True
+    core_mask = np.zeros(X.shape[0], dtype=bool)
+    core_mask[core1] = True
+    assert_array_equal(np.repeat(core_mask, sample_weight), core_repeated_mask)
+
+    # sample_weight should work with precomputed distance matrix
+    D = pairwise_distances(X)
+    core3, label3 = dbscan(D, sample_weight=sample_weight,
+                           metric='precomputed')
+    assert_array_equal(core1, core3)
+    assert_array_equal(label1, label3)
+
+    # sample_weight should work with estimator
+    est = DBSCAN().fit(X, sample_weight=sample_weight)
+    core4 = est.core_sample_indices_
+    label4 = est.labels_
+    assert_array_equal(core1, core4)
+    assert_array_equal(label1, label4)
+
+    # fit_predict must return the same labels that it stores on the estimator
+    est = DBSCAN()
+    label5 = est.fit_predict(X, sample_weight=sample_weight)
+    core5 = est.core_sample_indices_
+    assert_array_equal(core1, core5)
+    assert_array_equal(label1, label5)
+    assert_array_equal(label1, est.labels_)
+
+
+@pytest.mark.parametrize('algorithm', ['brute', 'kd_tree', 'ball_tree'])
+def test_dbscan_core_samples_toy(algorithm):
+    """Check core samples and labels on a 1-D toy set as min_samples grows."""
+    X = [[0], [2], [3], [4], [6], [8], [10]]
+    n_samples = len(X)
+
+    # Each entry: (min_samples, expected core sample indices, expected labels)
+    scenarios = [
+        # Degenerate case: every sample is a core sample, either with its
+        # own cluster or including other close core samples.
+        (1, np.arange(n_samples), [0, 1, 1, 1, 2, 3, 4]),
+        # Only the 3 samples from the denser area are core samples. All
+        # other points are isolated and considered noise.
+        (2, [1, 2, 3], [-1, 0, 0, 0, -1, -1, -1]),
+        # Only the sample in the middle of the dense area is core. Its two
+        # neighbors are edge samples. Remaining samples are noise.
+        (3, [2], [-1, 0, 0, 0, -1, -1, -1]),
+        # It's no longer possible to extract core samples with eps=1:
+        # everything is noise.
+        (4, [], np.full(n_samples, -1.)),
+    ]
+    for min_samples, expected_core, expected_labels in scenarios:
+        core_samples, labels = dbscan(X, algorithm=algorithm, eps=1,
+                                      min_samples=min_samples)
+        assert_array_equal(core_samples, expected_core)
+        assert_array_equal(labels, expected_labels)
+
+
+def test_dbscan_precomputed_metric_with_degenerate_input_arrays():
+    """Identity and all-zero precomputed matrices yield one distinct label."""
+    # see https://github.com/scikit-learn/scikit-learn/issues/4641 for
+    # more details
+    for degenerate in (np.eye(10), np.zeros((10, 10))):
+        estimator = DBSCAN(eps=0.5, metric='precomputed')
+        labels = estimator.fit(degenerate).labels_
+        assert len(set(labels)) == 1
+
+
+def test_dbscan_precomputed_metric_with_initial_rows_zero():
+    """Sparse precomputed input whose first two rows are all zero."""
+    dense = np.array([
+        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0],
+        [0.0, 0.0, 0.1, 0.1, 0.0, 0.0, 0.3],
+        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1],
+        [0.0, 0.0, 0.0, 0.0, 0.3, 0.1, 0.0]
+    ])
+    graph = sparse.csr_matrix(dense)
+    estimator = DBSCAN(eps=0.2, metric='precomputed', min_samples=2)
+    labels = estimator.fit(graph).labels_
+    # The two empty rows are expected to be labelled as noise.
+    assert_array_equal(labels, [-1, -1,  0,  0,  0,  1,  1])
--- a/venv/Lib/site-packages/sklearn/cluster/tests/test_feature_agglomeration.py
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/test_feature_agglomeration.py
@ -0,0 +1,43 @@
+"""
+Tests for sklearn.cluster._feature_agglomeration
+"""
+# Authors: Sergul Aydore 2017
+import numpy as np
+from sklearn.cluster import FeatureAgglomeration
+from sklearn.utils._testing import assert_no_warnings
+from sklearn.utils._testing import assert_array_almost_equal
+
+
+def test_feature_agglomeration():
+    """Check fit, transform and inverse_transform with mean/median pooling."""
+    n_clusters = 1
+    X = np.array([0, 0, 1]).reshape(1, 3)  # (n_samples, n_features)
+
+    # Each pooling function paired with the value it must produce when the
+    # three features [0, 0, 1] are pooled into a single cluster.
+    cases = [
+        (np.mean, np.array([1 / 3.])),
+        (np.median, np.array([0.])),
+    ]
+    for pooling_func, expected in cases:
+        agglo = FeatureAgglomeration(n_clusters=n_clusters,
+                                     pooling_func=pooling_func)
+        assert_no_warnings(agglo.fit, X)
+        assert np.size(np.unique(agglo.labels_)) == n_clusters
+        assert np.size(agglo.labels_) == X.shape[1]
+
+        # Test transform
+        Xt = agglo.transform(X)
+        assert Xt.shape[1] == n_clusters
+        assert Xt == expected
+
+        # Test inverse transform
+        X_full = agglo.inverse_transform(Xt)
+        assert np.unique(X_full[0]).size == n_clusters
+
+        # Transforming the reconstruction must give back the pooled values.
+        assert_array_almost_equal(agglo.transform(X_full), Xt)
--- a/venv/Lib/site-packages/sklearn/cluster/tests/test_hierarchical.py
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/test_hierarchical.py
@ -0,0 +1,765 @@
+"""
+Several basic tests for hierarchical clustering procedures
+
+"""
+# Authors: Vincent Michel, 2010, Gael Varoquaux 2012,
+#          Matteo Visconti di Oleggio Castello 2014
+# License: BSD 3 clause
+from tempfile import mkdtemp
+import shutil
+import pytest
+from functools import partial
+
+import numpy as np
+from scipy import sparse
+from scipy.cluster import hierarchy
+
+from sklearn.metrics.cluster import adjusted_rand_score
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_raise_message
+from sklearn.utils._testing import ignore_warnings
+
+from sklearn.cluster import ward_tree
+from sklearn.cluster import AgglomerativeClustering, FeatureAgglomeration
+from sklearn.cluster._agglomerative import (_hc_cut, _TREE_BUILDERS,
+                                            linkage_tree,
+                                            _fix_connectivity)
+from sklearn.feature_extraction.image import grid_to_graph
+from sklearn.metrics.pairwise import PAIRED_DISTANCES, cosine_distances,\
+    manhattan_distances, pairwise_distances
+from sklearn.metrics.cluster import normalized_mutual_info_score
+from sklearn.neighbors import kneighbors_graph
+from sklearn.cluster._hierarchical_fast import average_merge, max_merge
+from sklearn.utils._fast_dict import IntFloatDict
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_warns
+from sklearn.datasets import make_moons, make_circles
+
+
+def test_linkage_misc():
+    """Miscellaneous checks on linkage options and affinities."""
+    rng = np.random.RandomState(42)
+    X = rng.normal(size=(5, 5))
+
+    # An unknown linkage must be rejected by the estimator and the function.
+    with pytest.raises(ValueError):
+        AgglomerativeClustering(linkage='foo').fit(X)
+    with pytest.raises(ValueError):
+        linkage_tree(X, linkage='foo')
+
+    # A (4, 4) connectivity matrix does not match the 5 samples of X.
+    with pytest.raises(ValueError):
+        linkage_tree(X, connectivity=np.ones((4, 4)))
+
+    # Smoke test FeatureAgglomeration
+    FeatureAgglomeration().fit(X)
+
+    # A precomputed cosine distance matrix must give the same first output
+    # as requesting the "cosine" affinity by name.
+    from_matrix = linkage_tree(cosine_distances(X), affinity="precomputed")
+    from_name = linkage_tree(X, affinity="cosine")
+    assert_array_equal(from_matrix[0], from_name[0])
+
+    # A callable affinity must give the same first output as its name.
+    from_callable = linkage_tree(X, affinity=manhattan_distances)
+    from_name = linkage_tree(X, affinity="manhattan")
+    assert_array_equal(from_callable[0], from_name[0])
+
+
+def test_structured_linkage_tree():
+    """Check node counts and input validation of structured linkage trees."""
+    rng = np.random.RandomState(0)
+    # ``np.bool`` was only an alias of the builtin and has been removed from
+    # NumPy; the builtin ``bool`` gives the same dtype.
+    mask = np.ones([10, 10], dtype=bool)
+    # Avoiding a mask with only 'True' entries
+    # NOTE(review): only ``mask.shape`` is used below, so this assignment
+    # does not influence the connectivity graph.
+    mask[4:7, 4:7] = 0
+    X = rng.randn(50, 100)
+    connectivity = grid_to_graph(*mask.shape)
+    for tree_builder in _TREE_BUILDERS.values():
+        children, n_components, n_leaves, parent = \
+            tree_builder(X.T, connectivity=connectivity)
+        n_nodes = 2 * X.shape[1] - 1
+        assert len(children) + n_leaves == n_nodes
+        # Check that the tree builder raises a ValueError with a
+        # connectivity matrix of the wrong shape
+        with pytest.raises(ValueError):
+            tree_builder(X.T, connectivity=np.ones((4, 4)))
+        # Check that fitting with no samples raises an error
+        with pytest.raises(ValueError):
+            tree_builder(X.T[:0], connectivity=connectivity)
+
+
+def test_unstructured_linkage_tree():
+    """Check node counts of trees built without a connectivity matrix."""
+    rng = np.random.RandomState(0)
+    X = rng.randn(50, 100)
+    expected_nodes = 2 * X.shape[1] - 1
+
+    def check_builder(builder):
+        # Run the builder on the full matrix and on a single row of it.
+        for this_X in (X, X[0]):
+            # n_clusters is specified just for the sake of raising a
+            # warning and testing the warning code.
+            with ignore_warnings():
+                children, _, n_leaves, _ = assert_warns(
+                    UserWarning, builder, this_X.T, n_clusters=10)
+            assert len(children) + n_leaves == expected_nodes
+
+    check_builder(ward_tree)
+    for tree_builder in _TREE_BUILDERS.values():
+        check_builder(tree_builder)
+
+
+def test_height_linkage_tree():
+    """Check the node count of structured trees for every tree builder.
+
+    NOTE(review): the previous comment said this verifies that the heights
+    of the linkage tree are sorted, but no such assertion is made here;
+    only the relation between children, leaves and nodes is checked.
+    """
+    rng = np.random.RandomState(0)
+    # ``np.bool`` has been removed from NumPy; the builtin ``bool`` is the
+    # same dtype.
+    mask = np.ones([10, 10], dtype=bool)
+    X = rng.randn(50, 100)
+    connectivity = grid_to_graph(*mask.shape)
+    for linkage_func in _TREE_BUILDERS.values():
+        children, n_nodes, n_leaves, parent = linkage_func(
+            X.T, connectivity=connectivity)
+        n_nodes = 2 * X.shape[1] - 1
+        assert len(children) + n_leaves == n_nodes
+
+
+def test_agglomerative_clustering_wrong_arg_memory():
+    """Fitting with an integer ``memory`` argument must raise ValueError."""
+    # memory is expected to be either a str or a joblib.Memory instance.
+    rng = np.random.RandomState(0)
+    X = rng.randn(100, 50)
+    clustering = AgglomerativeClustering(memory=5)
+    with pytest.raises(ValueError):
+        clustering.fit(X)
+
+
+def test_zero_cosine_linkage_tree():
+    """'cosine' affinity must raise when X contains a zero vector."""
+    with_zero_row = np.array([[0, 1],
+                              [0, 0]])
+    expected_message = ('Cosine affinity cannot be used when X contains '
+                        'zero vectors')
+    assert_raise_message(ValueError, expected_message, linkage_tree,
+                         with_zero_row, affinity='cosine')
+
+
+def test_agglomerative_clustering():
+    """Check cluster counts of AgglomerativeClustering in several settings.
+
+    For each linkage the test covers: a plain fit, a fit using an on-disk
+    cache, early stopping of the tree building, a fit without connectivity,
+    and rejection of a connectivity matrix of the wrong shape. It then
+    checks that ward rejects a non-euclidean affinity, that structured and
+    unstructured complete linkage agree for every paired distance, and that
+    a precomputed distance matrix gives the same labels.
+    """
+    rng = np.random.RandomState(0)
+    # ``np.bool`` has been removed from NumPy; the builtin ``bool`` is the
+    # same dtype.
+    mask = np.ones([10, 10], dtype=bool)
+    n_samples = 100
+    X = rng.randn(n_samples, 50)
+    connectivity = grid_to_graph(*mask.shape)
+    for linkage in ("ward", "complete", "average", "single"):
+        clustering = AgglomerativeClustering(n_clusters=10,
+                                             connectivity=connectivity,
+                                             linkage=linkage)
+        clustering.fit(X)
+        # test caching
+        # The directory is created before entering ``try`` so that the
+        # ``finally`` clause never refers to an unbound name (or to the
+        # directory of a previous iteration) if mkdtemp itself fails.
+        tempdir = mkdtemp()
+        try:
+            clustering = AgglomerativeClustering(
+                n_clusters=10, connectivity=connectivity,
+                memory=tempdir,
+                linkage=linkage)
+            clustering.fit(X)
+            labels = clustering.labels_
+            assert np.size(np.unique(labels)) == 10
+        finally:
+            shutil.rmtree(tempdir)
+        # Turn caching off now
+        clustering = AgglomerativeClustering(
+            n_clusters=10, connectivity=connectivity, linkage=linkage)
+        # Check that we obtain the same solution with early-stopping of the
+        # tree building
+        clustering.compute_full_tree = False
+        clustering.fit(X)
+        assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
+                                                         labels), 1)
+        clustering.connectivity = None
+        clustering.fit(X)
+        assert np.size(np.unique(clustering.labels_)) == 10
+        # Check that we raise a ValueError when the connectivity matrix
+        # (here 10 x 10) does not match the number of samples
+        clustering = AgglomerativeClustering(
+            n_clusters=10,
+            connectivity=sparse.lil_matrix(
+                connectivity.toarray()[:10, :10]),
+            linkage=linkage)
+        with pytest.raises(ValueError):
+            clustering.fit(X)
+
+    # Test that using ward with another metric than euclidean raises an
+    # exception
+    clustering = AgglomerativeClustering(
+        n_clusters=10,
+        connectivity=connectivity.toarray(),
+        affinity="manhattan",
+        linkage="ward")
+    with pytest.raises(ValueError):
+        clustering.fit(X)
+
+    # Test using another metric than euclidean works with linkage complete
+    for affinity in PAIRED_DISTANCES:
+        # Compare the structured implementation (with a full connectivity
+        # matrix) to the unstructured one (connectivity=None)
+        clustering = AgglomerativeClustering(
+            n_clusters=10,
+            connectivity=np.ones((n_samples, n_samples)),
+            affinity=affinity,
+            linkage="complete")
+        clustering.fit(X)
+        clustering2 = AgglomerativeClustering(
+            n_clusters=10,
+            connectivity=None,
+            affinity=affinity,
+            linkage="complete")
+        clustering2.fit(X)
+        assert_almost_equal(normalized_mutual_info_score(clustering2.labels_,
+                                                         clustering.labels_),
+                            1)
+
+    # Test that using a distance matrix (affinity = 'precomputed') has same
+    # results (with connectivity constraints)
+    clustering = AgglomerativeClustering(n_clusters=10,
+                                         connectivity=connectivity,
+                                         linkage="complete")
+    clustering.fit(X)
+    X_dist = pairwise_distances(X)
+    clustering2 = AgglomerativeClustering(n_clusters=10,
+                                          connectivity=connectivity,
+                                          affinity='precomputed',
+                                          linkage="complete")
+    clustering2.fit(X_dist)
+    assert_array_equal(clustering.labels_, clustering2.labels_)
+
+
+def test_ward_agglomeration():
+    """Check FeatureAgglomeration with grid connectivity in a simple case."""
+    rng = np.random.RandomState(0)
+    # ``np.bool`` has been removed from NumPy; the builtin ``bool`` is the
+    # same dtype.
+    mask = np.ones([10, 10], dtype=bool)
+    X = rng.randn(50, 100)
+    connectivity = grid_to_graph(*mask.shape)
+    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
+    agglo.fit(X)
+    assert np.size(np.unique(agglo.labels_)) == 5
+
+    # The reduced data has one column per cluster, and transforming the
+    # reconstruction must give back the reduced data.
+    X_red = agglo.transform(X)
+    assert X_red.shape[1] == 5
+    X_full = agglo.inverse_transform(X_red)
+    assert np.unique(X_full[0]).size == 5
+    assert_array_almost_equal(agglo.transform(X_full), X_red)
+
+    # Check that fitting with no samples raises a ValueError
+    with pytest.raises(ValueError):
+        agglo.fit(X[:0])
+
+
+def test_single_linkage_clustering():
+    """Single linkage must recover the labels of the moons and circles."""
+    # Two emblematic cases, each given as (points, true labels).
+    datasets = (
+        make_moons(noise=0.05, random_state=42),
+        make_circles(factor=0.5, noise=0.025, random_state=42),
+    )
+    for points, true_labels in datasets:
+        clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
+        clustering.fit(points)
+        score = normalized_mutual_info_score(clustering.labels_, true_labels)
+        assert_almost_equal(score, 1)
+
+
+def assess_same_labelling(cut1, cut2):
+    """Assert that two labellings group the samples identically.
+
+    Used for comparison with scipy. Label values themselves may differ;
+    only the pairwise "same cluster" relation is compared.
+    """
+    def co_membership(cut):
+        # One-hot encode the labels, then multiply by the transpose to get
+        # a matrix with 1 where two samples share a label.
+        n_points = len(cut)
+        one_hot = np.zeros((n_points, cut.max() + 1))
+        one_hot[np.arange(n_points), cut] = 1
+        return np.dot(one_hot, one_hot.T)
+
+    first, second = co_membership(cut1), co_membership(cut2)
+    assert (first == second).all()
+
+
+def test_sparse_scikit_vs_scipy():
+    """Compare scikit linkage with full connectivity against scipy.
+
+    Full connectivity means the problem is effectively unstructured, so the
+    merge order must match ``scipy.cluster.hierarchy.linkage``.
+    """
+    n, p, k = 10, 5, 3
+    rng = np.random.RandomState(0)
+
+    # Not using a lil_matrix here, just to check that non sparse
+    # matrices are well handled
+    connectivity = np.ones((n, n))
+    for linkage, tree_builder in _TREE_BUILDERS.items():
+        for _ in range(5):
+            X = .1 * rng.normal(size=(n, p))
+            X -= 4. * np.arange(n)[:, np.newaxis]
+            X -= X.mean(axis=1)[:, np.newaxis]
+
+            out = hierarchy.linkage(X, method=linkage)
+
+            # ``np.int`` has been removed from NumPy; it was an alias of
+            # the builtin ``int``, which is used here instead.
+            children_ = out[:, :2].astype(int, copy=False)
+            children, _, n_leaves, _ = tree_builder(
+                X, connectivity=connectivity)
+
+            # Sort the order of child nodes per row for consistency
+            children.sort(axis=1)
+            assert_array_equal(children, children_, 'linkage tree differs'
+                                                    ' from scipy impl for'
+                                                    ' linkage: ' + linkage)
+
+            cut = _hc_cut(k, children, n_leaves)
+            cut_ = _hc_cut(k, children_, n_leaves)
+            assess_same_labelling(cut, cut_)
+
+    # Test error management in _hc_cut: asking for more clusters than
+    # leaves (using the tree from the last iteration) must fail.
+    with pytest.raises(ValueError):
+        _hc_cut(n_leaves + 1, children, n_leaves)
+
+
+# Make sure our custom mst_linkage_core gives
+# the same results as scipy's builtin
+@pytest.mark.parametrize('seed', range(5))
+def test_vector_scikit_single_vs_scipy_single(seed):
+    """Unstructured single linkage must match scipy's single linkage."""
+    n_samples, n_features, n_clusters = 10, 5, 3
+    rng = np.random.RandomState(seed)
+    X = .1 * rng.normal(size=(n_samples, n_features))
+    X -= 4. * np.arange(n_samples)[:, np.newaxis]
+    X -= X.mean(axis=1)[:, np.newaxis]
+
+    out = hierarchy.linkage(X, method='single')
+    # ``np.int`` has been removed from NumPy; it was an alias of the
+    # builtin ``int``, which is used here instead.
+    children_scipy = out[:, :2].astype(int)
+
+    children, _, n_leaves, _ = _TREE_BUILDERS['single'](X)
+
+    # Sort the order of child nodes per row for consistency
+    children.sort(axis=1)
+    assert_array_equal(children, children_scipy,
+                       'linkage tree differs'
+                       ' from scipy impl for'
+                       ' single linkage.')
+
+    # Cutting both trees into n_clusters must group samples identically.
+    cut = _hc_cut(n_clusters, children, n_leaves)
+    cut_scipy = _hc_cut(n_clusters, children_scipy, n_leaves)
+    assess_same_labelling(cut, cut_scipy)
+
+
+def test_identical_points():
+    """Identical points must be clustered together with sparse connectivity.
+
+    Ensures identical points are handled correctly when using mst with a
+    sparse connectivity matrix.
+    """
+    X = np.array([[0, 0, 0], [0, 0, 0],
+                  [1, 1, 1], [1, 1, 1],
+                  [2, 2, 2], [2, 2, 2]])
+    true_labels = np.array([0, 0, 1, 1, 2, 2])
+    connectivity = kneighbors_graph(X, n_neighbors=3, include_self=False)
+    # Symmetrize the k-neighbors graph.
+    connectivity = 0.5 * (connectivity + connectivity.T)
+    connectivity, n_components = _fix_connectivity(X,
+                                                   connectivity,
+                                                   'euclidean')
+
+    # 'average' used to be listed twice, which only re-ran the same case.
+    # NOTE(review): 'complete' may have been intended for the duplicate
+    # entry - confirm before adding it.
+    for linkage in ('single', 'average', 'ward'):
+        clustering = AgglomerativeClustering(n_clusters=3,
+                                             linkage=linkage,
+                                             connectivity=connectivity)
+        clustering.fit(X)
+
+        assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
+                                                         true_labels), 1)
+
+
+def test_connectivity_propagation():
+    # Check that connectivity in the ward tree is propagated correctly during
+    # merging.
+    X = np.array([(.014, .120), (.014, .099), (.014, .097),
+                  (.017, .153), (.017, .153), (.018, .153),
+                  (.018, .153), (.018, .153), (.018, .153),
+                  (.018, .153), (.018, .153), (.018, .153),
+                  (.018, .152), (.018, .149), (.018, .144)])
+    connectivity = kneighbors_graph(X, 10, include_self=False)
+    ward = AgglomerativeClustering(
+        n_clusters=4, connectivity=connectivity, linkage='ward')
+    # If changes are not propagated correctly, fit crashes with an
+    # IndexError
+    ward.fit(X)
+
+
+def test_ward_tree_children_order():
+    # Check that children are ordered in the same way for both structured and
+    # unstructured versions of ward_tree.
+
+    # test on five random datasets
+    n, p = 10, 5
+    rng = np.random.RandomState(0)
+
+    connectivity = np.ones((n, n))
+    for i in range(5):
+        X = .1 * rng.normal(size=(n, p))
+        X -= 4. * np.arange(n)[:, np.newaxis]
+        X -= X.mean(axis=1)[:, np.newaxis]
+
+        out_unstructured = ward_tree(X)
+        out_structured = ward_tree(X, connectivity=connectivity)
+
+        assert_array_equal(out_unstructured[0], out_structured[0])
+
+
+def test_ward_linkage_tree_return_distance():
+    # Test return_distance option on linkage and ward trees
+
+    # test that return_distance when set true, gives same
+    # output on both structured and unstructured clustering.
+    n, p = 10, 5
+    rng = np.random.RandomState(0)
+
+    # All-ones connectivity: every sample is connected to every other one.
+    connectivity = np.ones((n, n))
+    for i in range(5):
+        X = .1 * rng.normal(size=(n, p))
+        X -= 4. * np.arange(n)[:, np.newaxis]
+        X -= X.mean(axis=1)[:, np.newaxis]
+
+        out_unstructured = ward_tree(X, return_distance=True)
+        out_structured = ward_tree(X, connectivity=connectivity,
+                                   return_distance=True)
+
+        # get children
+        children_unstructured = out_unstructured[0]
+        children_structured = out_structured[0]
+
+        # check if we got the same clusters
+        assert_array_equal(children_unstructured, children_structured)
+
+        # check if the distances are the same
+        dist_unstructured = out_unstructured[-1]
+        dist_structured = out_structured[-1]
+
+        assert_array_almost_equal(dist_unstructured, dist_structured)
+
+        # NOTE(review): the trailing ``[-1]`` below appears to select the
+        # distances array already (the second half of this test reads
+        # distances at index 4 of the same return value). If so,
+        # ``*_items[-1]`` and ``*_items[0]`` are the last and first merge
+        # distances, not distances and children, and the two assertions
+        # compare scalars only. Confirm the intent before relying on this
+        # loop to check the children.
+        for linkage in ['average', 'complete', 'single']:
+            structured_items = linkage_tree(
+                X, connectivity=connectivity, linkage=linkage,
+                return_distance=True)[-1]
+            unstructured_items = linkage_tree(
+                X, linkage=linkage, return_distance=True)[-1]
+            structured_dist = structured_items[-1]
+            unstructured_dist = unstructured_items[-1]
+            structured_children = structured_items[0]
+            unstructured_children = unstructured_items[0]
+            assert_array_almost_equal(structured_dist, unstructured_dist)
+            assert_array_almost_equal(
+                structured_children, unstructured_children)
+
+    # test on the following dataset where we know the truth
+    # taken from scipy/cluster/tests/hierarchy_test_data.py
+    X = np.array([[1.43054825, -7.5693489],
+                  [6.95887839, 6.82293382],
+                  [2.87137846, -9.68248579],
+                  [7.87974764, -6.05485803],
+                  [8.24018364, -6.09495602],
+                  [7.39020262, 8.54004355]])
+    # truth
+    # Only the first three columns of each row are read below: the two
+    # merged node ids and the merge distance.
+    linkage_X_ward = np.array([[3., 4., 0.36265956, 2.],
+                               [1., 5., 1.77045373, 2.],
+                               [0., 2., 2.55760419, 2.],
+                               [6., 8., 9.10208346, 4.],
+                               [7., 9., 24.7784379, 6.]])
+
+    linkage_X_complete = np.array(
+        [[3., 4., 0.36265956, 2.],
+         [1., 5., 1.77045373, 2.],
+         [0., 2., 2.55760419, 2.],
+         [6., 8., 6.96742194, 4.],
+         [7., 9., 18.77445997, 6.]])
+
+    linkage_X_average = np.array(
+        [[3., 4., 0.36265956, 2.],
+         [1., 5., 1.77045373, 2.],
+         [0., 2., 2.55760419, 2.],
+         [6., 8., 6.55832839, 4.],
+         [7., 9., 15.44089605, 6.]])
+
+    n_samples, n_features = np.shape(X)
+    connectivity_X = np.ones((n_samples, n_samples))
+
+    out_X_unstructured = ward_tree(X, return_distance=True)
+    out_X_structured = ward_tree(X, connectivity=connectivity_X,
+                                 return_distance=True)
+
+    # check that the labels are the same
+    assert_array_equal(linkage_X_ward[:, :2], out_X_unstructured[0])
+    assert_array_equal(linkage_X_ward[:, :2], out_X_structured[0])
+
+    # check that the distances are correct
+    assert_array_almost_equal(linkage_X_ward[:, 2], out_X_unstructured[4])
+    assert_array_almost_equal(linkage_X_ward[:, 2], out_X_structured[4])
+
+    # NOTE(review): ``zip`` stops at the shorter sequence, so with only two
+    # truth arrays the 'single' entry is never exercised by this loop.
+    linkage_options = ['complete', 'average', 'single']
+    X_linkage_truth = [linkage_X_complete, linkage_X_average]
+    for (linkage, X_truth) in zip(linkage_options, X_linkage_truth):
+        out_X_unstructured = linkage_tree(
+            X, return_distance=True, linkage=linkage)
+        out_X_structured = linkage_tree(
+            X, connectivity=connectivity_X, linkage=linkage,
+            return_distance=True)
+
+        # check that the labels are the same
+        assert_array_equal(X_truth[:, :2], out_X_unstructured[0])
+        assert_array_equal(X_truth[:, :2], out_X_structured[0])
+
+        # check that the distances are correct
+        assert_array_almost_equal(X_truth[:, 2], out_X_unstructured[4])
+        assert_array_almost_equal(X_truth[:, 2], out_X_structured[4])
+
+
+def test_connectivity_fixing_non_lil():
+    # Check non regression of a bug if a non item assignable connectivity is
+    # provided with more than one component.
+    # create dummy data
+    x = np.array([[0, 0], [1, 1]])
+    # create a mask with several components to force connectivity fixing
+    m = np.array([[True, False], [False, True]])
+    c = grid_to_graph(n_x=2, n_y=2, mask=m)
+    w = AgglomerativeClustering(connectivity=c, linkage='ward')
+    assert_warns(UserWarning, w.fit, x)
+
+
+def test_int_float_dict():
+    rng = np.random.RandomState(0)
+    keys = np.unique(rng.randint(100, size=10).astype(np.intp, copy=False))
+    values = rng.rand(len(keys))
+
+    d = IntFloatDict(keys, values)
+    for key, value in zip(keys, values):
+        assert d[key] == value
+
+    other_keys = np.arange(50, dtype=np.intp)[::2]
+    other_values = np.full(50, 0.5)[::2]
+    other = IntFloatDict(other_keys, other_values)
+    # Complete smoke test
+    max_merge(d, other, mask=np.ones(100, dtype=np.intp), n_a=1, n_b=1)
+    average_merge(d, other, mask=np.ones(100, dtype=np.intp), n_a=1, n_b=1)
+
+
+def test_connectivity_callable():
+    rng = np.random.RandomState(0)
+    X = rng.rand(20, 5)
+    connectivity = kneighbors_graph(X, 3, include_self=False)
+    aglc1 = AgglomerativeClustering(connectivity=connectivity)
+    aglc2 = AgglomerativeClustering(
+        connectivity=partial(kneighbors_graph, n_neighbors=3,
+                             include_self=False))
+    aglc1.fit(X)
+    aglc2.fit(X)
+    assert_array_equal(aglc1.labels_, aglc2.labels_)
+
+
+def test_connectivity_ignores_diagonal():
+    rng = np.random.RandomState(0)
+    X = rng.rand(20, 5)
+    connectivity = kneighbors_graph(X, 3, include_self=False)
+    connectivity_include_self = kneighbors_graph(X, 3, include_self=True)
+    aglc1 = AgglomerativeClustering(connectivity=connectivity)
+    aglc2 = AgglomerativeClustering(connectivity=connectivity_include_self)
+    aglc1.fit(X)
+    aglc2.fit(X)
+    assert_array_equal(aglc1.labels_, aglc2.labels_)
+
+
+def test_compute_full_tree():
+    # Test that the full tree is computed if n_clusters is small
+    rng = np.random.RandomState(0)
+    X = rng.randn(10, 2)
+    connectivity = kneighbors_graph(X, 5, include_self=False)
+
+    # When n_clusters is less, the full tree should be built
+    # that is the number of merges should be n_samples - 1
+    agc = AgglomerativeClustering(n_clusters=2, connectivity=connectivity)
+    agc.fit(X)
+    n_samples = X.shape[0]
+    n_nodes = agc.children_.shape[0]
+    assert n_nodes == n_samples - 1
+
+    # When n_clusters is large, greater than max of 100 and 0.02 * n_samples.
+    # we should stop when there are n_clusters.
+    n_clusters = 101
+    X = rng.randn(200, 2)
+    connectivity = kneighbors_graph(X, 10, include_self=False)
+    agc = AgglomerativeClustering(n_clusters=n_clusters,
+                                  connectivity=connectivity)
+    agc.fit(X)
+    n_samples = X.shape[0]
+    n_nodes = agc.children_.shape[0]
+    assert n_nodes == n_samples - n_clusters
+
+
+def test_n_components():
+    # Test n_components returned by linkage, average and ward tree
+    rng = np.random.RandomState(0)
+    X = rng.rand(5, 5)
+
+    # Connectivity matrix having five components.
+    connectivity = np.eye(5)
+
+    for linkage_func in _TREE_BUILDERS.values():
+        assert ignore_warnings(linkage_func)(X, connectivity)[1] == 5
+
+
+def test_agg_n_clusters():
+    # Test that an error is raised when n_clusters <= 0
+
+    rng = np.random.RandomState(0)
+    X = rng.rand(20, 10)
+    for n_clus in [-1, 0]:
+        agc = AgglomerativeClustering(n_clusters=n_clus)
+        msg = ("n_clusters should be an integer greater than 0."
+               " %s was provided." % str(agc.n_clusters))
+        assert_raise_message(ValueError, msg, agc.fit, X)
+
+
+def test_affinity_passed_to_fix_connectivity():
+    # Test that the affinity parameter is actually passed to the pairwise
+    # function
+
+    size = 2
+    rng = np.random.RandomState(0)
+    X = rng.randn(size, size)
+    mask = np.array([True, False, False, True])
+
+    connectivity = grid_to_graph(n_x=size, n_y=size,
+                                 mask=mask, return_as=np.ndarray)
+
+    class FakeAffinity:
+        def __init__(self):
+            self.counter = 0
+
+        def increment(self, *args, **kwargs):
+            self.counter += 1
+            return self.counter
+
+    fa = FakeAffinity()
+
+    linkage_tree(X, connectivity=connectivity, affinity=fa.increment)
+
+    assert fa.counter == 3
+
+
+@pytest.mark.parametrize('linkage', ['ward', 'complete', 'average'])
+def test_agglomerative_clustering_with_distance_threshold(linkage):
+    # Check that we obtain the correct number of clusters with
+    # agglomerative clustering with distance_threshold.
+    rng = np.random.RandomState(0)
+    mask = np.ones([10, 10], dtype=np.bool)
+    n_samples = 100
+    X = rng.randn(n_samples, 50)
+    connectivity = grid_to_graph(*mask.shape)
+    # test when distance threshold is set to 10
+    distance_threshold = 10
+    for conn in [None, connectivity]:
+        clustering = AgglomerativeClustering(
+            n_clusters=None,
+            distance_threshold=distance_threshold,
+            connectivity=conn, linkage=linkage)
+        clustering.fit(X)
+        clusters_produced = clustering.labels_
+        num_clusters_produced = len(np.unique(clustering.labels_))
+        # test if the clusters produced match the point in the linkage tree
+        # where the distance exceeds the threshold
+        tree_builder = _TREE_BUILDERS[linkage]
+        children, n_components, n_leaves, parent, distances = \
+            tree_builder(X, connectivity=conn, n_clusters=None,
+                         return_distance=True)
+        num_clusters_at_threshold = np.count_nonzero(
+            distances >= distance_threshold) + 1
+        # test number of clusters produced
+        assert num_clusters_at_threshold == num_clusters_produced
+        # test clusters produced
+        clusters_at_threshold = _hc_cut(n_clusters=num_clusters_produced,
+                                        children=children,
+                                        n_leaves=n_leaves)
+        assert np.array_equiv(clusters_produced,
+                              clusters_at_threshold)
+
+
+def test_small_distance_threshold():
+    rng = np.random.RandomState(0)
+    n_samples = 10
+    X = rng.randint(-300, 300, size=(n_samples, 3))
+    # this should result in all data in their own clusters, given that
+    # their pairwise distances are bigger than .1 (which may not be the case
+    # with a different random seed).
+    clustering = AgglomerativeClustering(
+        n_clusters=None,
+        distance_threshold=1.,
+        linkage="single").fit(X)
+    # check that the pairwise distances are indeed all larger than .1
+    all_distances = pairwise_distances(X, metric='minkowski', p=2)
+    np.fill_diagonal(all_distances, np.inf)
+    assert np.all(all_distances > .1)
+    assert clustering.n_clusters_ == n_samples
+
+
+def test_cluster_distances_with_distance_threshold():
+    rng = np.random.RandomState(0)
+    n_samples = 100
+    X = rng.randint(-10, 10, size=(n_samples, 3))
+    # check the distances within the clusters and with other clusters
+    distance_threshold = 4
+    clustering = AgglomerativeClustering(
+        n_clusters=None,
+        distance_threshold=distance_threshold,
+        linkage="single").fit(X)
+    labels = clustering.labels_
+    D = pairwise_distances(X, metric="minkowski", p=2)
+    # to avoid taking the 0 diagonal in min()
+    np.fill_diagonal(D, np.inf)
+    for label in np.unique(labels):
+        in_cluster_mask = labels == label
+        max_in_cluster_distance = (D[in_cluster_mask][:, in_cluster_mask]
+                                   .min(axis=0).max())
+        min_out_cluster_distance = (D[in_cluster_mask][:, ~in_cluster_mask]
+                                    .min(axis=0).min())
+        # single data point clusters only have that inf diagonal here
+        if in_cluster_mask.sum() > 1:
+            assert max_in_cluster_distance < distance_threshold
+        assert min_out_cluster_distance >= distance_threshold
+
+
+@pytest.mark.parametrize('linkage', ['ward', 'complete', 'average'])
+@pytest.mark.parametrize(('threshold', 'y_true'),
+                         [(0.5, [1, 0]), (1.0, [1, 0]), (1.5, [0, 0])])
+def test_agglomerative_clustering_with_distance_threshold_edge_case(
+        linkage, threshold, y_true):
+    # test boundary case of distance_threshold matching the distance
+    X = [[0], [1]]
+    clusterer = AgglomerativeClustering(
+        n_clusters=None,
+        distance_threshold=threshold,
+        linkage=linkage)
+    y_pred = clusterer.fit_predict(X)
+    assert adjusted_rand_score(y_true, y_pred) == 1
+
+
+def test_dist_threshold_invalid_parameters():
+    X = [[0], [1]]
+    with pytest.raises(ValueError, match="Exactly one of "):
+        AgglomerativeClustering(n_clusters=None,
+                                distance_threshold=None).fit(X)
+
+    with pytest.raises(ValueError, match="Exactly one of "):
+        AgglomerativeClustering(n_clusters=2,
+                                distance_threshold=1).fit(X)
+
+    X = [[0], [1]]
+    with pytest.raises(ValueError, match="compute_full_tree must be True if"):
+        AgglomerativeClustering(n_clusters=None,
+                                distance_threshold=1,
+                                compute_full_tree=False).fit(X)
+
+
+def test_invalid_shape_precomputed_dist_matrix():
+    # Check that an error is raised when affinity='precomputed'
+    # and a non square matrix is passed (PR #16257).
+    rng = np.random.RandomState(0)
+    X = rng.rand(5, 3)
+    with pytest.raises(ValueError, match="Distance matrix should be square, "):
+        AgglomerativeClustering(affinity='precomputed',
+                                linkage='complete').fit(X)
--- a/venv/Lib/site-packages/sklearn/cluster/tests/test_k_means.py
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/test_k_means.py
--- a/venv/Lib/site-packages/sklearn/cluster/tests/test_mean_shift.py
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/test_mean_shift.py
@ -0,0 +1,194 @@
+"""
+Testing for mean shift clustering methods
+
+"""
+
+import numpy as np
+import warnings
+import pytest
+
+from scipy import sparse
+
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_raise_message
+from sklearn.utils._testing import assert_allclose
+
+from sklearn.cluster import MeanShift
+from sklearn.cluster import mean_shift
+from sklearn.cluster import estimate_bandwidth
+from sklearn.cluster import get_bin_seeds
+from sklearn.datasets import make_blobs
+from sklearn.metrics import v_measure_score
+
+
+# Module-level dataset: 300 two-dimensional samples drawn around three
+# centers (each shifted by 10), with a fixed random_state.
+n_clusters = 3
+centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10
+X, _ = make_blobs(n_samples=300, n_features=2, centers=centers,
+                  cluster_std=0.4, shuffle=True, random_state=11)
+
+
+def test_estimate_bandwidth():
+    # Test estimate_bandwidth
+    bandwidth = estimate_bandwidth(X, n_samples=200)
+    assert 0.9 <= bandwidth <= 1.5
+
+
+def test_estimate_bandwidth_1sample():
+    # Test estimate_bandwidth when n_samples=1 and quantile<1, so that
+    # n_neighbors is set to 1.
+    bandwidth = estimate_bandwidth(X, n_samples=1, quantile=0.3)
+    assert bandwidth == pytest.approx(0., abs=1e-5)
+
+
+@pytest.mark.parametrize("bandwidth, cluster_all, expected, "
+                         "first_cluster_label",
+                         [(1.2, True, 3, 0), (1.2, False, 4, -1)])
+def test_mean_shift(bandwidth, cluster_all, expected, first_cluster_label):
+    # Test MeanShift algorithm
+    ms = MeanShift(bandwidth=bandwidth, cluster_all=cluster_all)
+    labels = ms.fit(X).labels_
+    labels_unique = np.unique(labels)
+    n_clusters_ = len(labels_unique)
+    assert n_clusters_ == expected
+    assert labels_unique[0] == first_cluster_label
+
+    cluster_centers, labels_mean_shift = mean_shift(X, cluster_all=cluster_all)
+    labels_mean_shift_unique = np.unique(labels_mean_shift)
+    n_clusters_mean_shift = len(labels_mean_shift_unique)
+    assert n_clusters_mean_shift == expected
+    assert labels_mean_shift_unique[0] == first_cluster_label
+
+
+def test_mean_shift_negative_bandwidth():
+    bandwidth = -1
+    ms = MeanShift(bandwidth=bandwidth)
+    msg = (r"bandwidth needs to be greater than zero or None,"
+           r" got -1\.000000")
+    with pytest.raises(ValueError, match=msg):
+        ms.fit(X)
+
+
+def test_estimate_bandwidth_with_sparse_matrix():
+    # Test estimate_bandwidth with sparse matrix
+    X = sparse.lil_matrix((1000, 1000))
+    msg = "A sparse matrix was passed, but dense data is required."
+    assert_raise_message(TypeError, msg, estimate_bandwidth, X)
+
+
+def test_parallel():
+    centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10
+    X, _ = make_blobs(n_samples=50, n_features=2, centers=centers,
+                      cluster_std=0.4, shuffle=True, random_state=11)
+
+    ms1 = MeanShift(n_jobs=2)
+    ms1.fit(X)
+
+    ms2 = MeanShift()
+    ms2.fit(X)
+
+    assert_array_almost_equal(ms1.cluster_centers_, ms2.cluster_centers_)
+    assert_array_equal(ms1.labels_, ms2.labels_)
+
+
+def test_meanshift_predict():
+    # Test MeanShift.predict
+    ms = MeanShift(bandwidth=1.2)
+    labels = ms.fit_predict(X)
+    labels2 = ms.predict(X)
+    assert_array_equal(labels, labels2)
+
+
+def test_meanshift_all_orphans():
+    # init away from the data, crash with a sensible warning
+    ms = MeanShift(bandwidth=0.1, seeds=[[-9, -9], [-10, -10]])
+    msg = "No point was within bandwidth=0.1"
+    assert_raise_message(ValueError, msg, ms.fit, X,)
+
+
+def test_unfitted():
+    # Non-regression: before fit, there should be not fitted attributes.
+    ms = MeanShift()
+    assert not hasattr(ms, "cluster_centers_")
+    assert not hasattr(ms, "labels_")
+
+
+def test_cluster_intensity_tie():
+    X = np.array([[1, 1], [2, 1], [1, 0],
+                  [4, 7], [3, 5], [3, 6]])
+    c1 = MeanShift(bandwidth=2).fit(X)
+
+    X = np.array([[4, 7], [3, 5], [3, 6],
+                  [1, 1], [2, 1], [1, 0]])
+    c2 = MeanShift(bandwidth=2).fit(X)
+    assert_array_equal(c1.labels_, [1, 1, 1, 0, 0, 0])
+    assert_array_equal(c2.labels_, [0, 0, 0, 1, 1, 1])
+
+
+def test_bin_seeds():
+    # Test the bin seeding technique which can be used in the mean shift
+    # algorithm
+    # Data is just 6 points in the plane
+    X = np.array([[1., 1.], [1.4, 1.4], [1.8, 1.2],
+                  [2., 1.], [2.1, 1.1], [0., 0.]])
+
+    # With a bin coarseness of 1.0 and min_bin_freq of 1, 3 bins should be
+    # found
+    ground_truth = {(1., 1.), (2., 1.), (0., 0.)}
+    test_bins = get_bin_seeds(X, 1, 1)
+    test_result = set(tuple(p) for p in test_bins)
+    assert len(ground_truth.symmetric_difference(test_result)) == 0
+
+    # With a bin coarseness of 1.0 and min_bin_freq of 2, 2 bins should be
+    # found
+    ground_truth = {(1., 1.), (2., 1.)}
+    test_bins = get_bin_seeds(X, 1, 2)
+    test_result = set(tuple(p) for p in test_bins)
+    assert len(ground_truth.symmetric_difference(test_result)) == 0
+
+    # With a bin size of 0.01 and min_bin_freq of 1, 6 bins should be found
+    # we bail and use the whole data here.
+    with warnings.catch_warnings(record=True):
+        test_bins = get_bin_seeds(X, 0.01, 1)
+    assert_array_almost_equal(test_bins, X)
+
+    # tight clusters around [0, 0] and [1, 1], only get two bins
+    X, _ = make_blobs(n_samples=100, n_features=2, centers=[[0, 0], [1, 1]],
+                      cluster_std=0.1, random_state=0)
+    test_bins = get_bin_seeds(X, 1)
+    assert_array_equal(test_bins, [[0, 0], [1, 1]])
+
+
+@pytest.mark.parametrize('max_iter', [1, 100])
+def test_max_iter(max_iter):
+    clusters1, _ = mean_shift(X, max_iter=max_iter)
+    ms = MeanShift(max_iter=max_iter).fit(X)
+    clusters2 = ms.cluster_centers_
+
+    assert ms.n_iter_ <= ms.max_iter
+    assert len(clusters1) == len(clusters2)
+
+    for c1, c2 in zip(clusters1, clusters2):
+        assert np.allclose(c1, c2)
+
+
+def test_mean_shift_zero_bandwidth():
+    # Check that mean shift works when the estimated bandwidth is 0.
+    X = np.array([1, 1, 1, 2, 2, 2, 3, 3]).reshape(-1, 1)
+
+    # estimate_bandwidth with default args returns 0 on this dataset
+    bandwidth = estimate_bandwidth(X)
+    assert bandwidth == 0
+
+    # get_bin_seeds with a 0 bin_size should return the dataset itself
+    assert get_bin_seeds(X, bin_size=bandwidth) is X
+
+    # MeanShift with binning and a 0 estimated bandwidth should be equivalent
+    # to no binning.
+    ms_binning = MeanShift(bin_seeding=True, bandwidth=None).fit(X)
+    ms_nobinning = MeanShift(bin_seeding=False).fit(X)
+    expected_labels = np.array([0, 0, 0, 1, 1, 1, 2, 2])
+
+    assert v_measure_score(ms_binning.labels_, expected_labels) == 1
+    assert v_measure_score(ms_nobinning.labels_, expected_labels) == 1
+    assert_allclose(ms_binning.cluster_centers_, ms_nobinning.cluster_centers_)
--- a/venv/Lib/site-packages/sklearn/cluster/tests/test_optics.py
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/test_optics.py
@ -0,0 +1,429 @@
+# Authors: Shane Grigsby <refuge@rocktalus.com>
+#          Adrin Jalali <adrin.jalali@gmail.com>
+# License: BSD 3 clause
+
+import numpy as np
+import pytest
+
+from sklearn.datasets import make_blobs
+from sklearn.cluster import OPTICS
+from sklearn.cluster._optics import _extend_region, _extract_xi_labels
+from sklearn.metrics.cluster import contingency_matrix
+from sklearn.metrics.pairwise import pairwise_distances
+from sklearn.cluster import DBSCAN
+from sklearn.utils import shuffle
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_raise_message
+from sklearn.utils._testing import assert_allclose
+
+from sklearn.cluster.tests.common import generate_clustered_data
+
+
+# Module-level dataset: six groups of 10 two-dimensional points, each drawn
+# around its own center with its own spread, stacked into X. The groups are
+# drawn in sequence from one seeded random stream, so their order matters.
+rng = np.random.RandomState(0)
+n_points_per_cluster = 10
+C1 = [-5, -2] + .8 * rng.randn(n_points_per_cluster, 2)
+C2 = [4, -1] + .1 * rng.randn(n_points_per_cluster, 2)
+C3 = [1, -2] + .2 * rng.randn(n_points_per_cluster, 2)
+C4 = [-2, 3] + .3 * rng.randn(n_points_per_cluster, 2)
+C5 = [3, -2] + 1.6 * rng.randn(n_points_per_cluster, 2)
+C6 = [5, 6] + 2 * rng.randn(n_points_per_cluster, 2)
+X = np.vstack((C1, C2, C3, C4, C5, C6))
+
+
+@pytest.mark.parametrize(
+    ('r_plot', 'end'),
+    [[[10, 8.9, 8.8, 8.7, 7, 10], 3],
+     [[10, 8.9, 8.8, 8.7, 8.6, 7, 10], 0],
+     [[10, 8.9, 8.8, 8.7, 7, 6, np.inf], 4],
+     [[10, 8.9, 8.8, 8.7, 7, 6, np.inf], 4],
+     ])
+def test_extend_downward(r_plot, end):
+    r_plot = np.array(r_plot)
+    ratio = r_plot[:-1] / r_plot[1:]
+    steep_downward = ratio >= 1 / .9
+    upward = ratio < 1
+
+    e = _extend_region(steep_downward, upward, 0, 2)
+    assert e == end
+
+
+@pytest.mark.parametrize(
+    ('r_plot', 'end'),
+    [[[1, 2, 2.1, 2.2, 4, 8, 8, np.inf], 6],
+     [[1, 2, 2.1, 2.2, 2.3, 4, 8, 8, np.inf], 0],
+     [[1, 2, 2.1, 2, np.inf], 0],
+     [[1, 2, 2.1, np.inf], 2],
+     ])
+def test_extend_upward(r_plot, end):
+    r_plot = np.array(r_plot)
+    ratio = r_plot[:-1] / r_plot[1:]
+    steep_upward = ratio <= .9
+    downward = ratio > 1
+
+    e = _extend_region(steep_upward, downward, 0, 2)
+    assert e == end
+
+
+@pytest.mark.parametrize(
+    ('ordering', 'clusters', 'expected'),
+    [[[0, 1, 2, 3], [[0, 1], [2, 3]], [0, 0, 1, 1]],
+     [[0, 1, 2, 3], [[0, 1], [3, 3]], [0, 0, -1, 1]],
+     [[0, 1, 2, 3], [[0, 1], [3, 3], [0, 3]], [0, 0, -1, 1]],
+     [[3, 1, 2, 0], [[0, 1], [3, 3], [0, 3]], [1, 0, -1, 0]],
+     ])
+def test_the_extract_xi_labels(ordering, clusters, expected):
+    labels = _extract_xi_labels(ordering, clusters)
+
+    assert_array_equal(labels, expected)
+
+
+def test_extract_xi():
+    # small and easy test (no clusters around other clusters)
+    # but with a clear noise data.
+    # NOTE: one seeded rng feeds both the data generation and every
+    # shuffle below, so the order of these statements matters.
+    rng = np.random.RandomState(0)
+    n_points_per_cluster = 5
+
+    C1 = [-5, -2] + .8 * rng.randn(n_points_per_cluster, 2)
+    C2 = [4, -1] + .1 * rng.randn(n_points_per_cluster, 2)
+    C3 = [1, -2] + .2 * rng.randn(n_points_per_cluster, 2)
+    C4 = [-2, 3] + .3 * rng.randn(n_points_per_cluster, 2)
+    C5 = [3, -2] + .6 * rng.randn(n_points_per_cluster, 2)
+    C6 = [5, 6] + .2 * rng.randn(n_points_per_cluster, 2)
+
+    # First scenario: a single far-away point at (100, 100), expected to be
+    # labelled as noise (-1). C3 and C5 share the expected label 1.
+    X = np.vstack((C1, C2, C3, C4, C5, np.array([[100, 100]]), C6))
+    expected_labels = np.r_[[2] * 5, [0] * 5, [1] * 5, [3] * 5, [1] * 5,
+                            -1, [4] * 5]
+    X, expected_labels = shuffle(X, expected_labels, random_state=rng)
+
+    clust = OPTICS(min_samples=3, min_cluster_size=2,
+                   max_eps=20, cluster_method='xi',
+                   xi=0.4).fit(X)
+    assert_array_equal(clust.labels_, expected_labels)
+
+    # check float min_samples and min_cluster_size
+    clust = OPTICS(min_samples=0.1, min_cluster_size=0.08,
+                   max_eps=20, cluster_method='xi',
+                   xi=0.4).fit(X)
+    assert_array_equal(clust.labels_, expected_labels)
+
+    # Second scenario: two coincident far-away points, both expected to be
+    # noise with min_cluster_size=3.
+    X = np.vstack((C1, C2, C3, C4, C5, np.array([[100, 100]] * 2), C6))
+    expected_labels = np.r_[[1] * 5, [3] * 5, [2] * 5, [0] * 5, [2] * 5,
+                            -1, -1, [4] * 5]
+    X, expected_labels = shuffle(X, expected_labels, random_state=rng)
+
+    clust = OPTICS(min_samples=3, min_cluster_size=3,
+                   max_eps=20, cluster_method='xi',
+                   xi=0.3).fit(X)
+    # this may fail if the predecessor correction is not at work!
+    assert_array_equal(clust.labels_, expected_labels)
+
+    # Third scenario: three hand-written groups of four points each, at
+    # very different scales, with no noise expected.
+    C1 = [[0, 0], [0, 0.1], [0, -.1], [0.1, 0]]
+    C2 = [[10, 10], [10, 9], [10, 11], [9, 10]]
+    C3 = [[100, 100], [100, 90], [100, 110], [90, 100]]
+    X = np.vstack((C1, C2, C3))
+    expected_labels = np.r_[[0] * 4, [1] * 4, [2] * 4]
+    X, expected_labels = shuffle(X, expected_labels, random_state=rng)
+
+    clust = OPTICS(min_samples=2, min_cluster_size=2,
+                   max_eps=np.inf, cluster_method='xi',
+                   xi=0.04).fit(X)
+    assert_array_equal(clust.labels_, expected_labels)
+
+
+def test_cluster_hierarchy_():
+    rng = np.random.RandomState(0)
+    n_points_per_cluster = 100
+    C1 = [0, 0] + 2 * rng.randn(n_points_per_cluster, 2)
+    C2 = [0, 0] + 50 * rng.randn(n_points_per_cluster, 2)
+    X = np.vstack((C1, C2))
+    X = shuffle(X, random_state=0)
+
+    clusters = OPTICS(min_samples=20, xi=.1).fit(X).cluster_hierarchy_
+    assert clusters.shape == (2, 2)
+    diff = np.sum(clusters - np.array([[0, 99], [0, 199]]))
+    assert diff / len(X) < 0.05
+
+
+def test_correct_number_of_clusters():
+    # in 'auto' mode
+
+    n_clusters = 3
+    X = generate_clustered_data(n_clusters=n_clusters)
+    # Parameters chosen specifically for this task.
+    # Compute OPTICS
+    clust = OPTICS(max_eps=5.0 * 6.0, min_samples=4, xi=.1)
+    clust.fit(X)
+    # number of clusters, ignoring noise if present
+    n_clusters_1 = len(set(clust.labels_)) - int(-1 in clust.labels_)
+    assert n_clusters_1 == n_clusters
+
+    # check attribute types and sizes
+    assert clust.labels_.shape == (len(X),)
+    assert clust.labels_.dtype.kind == 'i'
+
+    assert clust.reachability_.shape == (len(X),)
+    assert clust.reachability_.dtype.kind == 'f'
+
+    assert clust.core_distances_.shape == (len(X),)
+    assert clust.core_distances_.dtype.kind == 'f'
+
+    assert clust.ordering_.shape == (len(X),)
+    assert clust.ordering_.dtype.kind == 'i'
+    assert set(clust.ordering_) == set(range(len(X)))
+
+
+def test_minimum_number_of_sample_check():
+    # test that we check a minimum number of samples
+    msg = "min_samples must be no greater than"
+
+    # Compute OPTICS
+    X = [[1, 1]]
+    clust = OPTICS(max_eps=5.0 * 0.3, min_samples=10, min_cluster_size=1)
+
+    # Run the fit
+    assert_raise_message(ValueError, msg, clust.fit, X)
+
+
+def test_bad_extract():
+    # Test an extraction of eps too close to original eps
+    msg = "Specify an epsilon smaller than 0.15. Got 0.3."
+    centers = [[1, 1], [-1, -1], [1, -1]]
+    X, labels_true = make_blobs(n_samples=750, centers=centers,
+                                cluster_std=0.4, random_state=0)
+
+    # Compute OPTICS
+    clust = OPTICS(max_eps=5.0 * 0.03,
+                   cluster_method='dbscan',
+                   eps=0.3, min_samples=10)
+    assert_raise_message(ValueError, msg, clust.fit, X)
+
+
+def test_bad_reachability():
+    msg = "All reachability values are inf. Set a larger max_eps."
+    centers = [[1, 1], [-1, -1], [1, -1]]
+    X, labels_true = make_blobs(n_samples=750, centers=centers,
+                                cluster_std=0.4, random_state=0)
+
+    with pytest.warns(UserWarning, match=msg):
+        clust = OPTICS(max_eps=5.0 * 0.003, min_samples=10, eps=0.015)
+        clust.fit(X)
+
+
+def test_close_extract():
+    # Test extract where extraction eps is close to scaled max_eps
+
+    centers = [[1, 1], [-1, -1], [1, -1]]
+    X, labels_true = make_blobs(n_samples=750, centers=centers,
+                                cluster_std=0.4, random_state=0)
+
+    # Compute OPTICS
+    clust = OPTICS(max_eps=1.0, cluster_method='dbscan',
+                   eps=0.3, min_samples=10).fit(X)
+    # Cluster ordering starts at 0; max cluster label = 2 is 3 clusters
+    assert max(clust.labels_) == 2
+
+
+@pytest.mark.parametrize('eps', [0.1, .3, .5])
+@pytest.mark.parametrize('min_samples', [3, 10, 20])
+def test_dbscan_optics_parity(eps, min_samples):
+    # Test that OPTICS clustering labels are <= 5% difference of DBSCAN
+
+    centers = [[1, 1], [-1, -1], [1, -1]]
+    X, labels_true = make_blobs(n_samples=750, centers=centers,
+                                cluster_std=0.4, random_state=0)
+
+    # calculate optics with dbscan extract at 0.3 epsilon
+    op = OPTICS(min_samples=min_samples, cluster_method='dbscan',
+                eps=eps).fit(X)
+
+    # calculate dbscan labels
+    db = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
+
+    contingency = contingency_matrix(db.labels_, op.labels_)
+    agree = min(np.sum(np.max(contingency, axis=0)),
+                np.sum(np.max(contingency, axis=1)))
+    disagree = X.shape[0] - agree
+
+    percent_mismatch = np.round((disagree - 1) / X.shape[0], 2)
+
+    # verify label mismatch is <= 5% labels
+    assert percent_mismatch <= 0.05
+
+
+def test_min_samples_edge_case():
+    C1 = [[0, 0], [0, 0.1], [0, -.1]]
+    C2 = [[10, 10], [10, 9], [10, 11]]
+    C3 = [[100, 100], [100, 96], [100, 106]]
+    X = np.vstack((C1, C2, C3))
+
+    expected_labels = np.r_[[0] * 3, [1] * 3, [2] * 3]
+    clust = OPTICS(min_samples=3,
+                   max_eps=7, cluster_method='xi',
+                   xi=0.04).fit(X)
+    assert_array_equal(clust.labels_, expected_labels)
+
+    expected_labels = np.r_[[0] * 3, [1] * 3, [-1] * 3]
+    clust = OPTICS(min_samples=3,
+                   max_eps=3, cluster_method='xi',
+                   xi=0.04).fit(X)
+    assert_array_equal(clust.labels_, expected_labels)
+
+    expected_labels = np.r_[[-1] * 9]
+    with pytest.warns(UserWarning, match="All reachability values"):
+        clust = OPTICS(min_samples=4,
+                       max_eps=3, cluster_method='xi',
+                       xi=0.04).fit(X)
+        assert_array_equal(clust.labels_, expected_labels)
+
+
+# try arbitrary minimum sizes
+@pytest.mark.parametrize('min_cluster_size', range(2, X.shape[0] // 10, 23))
+def test_min_cluster_size(min_cluster_size):
+    redX = X[::2]  # reduce for speed
+    clust = OPTICS(min_samples=9, min_cluster_size=min_cluster_size).fit(redX)
+    cluster_sizes = np.bincount(clust.labels_[clust.labels_ != -1])
+    if cluster_sizes.size:
+        assert min(cluster_sizes) >= min_cluster_size
+    # check behaviour is the same when min_cluster_size is a fraction
+    clust_frac = OPTICS(min_samples=9,
+                        min_cluster_size=min_cluster_size / redX.shape[0])
+    clust_frac.fit(redX)
+    assert_array_equal(clust.labels_, clust_frac.labels_)
+
+
+@pytest.mark.parametrize('min_cluster_size', [0, -1, 1.1, 2.2])
+def test_min_cluster_size_invalid(min_cluster_size):
+    clust = OPTICS(min_cluster_size=min_cluster_size)
+    with pytest.raises(ValueError, match="must be a positive integer or a "):
+        clust.fit(X)
+
+
+def test_min_cluster_size_invalid2():
+    clust = OPTICS(min_cluster_size=len(X) + 1)
+    with pytest.raises(ValueError, match="must be no greater than the "):
+        clust.fit(X)
+
+
+def test_processing_order():
+    # Ensure that we consider all unprocessed points,
+    # not only direct neighbors. when picking the next point.
+    Y = [[0], [10], [-10], [25]]
+    clust = OPTICS(min_samples=3, max_eps=15).fit(Y)
+    assert_array_equal(clust.reachability_, [np.inf, 10, 10, 15])
+    assert_array_equal(clust.core_distances_, [10, 15, np.inf, np.inf])
+    assert_array_equal(clust.ordering_, [0, 1, 2, 3])
+
+
+def test_compare_to_ELKI():
+    # Compare the ordering, predecessors and reachability distances that
+    # OPTICS computes on X (defined elsewhere in this file) with reference
+    # values produced by ELKI, first without and then with a max_eps bound.
+
+    # Expected values, computed with (future) ELKI 0.7.5 using:
+    # java -jar elki.jar cli -dbc.in csv -dbc.filter FixedDBIDsFilter
+    #   -algorithm clustering.optics.OPTICSHeap -optics.minpts 5
+    # where the FixedDBIDsFilter gives 0-indexed ids.
+    # r1: reachability, o1: ordering, p1: predecessor (all in cluster order)
+    r1 = [np.inf, 1.0574896366427478, 0.7587934993548423, 0.7290174038973836,
+          0.7290174038973836, 0.7290174038973836, 0.6861627576116127,
+          0.7587934993548423, 0.9280118450166668, 1.1748022534146194,
+          3.3355455741292257, 0.49618389254482587, 0.2552805046961355,
+          0.2552805046961355, 0.24944622248445714, 0.24944622248445714,
+          0.24944622248445714, 0.2552805046961355, 0.2552805046961355,
+          0.3086779122185853, 4.163024452756142, 1.623152630340929,
+          0.45315840475822655, 0.25468325192031926, 0.2254004358159971,
+          0.18765711877083036, 0.1821471333893275, 0.1821471333893275,
+          0.18765711877083036, 0.18765711877083036, 0.2240202988740153,
+          1.154337614548715, 1.342604473837069, 1.323308536402633,
+          0.8607514948648837, 0.27219111215810565, 0.13260875220533205,
+          0.13260875220533205, 0.09890587675958984, 0.09890587675958984,
+          0.13548790801634494, 0.1575483940837384, 0.17515137170530226,
+          0.17575920159442388, 0.27219111215810565, 0.6101447895405373,
+          1.3189208094864302, 1.323308536402633, 2.2509184159764577,
+          2.4517810628594527, 3.675977064404973, 3.8264795626020365,
+          2.9130735341510614, 2.9130735341510614, 2.9130735341510614,
+          2.9130735341510614, 2.8459300127258036, 2.8459300127258036,
+          2.8459300127258036, 3.0321982337972537]
+    o1 = [0, 3, 6, 4, 7, 8, 2, 9, 5, 1, 31, 30, 32, 34, 33, 38, 39, 35, 37, 36,
+          44, 21, 23, 24, 22, 25, 27, 29, 26, 28, 20, 40, 45, 46, 10, 15, 11,
+          13, 17, 19, 18, 12, 16, 14, 47, 49, 43, 48, 42, 41, 53, 57, 51, 52,
+          56, 59, 54, 55, 58, 50]
+    p1 = [-1, 0, 3, 6, 6, 6, 8, 3, 7, 5, 1, 31, 30, 30, 34, 34, 34, 32, 32, 37,
+          36, 44, 21, 23, 24, 22, 25, 25, 22, 22, 22, 21, 40, 45, 46, 10, 15,
+          15, 13, 13, 15, 11, 19, 15, 10, 47, 12, 45, 14, 43, 42, 53, 57, 57,
+          57, 57, 59, 59, 59, 58]
+
+    # Tests against known extraction array
+    # Does NOT work with metric='euclidean', because sklearn euclidean has
+    # worse numeric precision. 'minkowski' is slower but more accurate.
+    clust1 = OPTICS(min_samples=5).fit(X)
+
+    assert_array_equal(clust1.ordering_, np.array(o1))
+    assert_array_equal(clust1.predecessor_[clust1.ordering_], np.array(p1))
+    assert_allclose(clust1.reachability_[clust1.ordering_], np.array(r1))
+    # ELKI currently does not print the core distances (which are not used
+    # much in literature), but we can at least check this consistency: the
+    # reachability of every point after the first is no smaller than the
+    # core distance of its predecessor.
+    for i in clust1.ordering_[1:]:
+        assert (clust1.reachability_[i] >=
+                clust1.core_distances_[clust1.predecessor_[i]])
+
+    # Expected values for the max_eps=0.5 fit below, computed with (future)
+    # ELKI 0.7.5
+    r2 = [np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf,
+          np.inf, np.inf, np.inf, 0.27219111215810565, 0.13260875220533205,
+          0.13260875220533205, 0.09890587675958984, 0.09890587675958984,
+          0.13548790801634494, 0.1575483940837384, 0.17515137170530226,
+          0.17575920159442388, 0.27219111215810565, 0.4928068613197889,
+          np.inf, 0.2666183922512113, 0.18765711877083036, 0.1821471333893275,
+          0.1821471333893275, 0.1821471333893275, 0.18715928772277457,
+          0.18765711877083036, 0.18765711877083036, 0.25468325192031926,
+          np.inf, 0.2552805046961355, 0.2552805046961355, 0.24944622248445714,
+          0.24944622248445714, 0.24944622248445714, 0.2552805046961355,
+          0.2552805046961355, 0.3086779122185853, 0.34466409325984865,
+          np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf,
+          np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf,
+          np.inf, np.inf]
+    o2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 11, 13, 17, 19, 18, 12, 16, 14,
+          47, 46, 20, 22, 25, 23, 27, 29, 24, 26, 28, 21, 30, 32, 34, 33, 38,
+          39, 35, 37, 36, 31, 40, 41, 42, 43, 44, 45, 48, 49, 50, 51, 52, 53,
+          54, 55, 56, 57, 58, 59]
+    p2 = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 15, 15, 13, 13, 15,
+          11, 19, 15, 10, 47, -1, 20, 22, 25, 25, 25, 25, 22, 22, 23, -1, 30,
+          30, 34, 34, 34, 32, 32, 37, 38, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+          -1, -1, -1, -1, -1, -1, -1, -1, -1]
+    clust2 = OPTICS(min_samples=5, max_eps=0.5).fit(X)
+
+    assert_array_equal(clust2.ordering_, np.array(o2))
+    assert_array_equal(clust2.predecessor_[clust2.ordering_], np.array(p2))
+    assert_allclose(clust2.reachability_[clust2.ordering_], np.array(r2))
+
+    # Core distances that do not exceed 0.5 in the unbounded fit must match
+    # those of the max_eps=0.5 fit.
+    index = np.where(clust1.core_distances_ <= 0.5)[0]
+    assert_allclose(clust1.core_distances_[index],
+                    clust2.core_distances_[index])
+
+
+def test_wrong_cluster_method():
+    clust = OPTICS(cluster_method='superfancy')
+    with pytest.raises(ValueError, match="cluster_method should be one of "):
+        clust.fit(X)
+
+
+def test_extract_dbscan():
+    # testing an easy dbscan case. Not including clusters with different
+    # densities.
+    rng = np.random.RandomState(0)
+    n_points_per_cluster = 20
+    C1 = [-5, -2] + .2 * rng.randn(n_points_per_cluster, 2)
+    C2 = [4, -1] + .2 * rng.randn(n_points_per_cluster, 2)
+    C3 = [1, 2] + .2 * rng.randn(n_points_per_cluster, 2)
+    C4 = [-2, 3] + .2 * rng.randn(n_points_per_cluster, 2)
+    X = np.vstack((C1, C2, C3, C4))
+
+    clust = OPTICS(cluster_method='dbscan', eps=.5).fit(X)
+    assert_array_equal(np.sort(np.unique(clust.labels_)), [0, 1, 2, 3])
+
+
+def test_precomputed_dists():
+    redX = X[::2]
+    dists = pairwise_distances(redX, metric='euclidean')
+    clust1 = OPTICS(min_samples=10, algorithm='brute',
+                    metric='precomputed').fit(dists)
+    clust2 = OPTICS(min_samples=10, algorithm='brute',
+                    metric='euclidean').fit(redX)
+
+    assert_allclose(clust1.reachability_, clust2.reachability_)
+    assert_array_equal(clust1.labels_, clust2.labels_)
--- a/venv/Lib/site-packages/sklearn/cluster/tests/test_spectral.py
+++ b/venv/Lib/site-packages/sklearn/cluster/tests/test_spectral.py
@ -0,0 +1,250 @@
+"""Testing for Spectral Clustering methods"""
+
+import numpy as np
+from scipy import sparse
+
+import pytest
+
+import pickle
+
+from sklearn.utils import check_random_state
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_warns_message
+
+from sklearn.cluster import SpectralClustering, spectral_clustering
+from sklearn.cluster._spectral import discretize
+from sklearn.feature_extraction import img_to_graph
+from sklearn.metrics import pairwise_distances
+from sklearn.metrics import adjusted_rand_score
+from sklearn.metrics.pairwise import kernel_metrics, rbf_kernel
+from sklearn.neighbors import NearestNeighbors
+from sklearn.datasets import make_blobs
+
+# pyamg is optional: record whether it can be imported so that the 'amg'
+# eigen solver test can choose the matching expectation.
+try:
+    from pyamg import smoothed_aggregation_solver  # noqa
+    amg_loaded = True
+except ImportError:
+    amg_loaded = False
+
+
+@pytest.mark.parametrize('eigen_solver', ('arpack', 'lobpcg'))
+@pytest.mark.parametrize('assign_labels', ('kmeans', 'discretize'))
+def test_spectral_clustering(eigen_solver, assign_labels):
+    S = np.array([[1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],
+                  [1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],
+                  [1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],
+                  [0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0],
+                  [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0],
+                  [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0],
+                  [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]])
+
+    for mat in (S, sparse.csr_matrix(S)):
+        model = SpectralClustering(random_state=0, n_clusters=2,
+                                   affinity='precomputed',
+                                   eigen_solver=eigen_solver,
+                                   assign_labels=assign_labels
+                                   ).fit(mat)
+        labels = model.labels_
+        if labels[0] == 0:
+            labels = 1 - labels
+
+        assert adjusted_rand_score(labels, [1, 1, 1, 0, 0, 0, 0]) == 1
+
+        model_copy = pickle.loads(pickle.dumps(model))
+        assert model_copy.n_clusters == model.n_clusters
+        assert model_copy.eigen_solver == model.eigen_solver
+        assert_array_equal(model_copy.labels_, model.labels_)
+
+
+def test_spectral_unknown_mode():
+    # Test that SpectralClustering fails with an unknown mode set.
+    centers = np.array([
+        [0., 0., 0.],
+        [10., 10., 10.],
+        [20., 20., 20.],
+    ])
+    X, true_labels = make_blobs(n_samples=100, centers=centers,
+                                cluster_std=1., random_state=42)
+    D = pairwise_distances(X)  # Distance matrix
+    S = np.max(D) - D  # Similarity matrix
+    S = sparse.coo_matrix(S)
+    with pytest.raises(ValueError):
+        spectral_clustering(S, n_clusters=2, random_state=0,
+                            eigen_solver="<unknown>")
+
+
+def test_spectral_unknown_assign_labels():
+    # Test that SpectralClustering fails with an unknown assign_labels set.
+    centers = np.array([
+        [0., 0., 0.],
+        [10., 10., 10.],
+        [20., 20., 20.],
+    ])
+    X, true_labels = make_blobs(n_samples=100, centers=centers,
+                                cluster_std=1., random_state=42)
+    D = pairwise_distances(X)  # Distance matrix
+    S = np.max(D) - D  # Similarity matrix
+    S = sparse.coo_matrix(S)
+    with pytest.raises(ValueError):
+        spectral_clustering(S, n_clusters=2, random_state=0,
+                            assign_labels="<unknown>")
+
+
+def test_spectral_clustering_sparse():
+    X, y = make_blobs(n_samples=20, random_state=0,
+                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)
+
+    S = rbf_kernel(X, gamma=1)
+    S = np.maximum(S - 1e-4, 0)
+    S = sparse.coo_matrix(S)
+
+    labels = SpectralClustering(random_state=0, n_clusters=2,
+                                affinity='precomputed').fit(S).labels_
+    assert adjusted_rand_score(y, labels) == 1
+
+
+def test_precomputed_nearest_neighbors_filtering():
+    # Test precomputed graph filtering when containing too many neighbors
+    X, y = make_blobs(n_samples=200, random_state=0,
+                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)
+
+    n_neighbors = 2
+    results = []
+    for additional_neighbors in [0, 10]:
+        nn = NearestNeighbors(
+            n_neighbors=n_neighbors + additional_neighbors).fit(X)
+        graph = nn.kneighbors_graph(X, mode='connectivity')
+        labels = SpectralClustering(random_state=0, n_clusters=2,
+                                    affinity='precomputed_nearest_neighbors',
+                                    n_neighbors=n_neighbors).fit(graph).labels_
+        results.append(labels)
+
+    assert_array_equal(results[0], results[1])
+
+
+def test_affinities():
+    # Note: in the following, random_state has been selected to have
+    # a dataset that yields a stable eigen decomposition both when built
+    # on OSX and Linux
+    X, y = make_blobs(n_samples=20, random_state=0,
+                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)
+    # nearest neighbors affinity
+    sp = SpectralClustering(n_clusters=2, affinity='nearest_neighbors',
+                            random_state=0)
+    assert_warns_message(UserWarning, 'not fully connected', sp.fit, X)
+    assert adjusted_rand_score(y, sp.labels_) == 1
+
+    sp = SpectralClustering(n_clusters=2, gamma=2, random_state=0)
+    labels = sp.fit(X).labels_
+    assert adjusted_rand_score(y, labels) == 1
+
+    X = check_random_state(10).rand(10, 5) * 10
+
+    kernels_available = kernel_metrics()
+    for kern in kernels_available:
+        # Additive chi^2 gives a negative similarity matrix which
+        # doesn't make sense for spectral clustering
+        if kern != 'additive_chi2':
+            sp = SpectralClustering(n_clusters=2, affinity=kern,
+                                    random_state=0)
+            labels = sp.fit(X).labels_
+            assert (X.shape[0],) == labels.shape
+
+    sp = SpectralClustering(n_clusters=2, affinity=lambda x, y: 1,
+                            random_state=0)
+    labels = sp.fit(X).labels_
+    assert (X.shape[0],) == labels.shape
+
+    def histogram(x, y, **kwargs):
+        # Histogram kernel implemented as a callable.
+        assert kwargs == {}    # no kernel_params that we didn't ask for
+        return np.minimum(x, y).sum()
+
+    sp = SpectralClustering(n_clusters=2, affinity=histogram, random_state=0)
+    labels = sp.fit(X).labels_
+    assert (X.shape[0],) == labels.shape
+
+    # raise error on unknown affinity
+    sp = SpectralClustering(n_clusters=2, affinity='<unknown>')
+    with pytest.raises(ValueError):
+        sp.fit(X)
+
+
+@pytest.mark.parametrize('n_samples', [50, 100, 150, 500])
+def test_discretize(n_samples):
+    # Test the discretize using a noise assignment matrix
+    random_state = np.random.RandomState(seed=8)
+    for n_class in range(2, 10):
+        # random class labels
+        y_true = random_state.randint(0, n_class + 1, n_samples)
+        y_true = np.array(y_true, np.float)
+        # noise class assignment matrix
+        y_indicator = sparse.coo_matrix((np.ones(n_samples),
+                                         (np.arange(n_samples),
+                                          y_true)),
+                                        shape=(n_samples,
+                                               n_class + 1))
+        y_true_noisy = (y_indicator.toarray()
+                        + 0.1 * random_state.randn(n_samples,
+                                                   n_class + 1))
+        y_pred = discretize(y_true_noisy, random_state=random_state)
+        assert adjusted_rand_score(y_true, y_pred) > 0.8
+
+
+# TODO: Remove when pyamg does replaces sp.rand call with np.random.rand
+# https://github.com/scikit-learn/scikit-learn/issues/15913
+@pytest.mark.filterwarnings(
+    "ignore:scipy.rand is deprecated:DeprecationWarning:pyamg.*")
+def test_spectral_clustering_with_arpack_amg_solvers():
+    # Test that spectral_clustering is the same for arpack and amg solver
+    # Based on toy example from plot_segmentation_toy.py
+
+    # a small two coin image
+    x, y = np.indices((40, 40))
+
+    center1, center2 = (14, 12), (20, 25)
+    radius1, radius2 = 8, 7
+
+    circle1 = (x - center1[0]) ** 2 + (y - center1[1]) ** 2 < radius1 ** 2
+    circle2 = (x - center2[0]) ** 2 + (y - center2[1]) ** 2 < radius2 ** 2
+
+    circles = circle1 | circle2
+    mask = circles.copy()
+    img = circles.astype(float)
+
+    graph = img_to_graph(img, mask=mask)
+    graph.data = np.exp(-graph.data / graph.data.std())
+
+    labels_arpack = spectral_clustering(
+        graph, n_clusters=2, eigen_solver='arpack', random_state=0)
+
+    assert len(np.unique(labels_arpack)) == 2
+
+    if amg_loaded:
+        labels_amg = spectral_clustering(
+            graph, n_clusters=2, eigen_solver='amg', random_state=0)
+        assert adjusted_rand_score(labels_arpack, labels_amg) == 1
+    else:
+        with pytest.raises(ValueError):
+            spectral_clustering(graph, n_clusters=2, eigen_solver='amg',
+                                random_state=0)
+
+
+def test_n_components():
+    # Test that after adding n_components, result is different and
+    # n_components = n_clusters by default
+    X, y = make_blobs(n_samples=20, random_state=0,
+                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)
+    sp = SpectralClustering(n_clusters=2, random_state=0)
+    labels = sp.fit(X).labels_
+    # set n_components = n_cluster and test if result is the same
+    labels_same_ncomp = SpectralClustering(n_clusters=2, n_components=2,
+                                           random_state=0).fit(X).labels_
+    # test that n_components=n_clusters by default
+    assert_array_equal(labels, labels_same_ncomp)
+
+    # test that n_components affect result
+    # n_clusters=8 by default, and set n_components=2
+    labels_diff_ncomp = SpectralClustering(n_components=2,
+                                           random_state=0).fit(X).labels_
+    assert not np.array_equal(labels, labels_diff_ncomp)