Uploaded Test files

Batuhan Berk Başoğlu 2020-11-12 11:05:57 -05:00
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions


@@ -0,0 +1,42 @@
"""
The :mod:`sklearn.cluster` module gathers popular unsupervised clustering
algorithms.
"""
from ._spectral import spectral_clustering, SpectralClustering
from ._mean_shift import (mean_shift, MeanShift,
estimate_bandwidth, get_bin_seeds)
from ._affinity_propagation import affinity_propagation, AffinityPropagation
from ._agglomerative import (ward_tree, AgglomerativeClustering,
linkage_tree, FeatureAgglomeration)
from ._kmeans import k_means, KMeans, MiniBatchKMeans
from ._dbscan import dbscan, DBSCAN
from ._optics import (OPTICS, cluster_optics_dbscan, compute_optics_graph,
cluster_optics_xi)
from ._bicluster import SpectralBiclustering, SpectralCoclustering
from ._birch import Birch
__all__ = ['AffinityPropagation',
'AgglomerativeClustering',
'Birch',
'DBSCAN',
'OPTICS',
'cluster_optics_dbscan',
'cluster_optics_xi',
'compute_optics_graph',
'KMeans',
'FeatureAgglomeration',
'MeanShift',
'MiniBatchKMeans',
'SpectralClustering',
'affinity_propagation',
'dbscan',
'estimate_bandwidth',
'get_bin_seeds',
'k_means',
'linkage_tree',
'mean_shift',
'spectral_clustering',
'ward_tree',
'SpectralBiclustering',
'SpectralCoclustering']
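
The names re-exported above form the module's public API. As an illustrative sketch of how a few of them fit together, assuming a scikit-learn installation matching this snapshot (toy data and parameter values are arbitrary):

import numpy as np
from sklearn.cluster import DBSCAN, KMeans, MeanShift, estimate_bandwidth

# Two well-separated blobs of three points each.
X = np.array([[1., 2.], [1., 4.], [1., 0.],
              [10., 2.], [10., 4.], [10., 0.]])

km = KMeans(n_clusters=2, random_state=0).fit(X)   # centroid-based
db = DBSCAN(eps=3, min_samples=2).fit(X)            # density-based
ms = MeanShift(bandwidth=estimate_bandwidth(X, quantile=0.5)).fit(X)
print(km.labels_, db.labels_, ms.labels_)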


@@ -0,0 +1,474 @@
"""Affinity Propagation clustering algorithm."""
# Author: Alexandre Gramfort alexandre.gramfort@inria.fr
# Gael Varoquaux gael.varoquaux@normalesup.org
# License: BSD 3 clause
import numpy as np
import warnings
from ..exceptions import ConvergenceWarning
from ..base import BaseEstimator, ClusterMixin
from ..utils import as_float_array, check_array, check_random_state
from ..utils.validation import check_is_fitted, _deprecate_positional_args
from ..metrics import euclidean_distances
from ..metrics import pairwise_distances_argmin
def _equal_similarities_and_preferences(S, preference):
def all_equal_preferences():
return np.all(preference == preference.flat[0])
def all_equal_similarities():
# Create mask to ignore diagonal of S
mask = np.ones(S.shape, dtype=bool)
np.fill_diagonal(mask, 0)
return np.all(S[mask].flat == S[mask].flat[0])
return all_equal_preferences() and all_equal_similarities()
@_deprecate_positional_args
def affinity_propagation(S, *, preference=None, convergence_iter=15,
max_iter=200, damping=0.5, copy=True, verbose=False,
return_n_iter=False, random_state='warn'):
"""Perform Affinity Propagation Clustering of data
Read more in the :ref:`User Guide <affinity_propagation>`.
Parameters
----------
S : array-like, shape (n_samples, n_samples)
Matrix of similarities between points
preference : array-like, shape (n_samples,) or float, optional
Preferences for each point - points with larger values of
preferences are more likely to be chosen as exemplars. The number of
exemplars, i.e. of clusters, is influenced by the input preferences
value. If the preferences are not passed as arguments, they will be
set to the median of the input similarities (resulting in a moderate
number of clusters). For a smaller amount of clusters, this can be set
to the minimum value of the similarities.
convergence_iter : int, optional, default: 15
Number of iterations with no change in the number
of estimated clusters that stops the convergence.
max_iter : int, optional, default: 200
Maximum number of iterations
damping : float, optional, default: 0.5
Damping factor between 0.5 and 1.
copy : boolean, optional, default: True
If copy is False, the affinity matrix is modified inplace by the
algorithm, for memory efficiency
verbose : boolean, optional, default: False
The verbosity level
return_n_iter : bool, default False
Whether or not to return the number of iterations.
random_state : int or RandomState instance, default=0
Pseudo-random number generator to control the starting state.
Use an int for reproducible results across function calls.
See the :term:`Glossary <random_state>`.
.. versionadded:: 0.23
this parameter was previously hardcoded as 0.
Returns
-------
cluster_centers_indices : array, shape (n_clusters,)
index of clusters centers
labels : array, shape (n_samples,)
cluster labels for each point
n_iter : int
number of iterations run. Returned only if `return_n_iter` is
set to True.
Notes
-----
For an example, see :ref:`examples/cluster/plot_affinity_propagation.py
<sphx_glr_auto_examples_cluster_plot_affinity_propagation.py>`.
When the algorithm does not converge, it returns an empty array as
``cluster_centers_indices`` and ``-1`` as label for each training sample.
When all training samples have equal similarities and equal preferences,
the assignment of cluster centers and labels depends on the preference.
If the preference is smaller than the similarities, a single cluster center
and label ``0`` for every sample will be returned. Otherwise, every
training sample becomes its own cluster center and is assigned a unique
label.
References
----------
Brendan J. Frey and Delbert Dueck, "Clustering by Passing Messages
Between Data Points", Science Feb. 2007
"""
S = as_float_array(S, copy=copy)
n_samples = S.shape[0]
if S.shape[0] != S.shape[1]:
raise ValueError("S must be a square array (shape=%s)" % repr(S.shape))
if preference is None:
preference = np.median(S)
if damping < 0.5 or damping >= 1:
raise ValueError('damping must be >= 0.5 and < 1')
preference = np.array(preference)
if (n_samples == 1 or
_equal_similarities_and_preferences(S, preference)):
# It makes no sense to run the algorithm in this case, so return 1 or
# n_samples clusters, depending on preferences
warnings.warn("All samples have mutually equal similarities. "
"Returning arbitrary cluster center(s).")
if preference.flat[0] >= S.flat[n_samples - 1]:
return ((np.arange(n_samples), np.arange(n_samples), 0)
if return_n_iter
else (np.arange(n_samples), np.arange(n_samples)))
else:
return ((np.array([0]), np.array([0] * n_samples), 0)
if return_n_iter
else (np.array([0]), np.array([0] * n_samples)))
if random_state == 'warn':
warnings.warn(("'random_state' has been introduced in 0.23. "
"It will be set to None starting from 0.25 which "
"means that results will differ at every function "
"call. Set 'random_state' to None to silence this "
"warning, or to 0 to keep the behavior of versions "
"<0.23."),
FutureWarning)
random_state = 0
random_state = check_random_state(random_state)
# Place preference on the diagonal of S
S.flat[::(n_samples + 1)] = preference
A = np.zeros((n_samples, n_samples))
R = np.zeros((n_samples, n_samples)) # Initialize messages
# Intermediate results
tmp = np.zeros((n_samples, n_samples))
# Remove degeneracies
S += ((np.finfo(S.dtype).eps * S + np.finfo(S.dtype).tiny * 100) *
random_state.randn(n_samples, n_samples))
# Execute parallel affinity propagation updates
e = np.zeros((n_samples, convergence_iter))
ind = np.arange(n_samples)
for it in range(max_iter):
# tmp = A + S; compute responsibilities
np.add(A, S, tmp)
I = np.argmax(tmp, axis=1)
Y = tmp[ind, I] # np.max(A + S, axis=1)
tmp[ind, I] = -np.inf
Y2 = np.max(tmp, axis=1)
# tmp = Rnew
np.subtract(S, Y[:, None], tmp)
tmp[ind, I] = S[ind, I] - Y2
# Damping
tmp *= 1 - damping
R *= damping
R += tmp
# tmp = Rp; compute availabilities
np.maximum(R, 0, tmp)
tmp.flat[::n_samples + 1] = R.flat[::n_samples + 1]
# tmp = -Anew
tmp -= np.sum(tmp, axis=0)
dA = np.diag(tmp).copy()
tmp.clip(0, np.inf, tmp)
tmp.flat[::n_samples + 1] = dA
# Damping
tmp *= 1 - damping
A *= damping
A -= tmp
# Check for convergence
E = (np.diag(A) + np.diag(R)) > 0
e[:, it % convergence_iter] = E
K = np.sum(E, axis=0)
if it >= convergence_iter:
se = np.sum(e, axis=1)
unconverged = (np.sum((se == convergence_iter) + (se == 0))
!= n_samples)
if (not unconverged and (K > 0)) or (it == max_iter):
never_converged = False
if verbose:
print("Converged after %d iterations." % it)
break
else:
never_converged = True
if verbose:
print("Did not converge")
I = np.flatnonzero(E)
K = I.size # Identify exemplars
if K > 0 and not never_converged:
c = np.argmax(S[:, I], axis=1)
c[I] = np.arange(K) # Identify clusters
# Refine the final set of exemplars and clusters and return results
for k in range(K):
ii = np.where(c == k)[0]
j = np.argmax(np.sum(S[ii[:, np.newaxis], ii], axis=0))
I[k] = ii[j]
c = np.argmax(S[:, I], axis=1)
c[I] = np.arange(K)
labels = I[c]
# Reduce labels to a sorted, gapless list
cluster_centers_indices = np.unique(labels)
labels = np.searchsorted(cluster_centers_indices, labels)
else:
warnings.warn("Affinity propagation did not converge, this model "
"will not have any cluster centers.", ConvergenceWarning)
labels = np.array([-1] * n_samples)
cluster_centers_indices = []
if return_n_iter:
return cluster_centers_indices, labels, it + 1
else:
return cluster_centers_indices, labels
###############################################################################
class AffinityPropagation(ClusterMixin, BaseEstimator):
"""Perform Affinity Propagation Clustering of data.
Read more in the :ref:`User Guide <affinity_propagation>`.
Parameters
----------
damping : float, default=0.5
Damping factor (between 0.5 and 1) is the extent to
which the current value is maintained relative to
incoming values (weighted 1 - damping). This is done in order
to avoid numerical oscillations when updating these
values (messages).
max_iter : int, default=200
Maximum number of iterations.
convergence_iter : int, default=15
Number of iterations with no change in the number
of estimated clusters that stops the convergence.
copy : bool, default=True
Make a copy of input data.
preference : array-like of shape (n_samples,) or float, default=None
Preferences for each point - points with larger values of
preferences are more likely to be chosen as exemplars. The number
of exemplars, i.e. of clusters, is influenced by the input
preferences value. If the preferences are not passed as arguments,
they will be set to the median of the input similarities.
affinity : {'euclidean', 'precomputed'}, default='euclidean'
Which affinity to use. At the moment 'precomputed' and
'euclidean' are supported. 'euclidean' uses the
negative squared euclidean distance between points.
verbose : bool, default=False
Whether to be verbose.
random_state : int or RandomState instance, default=0
Pseudo-random number generator to control the starting state.
Use an int for reproducible results across function calls.
See the :term:`Glossary <random_state>`.
.. versionadded:: 0.23
this parameter was previously hardcoded as 0.
Attributes
----------
cluster_centers_indices_ : ndarray of shape (n_clusters,)
Indices of cluster centers
cluster_centers_ : ndarray of shape (n_clusters, n_features)
Cluster centers (if affinity != ``precomputed``).
labels_ : ndarray of shape (n_samples,)
Labels of each point
affinity_matrix_ : ndarray of shape (n_samples, n_samples)
Stores the affinity matrix used in ``fit``.
n_iter_ : int
Number of iterations taken to converge.
Notes
-----
For an example, see :ref:`examples/cluster/plot_affinity_propagation.py
<sphx_glr_auto_examples_cluster_plot_affinity_propagation.py>`.
The algorithmic complexity of affinity propagation is quadratic
in the number of points.
When ``fit`` does not converge, ``cluster_centers_`` becomes an empty
array and all training samples will be labelled as ``-1``. In addition,
``predict`` will then label every sample as ``-1``.
When all training samples have equal similarities and equal preferences,
the assignment of cluster centers and labels depends on the preference.
If the preference is smaller than the similarities, ``fit`` will result in
a single cluster center and label ``0`` for every sample. Otherwise, every
training sample becomes its own cluster center and is assigned a unique
label.
References
----------
Brendan J. Frey and Delbert Dueck, "Clustering by Passing Messages
Between Data Points", Science Feb. 2007
Examples
--------
>>> from sklearn.cluster import AffinityPropagation
>>> import numpy as np
>>> X = np.array([[1, 2], [1, 4], [1, 0],
... [4, 2], [4, 4], [4, 0]])
>>> clustering = AffinityPropagation(random_state=5).fit(X)
>>> clustering
AffinityPropagation(random_state=5)
>>> clustering.labels_
array([0, 0, 0, 1, 1, 1])
>>> clustering.predict([[0, 0], [4, 4]])
array([0, 1])
>>> clustering.cluster_centers_
array([[1, 2],
[4, 2]])
"""
@_deprecate_positional_args
def __init__(self, *, damping=.5, max_iter=200, convergence_iter=15,
copy=True, preference=None, affinity='euclidean',
verbose=False, random_state='warn'):
self.damping = damping
self.max_iter = max_iter
self.convergence_iter = convergence_iter
self.copy = copy
self.verbose = verbose
self.preference = preference
self.affinity = affinity
self.random_state = random_state
@property
def _pairwise(self):
return self.affinity == "precomputed"
def fit(self, X, y=None):
"""Fit the clustering from features, or affinity matrix.
Parameters
----------
X : array-like or sparse matrix, shape (n_samples, n_features), or \
array-like, shape (n_samples, n_samples)
Training instances to cluster, or similarities / affinities between
instances if ``affinity='precomputed'``. If a sparse feature matrix
is provided, it will be converted into a sparse ``csr_matrix``.
y : Ignored
Not used, present here for API consistency by convention.
Returns
-------
self
"""
if self.affinity == "precomputed":
accept_sparse = False
else:
accept_sparse = 'csr'
X = self._validate_data(X, accept_sparse=accept_sparse)
if self.affinity == "precomputed":
self.affinity_matrix_ = X
elif self.affinity == "euclidean":
self.affinity_matrix_ = -euclidean_distances(X, squared=True)
else:
raise ValueError("Affinity must be 'precomputed' or "
"'euclidean'. Got %s instead"
% str(self.affinity))
self.cluster_centers_indices_, self.labels_, self.n_iter_ = \
affinity_propagation(
self.affinity_matrix_, preference=self.preference,
max_iter=self.max_iter,
convergence_iter=self.convergence_iter, damping=self.damping,
copy=self.copy, verbose=self.verbose, return_n_iter=True,
random_state=self.random_state)
if self.affinity != "precomputed":
self.cluster_centers_ = X[self.cluster_centers_indices_].copy()
return self
def predict(self, X):
"""Predict the closest cluster each sample in X belongs to.
Parameters
----------
X : array-like or sparse matrix, shape (n_samples, n_features)
New data to predict. If a sparse matrix is provided, it will be
converted into a sparse ``csr_matrix``.
Returns
-------
labels : ndarray, shape (n_samples,)
Cluster labels.
"""
check_is_fitted(self)
X = check_array(X)
if not hasattr(self, "cluster_centers_"):
raise ValueError("Predict method is not supported when "
"affinity='precomputed'.")
if self.cluster_centers_.shape[0] > 0:
return pairwise_distances_argmin(X, self.cluster_centers_)
else:
warnings.warn("This model does not have any cluster centers "
"because affinity propagation did not converge. "
"Labeling every sample as '-1'.", ConvergenceWarning)
return np.array([-1] * X.shape[0])
def fit_predict(self, X, y=None):
"""Fit the clustering from features or affinity matrix, and return
cluster labels.
Parameters
----------
X : array-like or sparse matrix, shape (n_samples, n_features), or \
array-like, shape (n_samples, n_samples)
Training instances to cluster, or similarities / affinities between
instances if ``affinity='precomputed'``. If a sparse feature matrix
is provided, it will be converted into a sparse ``csr_matrix``.
y : Ignored
Not used, present here for API consistency by convention.
Returns
-------
labels : ndarray, shape (n_samples,)
Cluster labels.
"""
return super().fit_predict(X, y)
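
As a hedged usage sketch, the function-level API above can also be driven directly with a precomputed similarity matrix, mirroring what ``AffinityPropagation`` computes internally for ``affinity='euclidean'``; the toy data below is arbitrary:

import numpy as np
from sklearn.cluster import affinity_propagation
from sklearn.metrics import euclidean_distances

X = np.array([[1, 2], [1, 4], [1, 0],
              [4, 2], [4, 4], [4, 0]], dtype=float)
# Similarity = negative squared Euclidean distance, as used by the
# estimator's 'euclidean' affinity.
S = -euclidean_distances(X, squared=True)
cluster_centers_indices, labels = affinity_propagation(S, random_state=0)
print(cluster_centers_indices, labels)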

File diff suppressed because it is too large.


@@ -0,0 +1,546 @@
"""Spectral biclustering algorithms."""
# Authors : Kemal Eren
# License: BSD 3 clause
from abc import ABCMeta, abstractmethod
import warnings
import numpy as np
from scipy.linalg import norm
from scipy.sparse import dia_matrix, issparse
from scipy.sparse.linalg import eigsh, svds
from . import KMeans, MiniBatchKMeans
from ..base import BaseEstimator, BiclusterMixin
from ..utils import check_random_state
from ..utils.extmath import (make_nonnegative, randomized_svd,
safe_sparse_dot)
from ..utils.validation import assert_all_finite, _deprecate_positional_args
__all__ = ['SpectralCoclustering',
'SpectralBiclustering']
def _scale_normalize(X):
"""Normalize ``X`` by scaling rows and columns independently.
Returns the normalized matrix and the row and column scaling
factors.
"""
X = make_nonnegative(X)
row_diag = np.asarray(1.0 / np.sqrt(X.sum(axis=1))).squeeze()
col_diag = np.asarray(1.0 / np.sqrt(X.sum(axis=0))).squeeze()
row_diag = np.where(np.isnan(row_diag), 0, row_diag)
col_diag = np.where(np.isnan(col_diag), 0, col_diag)
if issparse(X):
n_rows, n_cols = X.shape
r = dia_matrix((row_diag, [0]), shape=(n_rows, n_rows))
c = dia_matrix((col_diag, [0]), shape=(n_cols, n_cols))
an = r * X * c
else:
an = row_diag[:, np.newaxis] * X * col_diag
return an, row_diag, col_diag
def _bistochastic_normalize(X, max_iter=1000, tol=1e-5):
"""Normalize rows and columns of ``X`` simultaneously so that all
rows sum to one constant and all columns sum to a different
constant.
"""
# According to paper, this can also be done more efficiently with
# deviation reduction and balancing algorithms.
X = make_nonnegative(X)
X_scaled = X
for _ in range(max_iter):
X_new, _, _ = _scale_normalize(X_scaled)
if issparse(X):
# compare with the newly scaled matrix, as in the dense branch below
dist = norm(X_scaled.data - X_new.data)
else:
dist = norm(X_scaled - X_new)
X_scaled = X_new
if dist is not None and dist < tol:
break
return X_scaled
def _log_normalize(X):
"""Normalize ``X`` according to Kluger's log-interactions scheme."""
X = make_nonnegative(X, min_value=1)
if issparse(X):
raise ValueError("Cannot compute log of a sparse matrix,"
" because log(x) diverges to -infinity as x"
" goes to 0.")
L = np.log(X)
row_avg = L.mean(axis=1)[:, np.newaxis]
col_avg = L.mean(axis=0)
avg = L.mean()
return L - row_avg - col_avg + avg
class BaseSpectral(BiclusterMixin, BaseEstimator, metaclass=ABCMeta):
"""Base class for spectral biclustering."""
@abstractmethod
def __init__(self, n_clusters=3, svd_method="randomized",
n_svd_vecs=None, mini_batch=False, init="k-means++",
n_init=10, n_jobs='deprecated', random_state=None):
self.n_clusters = n_clusters
self.svd_method = svd_method
self.n_svd_vecs = n_svd_vecs
self.mini_batch = mini_batch
self.init = init
self.n_init = n_init
self.n_jobs = n_jobs
self.random_state = random_state
def _check_parameters(self):
legal_svd_methods = ('randomized', 'arpack')
if self.svd_method not in legal_svd_methods:
raise ValueError("Unknown SVD method: '{0}'. svd_method must be"
" one of {1}.".format(self.svd_method,
legal_svd_methods))
def fit(self, X, y=None):
"""Creates a biclustering for X.
Parameters
----------
X : array-like, shape (n_samples, n_features)
y : Ignored
"""
if self.n_jobs != 'deprecated':
warnings.warn("'n_jobs' was deprecated in version 0.23 and will be"
" removed in 0.25.", FutureWarning)
X = self._validate_data(X, accept_sparse='csr', dtype=np.float64)
self._check_parameters()
self._fit(X)
return self
def _svd(self, array, n_components, n_discard):
"""Returns first `n_components` left and right singular
vectors u and v, discarding the first `n_discard`.
"""
if self.svd_method == 'randomized':
kwargs = {}
if self.n_svd_vecs is not None:
kwargs['n_oversamples'] = self.n_svd_vecs
u, _, vt = randomized_svd(array, n_components,
random_state=self.random_state,
**kwargs)
elif self.svd_method == 'arpack':
u, _, vt = svds(array, k=n_components, ncv=self.n_svd_vecs)
if np.any(np.isnan(vt)):
# some eigenvalues of A * A.T are negative, causing
# sqrt() to be np.nan. This causes some vectors in vt
# to be np.nan.
A = safe_sparse_dot(array.T, array)
random_state = check_random_state(self.random_state)
# initialize with [-1,1] as in ARPACK
v0 = random_state.uniform(-1, 1, A.shape[0])
_, v = eigsh(A, ncv=self.n_svd_vecs, v0=v0)
vt = v.T
if np.any(np.isnan(u)):
A = safe_sparse_dot(array, array.T)
random_state = check_random_state(self.random_state)
# initialize with [-1,1] as in ARPACK
v0 = random_state.uniform(-1, 1, A.shape[0])
_, u = eigsh(A, ncv=self.n_svd_vecs, v0=v0)
assert_all_finite(u)
assert_all_finite(vt)
u = u[:, n_discard:]
vt = vt[n_discard:]
return u, vt.T
def _k_means(self, data, n_clusters):
if self.mini_batch:
model = MiniBatchKMeans(n_clusters,
init=self.init,
n_init=self.n_init,
random_state=self.random_state)
else:
model = KMeans(n_clusters, init=self.init,
n_init=self.n_init, n_jobs=self.n_jobs,
random_state=self.random_state)
model.fit(data)
centroid = model.cluster_centers_
labels = model.labels_
return centroid, labels
class SpectralCoclustering(BaseSpectral):
"""Spectral Co-Clustering algorithm (Dhillon, 2001).
Clusters rows and columns of an array `X` to solve the relaxed
normalized cut of the bipartite graph created from `X` as follows:
the edge between row vertex `i` and column vertex `j` has weight
`X[i, j]`.
The resulting bicluster structure is block-diagonal, since each
row and each column belongs to exactly one bicluster.
Supports sparse matrices, as long as they are nonnegative.
Read more in the :ref:`User Guide <spectral_coclustering>`.
Parameters
----------
n_clusters : int, default=3
The number of biclusters to find.
svd_method : {'randomized', 'arpack'}, default='randomized'
Selects the algorithm for finding singular vectors. May be
'randomized' or 'arpack'. If 'randomized', use
:func:`sklearn.utils.extmath.randomized_svd`, which may be faster
for large matrices. If 'arpack', use
:func:`scipy.sparse.linalg.svds`, which is more accurate, but
possibly slower in some cases.
n_svd_vecs : int, default=None
Number of vectors to use in calculating the SVD. Corresponds
to `ncv` when `svd_method='arpack'` and `n_oversamples` when
`svd_method` is 'randomized'.
mini_batch : bool, default=False
Whether to use mini-batch k-means, which is faster but may get
different results.
init : {'k-means++', 'random'} or ndarray of shape \
(n_clusters, n_features), default='k-means++'
Method for initialization of k-means algorithm; defaults to
'k-means++'.
n_init : int, default=10
Number of random initializations that are tried with the
k-means algorithm.
If mini-batch k-means is used, the best initialization is
chosen and the algorithm runs once. Otherwise, the algorithm
is run for each initialization and the best solution chosen.
n_jobs : int, default=None
The number of jobs to use for the computation. This works by breaking
down the pairwise matrix into n_jobs even slices and computing them in
parallel.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
.. deprecated:: 0.23
``n_jobs`` was deprecated in version 0.23 and will be removed in
0.25.
random_state : int, RandomState instance, default=None
Used for randomizing the singular value decomposition and the k-means
initialization. Use an int to make the randomness deterministic.
See :term:`Glossary <random_state>`.
Attributes
----------
rows_ : array-like of shape (n_row_clusters, n_rows)
Results of the clustering. `rows[i, r]` is True if
cluster `i` contains row `r`. Available only after calling ``fit``.
columns_ : array-like of shape (n_column_clusters, n_columns)
Results of the clustering, like `rows`.
row_labels_ : array-like of shape (n_rows,)
The bicluster label of each row.
column_labels_ : array-like of shape (n_cols,)
The bicluster label of each column.
Examples
--------
>>> from sklearn.cluster import SpectralCoclustering
>>> import numpy as np
>>> X = np.array([[1, 1], [2, 1], [1, 0],
... [4, 7], [3, 5], [3, 6]])
>>> clustering = SpectralCoclustering(n_clusters=2, random_state=0).fit(X)
>>> clustering.row_labels_ #doctest: +SKIP
array([0, 1, 1, 0, 0, 0], dtype=int32)
>>> clustering.column_labels_ #doctest: +SKIP
array([0, 0], dtype=int32)
>>> clustering
SpectralCoclustering(n_clusters=2, random_state=0)
References
----------
* Dhillon, Inderjit S, 2001. `Co-clustering documents and words using
bipartite spectral graph partitioning
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.140.3011>`__.
"""
@_deprecate_positional_args
def __init__(self, n_clusters=3, *, svd_method='randomized',
n_svd_vecs=None, mini_batch=False, init='k-means++',
n_init=10, n_jobs='deprecated', random_state=None):
super().__init__(n_clusters,
svd_method,
n_svd_vecs,
mini_batch,
init,
n_init,
n_jobs,
random_state)
def _fit(self, X):
normalized_data, row_diag, col_diag = _scale_normalize(X)
n_sv = 1 + int(np.ceil(np.log2(self.n_clusters)))
u, v = self._svd(normalized_data, n_sv, n_discard=1)
z = np.vstack((row_diag[:, np.newaxis] * u,
col_diag[:, np.newaxis] * v))
_, labels = self._k_means(z, self.n_clusters)
n_rows = X.shape[0]
self.row_labels_ = labels[:n_rows]
self.column_labels_ = labels[n_rows:]
self.rows_ = np.vstack([self.row_labels_ == c
for c in range(self.n_clusters)])
self.columns_ = np.vstack([self.column_labels_ == c
for c in range(self.n_clusters)])
class SpectralBiclustering(BaseSpectral):
"""Spectral biclustering (Kluger, 2003).
Partitions rows and columns under the assumption that the data has
an underlying checkerboard structure. For instance, if there are
two row partitions and three column partitions, each row will
belong to three biclusters, and each column will belong to two
biclusters. The outer product of the corresponding row and column
label vectors gives this checkerboard structure.
Read more in the :ref:`User Guide <spectral_biclustering>`.
Parameters
----------
n_clusters : int or tuple (n_row_clusters, n_column_clusters), default=3
The number of row and column clusters in the checkerboard
structure.
method : {'bistochastic', 'scale', 'log'}, default='bistochastic'
Method of normalizing and converting singular vectors into
biclusters. May be one of 'scale', 'bistochastic', or 'log'.
The authors recommend using 'log'. If the data is sparse,
however, log normalization will not work, which is why the
default is 'bistochastic'.
.. warning::
if `method='log'`, the data must not be sparse.
n_components : int, default=6
Number of singular vectors to check.
n_best : int, default=3
Number of best singular vectors to which to project the data
for clustering.
svd_method : {'randomized', 'arpack'}, default='randomized'
Selects the algorithm for finding singular vectors. May be
'randomized' or 'arpack'. If 'randomized', uses
:func:`~sklearn.utils.extmath.randomized_svd`, which may be faster
for large matrices. If 'arpack', uses
`scipy.sparse.linalg.svds`, which is more accurate, but
possibly slower in some cases.
n_svd_vecs : int, default=None
Number of vectors to use in calculating the SVD. Corresponds
to `ncv` when `svd_method='arpack'` and `n_oversamples` when
`svd_method` is 'randomized'.
mini_batch : bool, default=False
Whether to use mini-batch k-means, which is faster but may get
different results.
init : {'k-means++', 'random'} or ndarray of shape (n_clusters, n_features), \
default='k-means++'
Method for initialization of k-means algorithm; defaults to
'k-means++'.
n_init : int, default=10
Number of random initializations that are tried with the
k-means algorithm.
If mini-batch k-means is used, the best initialization is
chosen and the algorithm runs once. Otherwise, the algorithm
is run for each initialization and the best solution chosen.
n_jobs : int, default=None
The number of jobs to use for the computation. This works by breaking
down the pairwise matrix into n_jobs even slices and computing them in
parallel.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
.. deprecated:: 0.23
``n_jobs`` was deprecated in version 0.23 and will be removed in
0.25.
random_state : int, RandomState instance, default=None
Used for randomizing the singular value decomposition and the k-means
initialization. Use an int to make the randomness deterministic.
See :term:`Glossary <random_state>`.
Attributes
----------
rows_ : array-like of shape (n_row_clusters, n_rows)
Results of the clustering. `rows[i, r]` is True if
cluster `i` contains row `r`. Available only after calling ``fit``.
columns_ : array-like of shape (n_column_clusters, n_columns)
Results of the clustering, like `rows`.
row_labels_ : array-like of shape (n_rows,)
Row partition labels.
column_labels_ : array-like of shape (n_cols,)
Column partition labels.
Examples
--------
>>> from sklearn.cluster import SpectralBiclustering
>>> import numpy as np
>>> X = np.array([[1, 1], [2, 1], [1, 0],
... [4, 7], [3, 5], [3, 6]])
>>> clustering = SpectralBiclustering(n_clusters=2, random_state=0).fit(X)
>>> clustering.row_labels_
array([1, 1, 1, 0, 0, 0], dtype=int32)
>>> clustering.column_labels_
array([0, 1], dtype=int32)
>>> clustering
SpectralBiclustering(n_clusters=2, random_state=0)
References
----------
* Kluger, Yuval, et. al., 2003. `Spectral biclustering of microarray
data: coclustering genes and conditions
<http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.135.1608>`__.
"""
@_deprecate_positional_args
def __init__(self, n_clusters=3, *, method='bistochastic',
n_components=6, n_best=3, svd_method='randomized',
n_svd_vecs=None, mini_batch=False, init='k-means++',
n_init=10, n_jobs='deprecated', random_state=None):
super().__init__(n_clusters,
svd_method,
n_svd_vecs,
mini_batch,
init,
n_init,
n_jobs,
random_state)
self.method = method
self.n_components = n_components
self.n_best = n_best
def _check_parameters(self):
super()._check_parameters()
legal_methods = ('bistochastic', 'scale', 'log')
if self.method not in legal_methods:
raise ValueError("Unknown method: '{0}'. method must be"
" one of {1}.".format(self.method, legal_methods))
try:
int(self.n_clusters)
except TypeError:
try:
r, c = self.n_clusters
int(r)
int(c)
except (ValueError, TypeError):
raise ValueError("Incorrect parameter n_clusters has value:"
" {}. It should either be a single integer"
" or an iterable with two integers:"
" (n_row_clusters, n_column_clusters)")
if self.n_components < 1:
raise ValueError("Parameter n_components must be greater than 0,"
" but its value is {}".format(self.n_components))
if self.n_best < 1:
raise ValueError("Parameter n_best must be greater than 0,"
" but its value is {}".format(self.n_best))
if self.n_best > self.n_components:
raise ValueError("n_best cannot be larger than"
" n_components, but {} > {}"
"".format(self.n_best, self.n_components))
def _fit(self, X):
n_sv = self.n_components
if self.method == 'bistochastic':
normalized_data = _bistochastic_normalize(X)
n_sv += 1
elif self.method == 'scale':
normalized_data, _, _ = _scale_normalize(X)
n_sv += 1
elif self.method == 'log':
normalized_data = _log_normalize(X)
n_discard = 0 if self.method == 'log' else 1
u, v = self._svd(normalized_data, n_sv, n_discard)
ut = u.T
vt = v.T
try:
n_row_clusters, n_col_clusters = self.n_clusters
except TypeError:
n_row_clusters = n_col_clusters = self.n_clusters
best_ut = self._fit_best_piecewise(ut, self.n_best,
n_row_clusters)
best_vt = self._fit_best_piecewise(vt, self.n_best,
n_col_clusters)
self.row_labels_ = self._project_and_cluster(X, best_vt.T,
n_row_clusters)
self.column_labels_ = self._project_and_cluster(X.T, best_ut.T,
n_col_clusters)
self.rows_ = np.vstack([self.row_labels_ == label
for label in range(n_row_clusters)
for _ in range(n_col_clusters)])
self.columns_ = np.vstack([self.column_labels_ == label
for _ in range(n_row_clusters)
for label in range(n_col_clusters)])
def _fit_best_piecewise(self, vectors, n_best, n_clusters):
"""Find the ``n_best`` vectors that are best approximated by piecewise
constant vectors.
The piecewise vectors are found by k-means; the best is chosen
according to Euclidean distance.
"""
def make_piecewise(v):
centroid, labels = self._k_means(v.reshape(-1, 1), n_clusters)
return centroid[labels].ravel()
piecewise_vectors = np.apply_along_axis(make_piecewise,
axis=1, arr=vectors)
dists = np.apply_along_axis(norm, axis=1,
arr=(vectors - piecewise_vectors))
result = vectors[np.argsort(dists)[:n_best]]
return result
def _project_and_cluster(self, data, vectors, n_clusters):
"""Project ``data`` to ``vectors`` and cluster the result."""
projected = safe_sparse_dot(data, vectors)
_, labels = self._k_means(projected, n_clusters)
return labels
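
A minimal end-to-end sketch of the class above: ``SpectralCoclustering`` recovering planted biclusters from a shuffled block matrix. It assumes ``make_biclusters`` and ``consensus_score`` are available from the same scikit-learn snapshot; the shape, noise and seed are arbitrary.

from sklearn.cluster import SpectralCoclustering
from sklearn.datasets import make_biclusters
from sklearn.metrics import consensus_score

# Planted block structure with noise; rows/cols are the ground-truth
# bicluster indicator matrices, permuted consistently with the data.
data, rows, cols = make_biclusters(shape=(300, 300), n_clusters=5,
                                   noise=5, shuffle=True, random_state=0)
model = SpectralCoclustering(n_clusters=5, random_state=0).fit(data)
score = consensus_score(model.biclusters_, (rows, cols))
print("consensus score: %.2f" % score)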


@@ -0,0 +1,658 @@
# Authors: Manoj Kumar <manojkumarsivaraj334@gmail.com>
# Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
# Joel Nothman <joel.nothman@gmail.com>
# License: BSD 3 clause
import warnings
import numbers
import numpy as np
from scipy import sparse
from math import sqrt
from ..metrics import pairwise_distances_argmin
from ..metrics.pairwise import euclidean_distances
from ..base import TransformerMixin, ClusterMixin, BaseEstimator
from ..utils import check_array
from ..utils.extmath import row_norms
from ..utils.validation import check_is_fitted, _deprecate_positional_args
from ..exceptions import ConvergenceWarning
from . import AgglomerativeClustering
def _iterate_sparse_X(X):
"""This little hack returns a densified row when iterating over a sparse
matrix, instead of constructing a sparse matrix for every row, which
is expensive.
"""
n_samples = X.shape[0]
X_indices = X.indices
X_data = X.data
X_indptr = X.indptr
for i in range(n_samples):
row = np.zeros(X.shape[1])
startptr, endptr = X_indptr[i], X_indptr[i + 1]
nonzero_indices = X_indices[startptr:endptr]
row[nonzero_indices] = X_data[startptr:endptr]
yield row
def _split_node(node, threshold, branching_factor):
"""The node has to be split if there is no place for a new subcluster
in the node.
1. Two empty nodes and two empty subclusters are initialized.
2. The pair of most distant subclusters is found.
3. The properties of the empty subclusters and nodes are updated
according to which of the two distant subclusters each existing
subcluster is nearest to.
4. The two nodes are set as children to the two subclusters.
"""
new_subcluster1 = _CFSubcluster()
new_subcluster2 = _CFSubcluster()
new_node1 = _CFNode(
threshold=threshold, branching_factor=branching_factor,
is_leaf=node.is_leaf,
n_features=node.n_features)
new_node2 = _CFNode(
threshold=threshold, branching_factor=branching_factor,
is_leaf=node.is_leaf,
n_features=node.n_features)
new_subcluster1.child_ = new_node1
new_subcluster2.child_ = new_node2
if node.is_leaf:
if node.prev_leaf_ is not None:
node.prev_leaf_.next_leaf_ = new_node1
new_node1.prev_leaf_ = node.prev_leaf_
new_node1.next_leaf_ = new_node2
new_node2.prev_leaf_ = new_node1
new_node2.next_leaf_ = node.next_leaf_
if node.next_leaf_ is not None:
node.next_leaf_.prev_leaf_ = new_node2
dist = euclidean_distances(
node.centroids_, Y_norm_squared=node.squared_norm_, squared=True)
n_clusters = dist.shape[0]
farthest_idx = np.unravel_index(
dist.argmax(), (n_clusters, n_clusters))
node1_dist, node2_dist = dist[(farthest_idx,)]
node1_closer = node1_dist < node2_dist
for idx, subcluster in enumerate(node.subclusters_):
if node1_closer[idx]:
new_node1.append_subcluster(subcluster)
new_subcluster1.update(subcluster)
else:
new_node2.append_subcluster(subcluster)
new_subcluster2.update(subcluster)
return new_subcluster1, new_subcluster2
class _CFNode:
"""Each node in a CFTree is called a CFNode.
The CFNode can have a maximum of branching_factor
number of CFSubclusters.
Parameters
----------
threshold : float
Threshold needed for a new subcluster to enter a CFSubcluster.
branching_factor : int
Maximum number of CF subclusters in each node.
is_leaf : bool
We need to know if the CFNode is a leaf or not, in order to
retrieve the final subclusters.
n_features : int
The number of features.
Attributes
----------
subclusters_ : list
List of subclusters for a particular CFNode.
prev_leaf_ : _CFNode
Previous leaf node. Useful only if is_leaf is True.
next_leaf_ : _CFNode
Next leaf node. Useful only if is_leaf is True, in order to retrieve
the final subclusters.
init_centroids_ : ndarray of shape (branching_factor + 1, n_features)
Manipulate ``init_centroids_`` throughout rather than centroids_ since
the centroids are just a view of ``init_centroids_``.
init_sq_norm_ : ndarray of shape (branching_factor + 1,)
Manipulate ``init_sq_norm_`` throughout, similar to ``init_centroids_``.
centroids_ : ndarray of shape (branching_factor + 1, n_features)
View of ``init_centroids_``.
squared_norm_ : ndarray of shape (branching_factor + 1,)
View of ``init_sq_norm_``.
"""
def __init__(self, *, threshold, branching_factor, is_leaf, n_features):
self.threshold = threshold
self.branching_factor = branching_factor
self.is_leaf = is_leaf
self.n_features = n_features
# The list of subclusters, centroids and squared norms
# to manipulate throughout.
self.subclusters_ = []
self.init_centroids_ = np.zeros((branching_factor + 1, n_features))
self.init_sq_norm_ = np.zeros((branching_factor + 1))
self.squared_norm_ = []
self.prev_leaf_ = None
self.next_leaf_ = None
def append_subcluster(self, subcluster):
n_samples = len(self.subclusters_)
self.subclusters_.append(subcluster)
self.init_centroids_[n_samples] = subcluster.centroid_
self.init_sq_norm_[n_samples] = subcluster.sq_norm_
# Keep centroids_ and squared_norm_ as views of init_centroids_ and
# init_sq_norm_, so that updating the latter is sufficient.
self.centroids_ = self.init_centroids_[:n_samples + 1, :]
self.squared_norm_ = self.init_sq_norm_[:n_samples + 1]
def update_split_subclusters(self, subcluster,
new_subcluster1, new_subcluster2):
"""Remove a subcluster from a node and update it with the
split subclusters.
"""
ind = self.subclusters_.index(subcluster)
self.subclusters_[ind] = new_subcluster1
self.init_centroids_[ind] = new_subcluster1.centroid_
self.init_sq_norm_[ind] = new_subcluster1.sq_norm_
self.append_subcluster(new_subcluster2)
def insert_cf_subcluster(self, subcluster):
"""Insert a new subcluster into the node."""
if not self.subclusters_:
self.append_subcluster(subcluster)
return False
threshold = self.threshold
branching_factor = self.branching_factor
# We need to find the closest subcluster among all the
# subclusters so that we can insert our new subcluster.
dist_matrix = np.dot(self.centroids_, subcluster.centroid_)
dist_matrix *= -2.
dist_matrix += self.squared_norm_
closest_index = np.argmin(dist_matrix)
closest_subcluster = self.subclusters_[closest_index]
# If the subcluster has a child, we need a recursive strategy.
if closest_subcluster.child_ is not None:
split_child = closest_subcluster.child_.insert_cf_subcluster(
subcluster)
if not split_child:
# If it is determined that the child need not be split, we
# can just update the closest_subcluster
closest_subcluster.update(subcluster)
self.init_centroids_[closest_index] = \
self.subclusters_[closest_index].centroid_
self.init_sq_norm_[closest_index] = \
self.subclusters_[closest_index].sq_norm_
return False
# The child had to be split: redistribute the subclusters in
# the child node and add a new subcluster in the parent
# node to accommodate the new child.
else:
new_subcluster1, new_subcluster2 = _split_node(
closest_subcluster.child_, threshold, branching_factor)
self.update_split_subclusters(
closest_subcluster, new_subcluster1, new_subcluster2)
if len(self.subclusters_) > self.branching_factor:
return True
return False
# good to go!
else:
merged = closest_subcluster.merge_subcluster(
subcluster, self.threshold)
if merged:
self.init_centroids_[closest_index] = \
closest_subcluster.centroid_
self.init_sq_norm_[closest_index] = \
closest_subcluster.sq_norm_
return False
# not close to any other subclusters, and we still
# have space, so add.
elif len(self.subclusters_) < self.branching_factor:
self.append_subcluster(subcluster)
return False
# We do not have enough space nor is it closer to an
# other subcluster. We need to split.
else:
self.append_subcluster(subcluster)
return True
class _CFSubcluster:
"""Each subcluster in a CFNode is called a CFSubcluster.
A CFSubcluster can have a CFNode as its child.
Parameters
----------
linear_sum : ndarray of shape (n_features,), default=None
Sample. This is kept optional to allow initialization of empty
subclusters.
Attributes
----------
n_samples_ : int
Number of samples that belong to each subcluster.
linear_sum_ : ndarray
Linear sum of all the samples in a subcluster. Prevents holding
all sample data in memory.
squared_sum_ : float
Sum of the squared l2 norms of all samples belonging to a subcluster.
centroid_ : ndarray of shape (n_features,)
Centroid of the subcluster. Prevent recomputing of centroids when
``CFNode.centroids_`` is called.
child_ : _CFNode
Child node of the subcluster. Once a given _CFNode is set as the child
of this subcluster, it is stored in ``self.child_``.
sq_norm_ : float
Squared norm of the subcluster. Used to prevent recomputing when
pairwise minimum distances are computed.
"""
def __init__(self, *, linear_sum=None):
if linear_sum is None:
self.n_samples_ = 0
self.squared_sum_ = 0.0
self.centroid_ = self.linear_sum_ = 0
else:
self.n_samples_ = 1
self.centroid_ = self.linear_sum_ = linear_sum
self.squared_sum_ = self.sq_norm_ = np.dot(
self.linear_sum_, self.linear_sum_)
self.child_ = None
def update(self, subcluster):
self.n_samples_ += subcluster.n_samples_
self.linear_sum_ += subcluster.linear_sum_
self.squared_sum_ += subcluster.squared_sum_
self.centroid_ = self.linear_sum_ / self.n_samples_
self.sq_norm_ = np.dot(self.centroid_, self.centroid_)
def merge_subcluster(self, nominee_cluster, threshold):
"""Check if a cluster is worthy enough to be merged. If
yes then merge.
"""
new_ss = self.squared_sum_ + nominee_cluster.squared_sum_
new_ls = self.linear_sum_ + nominee_cluster.linear_sum_
new_n = self.n_samples_ + nominee_cluster.n_samples_
new_centroid = (1 / new_n) * new_ls
new_norm = np.dot(new_centroid, new_centroid)
dot_product = (-2 * new_n) * new_norm
sq_radius = (new_ss + dot_product) / new_n + new_norm
if sq_radius <= threshold ** 2:
(self.n_samples_, self.linear_sum_, self.squared_sum_,
self.centroid_, self.sq_norm_) = \
new_n, new_ls, new_ss, new_centroid, new_norm
return True
return False
@property
def radius(self):
"""Return radius of the subcluster"""
dot_product = -2 * np.dot(self.linear_sum_, self.centroid_)
return sqrt(
((self.squared_sum_ + dot_product) / self.n_samples_) +
self.sq_norm_)
class Birch(ClusterMixin, TransformerMixin, BaseEstimator):
"""Implements the Birch clustering algorithm.
It is a memory-efficient, online-learning algorithm provided as an
alternative to :class:`MiniBatchKMeans`. It constructs a tree
data structure with the cluster centroids being read off the leaf.
These can be either the final cluster centroids or can be provided as input
to another clustering algorithm such as :class:`AgglomerativeClustering`.
Read more in the :ref:`User Guide <birch>`.
.. versionadded:: 0.16
Parameters
----------
threshold : float, default=0.5
The radius of the subcluster obtained by merging a new sample and the
closest subcluster should be smaller than the threshold. Otherwise a new
subcluster is started. Setting this value to be very low promotes
splitting and vice-versa.
branching_factor : int, default=50
Maximum number of CF subclusters in each node. If a new sample enters
such that the number of subclusters exceeds the branching_factor, then
that node is split into two nodes with the subclusters redistributed
in each. The parent subcluster of that node is removed and two new
subclusters are added as parents of the 2 split nodes.
n_clusters : int, instance of sklearn.cluster model, default=3
Number of clusters after the final clustering step, which treats the
subclusters from the leaves as new samples.
- `None` : the final clustering step is not performed and the
subclusters are returned as they are.
- :mod:`sklearn.cluster` Estimator : If a model is provided, the model
is fit treating the subclusters as new samples and the initial data
is mapped to the label of the closest subcluster.
- `int` : the model fit is :class:`AgglomerativeClustering` with
`n_clusters` set to be equal to the int.
compute_labels : bool, default=True
Whether or not to compute labels for each fit.
copy : bool, default=True
Whether or not to make a copy of the given data. If set to False,
the initial data will be overwritten.
Attributes
----------
root_ : _CFNode
Root of the CFTree.
dummy_leaf_ : _CFNode
Start pointer to all the leaves.
subcluster_centers_ : ndarray
Centroids of all subclusters read directly from the leaves.
subcluster_labels_ : ndarray
Labels assigned to the centroids of the subclusters after
they are clustered globally.
labels_ : ndarray of shape (n_samples,)
Array of labels assigned to the input data.
If partial_fit is used instead of fit, they are assigned to the
last batch of data.
See Also
--------
MiniBatchKMeans
Alternative implementation that does incremental updates
of the centers' positions using mini-batches.
Notes
-----
The tree data structure consists of nodes with each node consisting of
a number of subclusters. The maximum number of subclusters in a node
is determined by the branching factor. Each subcluster maintains a
linear sum, squared sum and the number of samples in that subcluster.
In addition, each subcluster can also have a node as its child, if the
subcluster is not a member of a leaf node.
For a new point entering the root, it is merged with the subcluster closest
to it and the linear sum, squared sum and the number of samples of that
subcluster are updated. This is done recursively till the properties of
the leaf node are updated.
References
----------
* Tian Zhang, Raghu Ramakrishnan, Miron Livny
BIRCH: An efficient data clustering method for large databases.
https://www.cs.sfu.ca/CourseCentral/459/han/papers/zhang96.pdf
* Roberto Perdisci
JBirch - Java implementation of BIRCH clustering algorithm
https://code.google.com/archive/p/jbirch
Examples
--------
>>> from sklearn.cluster import Birch
>>> X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]
>>> brc = Birch(n_clusters=None)
>>> brc.fit(X)
Birch(n_clusters=None)
>>> brc.predict(X)
array([0, 0, 0, 1, 1, 1])
"""
@_deprecate_positional_args
def __init__(self, *, threshold=0.5, branching_factor=50, n_clusters=3,
compute_labels=True, copy=True):
self.threshold = threshold
self.branching_factor = branching_factor
self.n_clusters = n_clusters
self.compute_labels = compute_labels
self.copy = copy
def fit(self, X, y=None):
"""
Build a CF Tree for the input data.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Input data.
y : Ignored
Not used, present here for API consistency by convention.
Returns
-------
self
Fitted estimator.
"""
self.fit_, self.partial_fit_ = True, False
return self._fit(X)
def _fit(self, X):
X = self._validate_data(X, accept_sparse='csr', copy=self.copy)
threshold = self.threshold
branching_factor = self.branching_factor
if branching_factor <= 1:
raise ValueError("Branching_factor should be greater than one.")
n_samples, n_features = X.shape
# If partial_fit is called for the first time or fit is called, we
# start a new tree.
partial_fit = getattr(self, 'partial_fit_')
has_root = getattr(self, 'root_', None)
if getattr(self, 'fit_') or (partial_fit and not has_root):
# The first root is the leaf. Manipulate this object throughout.
self.root_ = _CFNode(threshold=threshold,
branching_factor=branching_factor,
is_leaf=True,
n_features=n_features)
# To enable getting back subclusters.
self.dummy_leaf_ = _CFNode(threshold=threshold,
branching_factor=branching_factor,
is_leaf=True, n_features=n_features)
self.dummy_leaf_.next_leaf_ = self.root_
self.root_.prev_leaf_ = self.dummy_leaf_
# Cannot vectorize. Enough to convince us to use cython.
if not sparse.issparse(X):
iter_func = iter
else:
iter_func = _iterate_sparse_X
for sample in iter_func(X):
subcluster = _CFSubcluster(linear_sum=sample)
split = self.root_.insert_cf_subcluster(subcluster)
if split:
new_subcluster1, new_subcluster2 = _split_node(
self.root_, threshold, branching_factor)
del self.root_
self.root_ = _CFNode(threshold=threshold,
branching_factor=branching_factor,
is_leaf=False,
n_features=n_features)
self.root_.append_subcluster(new_subcluster1)
self.root_.append_subcluster(new_subcluster2)
centroids = np.concatenate([
leaf.centroids_ for leaf in self._get_leaves()])
self.subcluster_centers_ = centroids
self._global_clustering(X)
return self
def _get_leaves(self):
"""
Retrieve the leaves of the CF Node.
Returns
-------
leaves : list of shape (n_leaves,)
List of the leaf nodes.
"""
leaf_ptr = self.dummy_leaf_.next_leaf_
leaves = []
while leaf_ptr is not None:
leaves.append(leaf_ptr)
leaf_ptr = leaf_ptr.next_leaf_
return leaves
def partial_fit(self, X=None, y=None):
"""
Online learning. Prevents rebuilding of CFTree from scratch.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features), \
default=None
Input data. If X is not provided, only the global clustering
step is done.
y : Ignored
Not used, present here for API consistency by convention.
Returns
-------
self
Fitted estimator.
"""
self.partial_fit_, self.fit_ = True, False
if X is None:
# Perform just the final global clustering step.
self._global_clustering()
return self
else:
self._check_fit(X)
return self._fit(X)
def _check_fit(self, X):
check_is_fitted(self)
if (hasattr(self, 'subcluster_centers_') and
X.shape[1] != self.subcluster_centers_.shape[1]):
raise ValueError(
"Training data and predicted data do "
"not have the same number of features.")
def predict(self, X):
"""
Predict data using the ``centroids_`` of subclusters.
Avoid computation of the row norms of X.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Input data.
Returns
-------
labels : ndarray of shape (n_samples,)
Labelled data.
"""
X = check_array(X, accept_sparse='csr')
self._check_fit(X)
kwargs = {'Y_norm_squared': self._subcluster_norms}
return self.subcluster_labels_[
pairwise_distances_argmin(X,
self.subcluster_centers_,
metric_kwargs=kwargs)
]
def transform(self, X):
"""
Transform X into subcluster centroids dimension.
Each dimension represents the distance from the sample point to each
cluster centroid.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Input data.
Returns
-------
X_trans : {array-like, sparse matrix} of shape (n_samples, n_clusters)
Transformed data.
"""
check_is_fitted(self)
return euclidean_distances(X, self.subcluster_centers_)
def _global_clustering(self, X=None):
"""
Global clustering for the subclusters obtained after fitting
"""
clusterer = self.n_clusters
centroids = self.subcluster_centers_
compute_labels = (X is not None) and self.compute_labels
# Preprocessing for the global clustering.
not_enough_centroids = False
if isinstance(clusterer, numbers.Integral):
clusterer = AgglomerativeClustering(
n_clusters=self.n_clusters)
# There is no need to perform the global clustering step.
if len(centroids) < self.n_clusters:
not_enough_centroids = True
elif (clusterer is not None and not
hasattr(clusterer, 'fit_predict')):
raise ValueError("n_clusters should be an instance of "
"ClusterMixin or an int")
# To use in predict to avoid recalculation.
self._subcluster_norms = row_norms(
self.subcluster_centers_, squared=True)
if clusterer is None or not_enough_centroids:
self.subcluster_labels_ = np.arange(len(centroids))
if not_enough_centroids:
warnings.warn(
"Number of subclusters found (%d) by Birch is less "
"than (%d). Decrease the threshold."
% (len(centroids), self.n_clusters), ConvergenceWarning)
else:
# The global clustering step that clusters the subclusters of
# the leaves. It assumes the centroids of the subclusters as
# samples and finds the final centroids.
self.subcluster_labels_ = clusterer.fit_predict(
self.subcluster_centers_)
if compute_labels:
self.labels_ = self.predict(X)
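
A small streaming sketch of the online usage described in ``partial_fit`` above: the CF tree is grown batch by batch and the global clustering step is rerun after each call. The synthetic blobs and parameter values below are arbitrary and assume a scikit-learn installation matching this snapshot.

import numpy as np
from sklearn.cluster import Birch

rng = np.random.RandomState(0)
X = np.concatenate([rng.normal(center, 0.2, size=(100, 2))
                    for center in ((0., 0.), (3., 3.))])
brc = Birch(threshold=0.5, n_clusters=2)
# Feed the data in chunks; each call inserts new samples into the
# existing CF tree instead of rebuilding it from scratch.
for batch in np.array_split(X, 4):
    brc.partial_fit(batch)
print(brc.predict(X[:5]))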


@@ -0,0 +1,392 @@
# -*- coding: utf-8 -*-
"""
DBSCAN: Density-Based Spatial Clustering of Applications with Noise
"""
# Author: Robert Layton <robertlayton@gmail.com>
# Joel Nothman <joel.nothman@gmail.com>
# Lars Buitinck
#
# License: BSD 3 clause
import numpy as np
import warnings
from scipy import sparse
from ..base import BaseEstimator, ClusterMixin
from ..utils.validation import _check_sample_weight, _deprecate_positional_args
from ..neighbors import NearestNeighbors
from ._dbscan_inner import dbscan_inner
@_deprecate_positional_args
def dbscan(X, eps=0.5, *, min_samples=5, metric='minkowski',
metric_params=None, algorithm='auto', leaf_size=30, p=2,
sample_weight=None, n_jobs=None):
"""Perform DBSCAN clustering from vector array or distance matrix.
Read more in the :ref:`User Guide <dbscan>`.
Parameters
----------
X : {array-like, sparse (CSR) matrix} of shape (n_samples, n_features) or \
(n_samples, n_samples)
A feature array, or array of distances between samples if
``metric='precomputed'``.
eps : float, default=0.5
The maximum distance between two samples for one to be considered
as in the neighborhood of the other. This is not a maximum bound
on the distances of points within a cluster. This is the most
important DBSCAN parameter to choose appropriately for your data set
and distance function.
min_samples : int, default=5
The number of samples (or total weight) in a neighborhood for a point
to be considered as a core point. This includes the point itself.
metric : str or callable, default='minkowski'
The metric to use when calculating distance between instances in a
feature array. If metric is a string or callable, it must be one of
the options allowed by :func:`sklearn.metrics.pairwise_distances` for
its metric parameter.
If metric is "precomputed", X is assumed to be a distance matrix and
must be square during fit.
X may be a :term:`sparse graph <sparse graph>`,
in which case only "nonzero" elements may be considered neighbors.
metric_params : dict, default=None
Additional keyword arguments for the metric function.
.. versionadded:: 0.19
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
The algorithm to be used by the NearestNeighbors module
to compute pointwise distances and find nearest neighbors.
See NearestNeighbors module documentation for details.
leaf_size : int, default=30
Leaf size passed to BallTree or cKDTree. This can affect the speed
of the construction and query, as well as the memory required
to store the tree. The optimal value depends
on the nature of the problem.
p : float, default=2
The power of the Minkowski metric to be used to calculate distance
between points.
sample_weight : array-like of shape (n_samples,), default=None
Weight of each sample, such that a sample with a weight of at least
``min_samples`` is by itself a core sample; a sample with negative
weight may inhibit its eps-neighbor from being core.
Note that weights are absolute, and default to 1.
n_jobs : int, default=None
The number of parallel jobs to run for neighbors search. ``None`` means
1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means
using all processors. See :term:`Glossary <n_jobs>` for more details.
If precomputed distances are used, parallel execution is not available
and thus n_jobs will have no effect.
Returns
-------
core_samples : ndarray of shape (n_core_samples,)
Indices of core samples.
labels : ndarray of shape (n_samples,)
Cluster labels for each point. Noisy samples are given the label -1.
See also
--------
DBSCAN
An estimator interface for this clustering algorithm.
OPTICS
A similar estimator interface clustering at multiple values of eps. Our
implementation is optimized for memory usage.
Notes
-----
For an example, see :ref:`examples/cluster/plot_dbscan.py
<sphx_glr_auto_examples_cluster_plot_dbscan.py>`.
This implementation bulk-computes all neighborhood queries, which increases
the memory complexity to O(n.d) where d is the average number of neighbors,
while original DBSCAN had memory complexity O(n). It may attract a higher
memory complexity when querying these nearest neighborhoods, depending
on the ``algorithm``.
One way to avoid the query complexity is to pre-compute sparse
neighborhoods in chunks using
:func:`NearestNeighbors.radius_neighbors_graph
<sklearn.neighbors.NearestNeighbors.radius_neighbors_graph>` with
``mode='distance'``, then using ``metric='precomputed'`` here.
Another way to reduce memory and computation time is to remove
(near-)duplicate points and use ``sample_weight`` instead.
:class:`~sklearn.cluster.OPTICS` provides a similar
clustering with lower memory usage.
References
----------
Ester, M., H. P. Kriegel, J. Sander, and X. Xu, "A Density-Based
Algorithm for Discovering Clusters in Large Spatial Databases with Noise".
In: Proceedings of the 2nd International Conference on Knowledge Discovery
and Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996
Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).
DBSCAN revisited, revisited: why and how you should (still) use DBSCAN.
ACM Transactions on Database Systems (TODS), 42(3), 19.
"""
est = DBSCAN(eps=eps, min_samples=min_samples, metric=metric,
metric_params=metric_params, algorithm=algorithm,
leaf_size=leaf_size, p=p, n_jobs=n_jobs)
est.fit(X, sample_weight=sample_weight)
return est.core_sample_indices_, est.labels_
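# A minimal usage sketch of this functional interface. The toy data mirrors the
# example in the DBSCAN class docstring below; since ``dbscan`` simply wraps the
# estimator, the outputs agree with ``DBSCAN(...).fit(X)``.
#
import numpy as np
from sklearn.cluster import dbscan

X = np.array([[1, 2], [2, 2], [2, 3],
              [8, 7], [8, 8], [25, 80]])
# eps/min_samples chosen so two dense groups form and the last point is noise.
core_indices, labels = dbscan(X, eps=3, min_samples=2)
# core_indices -> array([0, 1, 2, 3, 4]); labels -> array([ 0,  0,  0,  1,  1, -1])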
class DBSCAN(ClusterMixin, BaseEstimator):
"""Perform DBSCAN clustering from vector array or distance matrix.
DBSCAN - Density-Based Spatial Clustering of Applications with Noise.
Finds core samples of high density and expands clusters from them.
Good for data which contains clusters of similar density.
Read more in the :ref:`User Guide <dbscan>`.
Parameters
----------
eps : float, default=0.5
The maximum distance between two samples for one to be considered
as in the neighborhood of the other. This is not a maximum bound
on the distances of points within a cluster. This is the most
important DBSCAN parameter to choose appropriately for your data set
and distance function.
min_samples : int, default=5
The number of samples (or total weight) in a neighborhood for a point
to be considered as a core point. This includes the point itself.
metric : string, or callable, default='euclidean'
The metric to use when calculating distance between instances in a
feature array. If metric is a string or callable, it must be one of
the options allowed by :func:`sklearn.metrics.pairwise_distances` for
its metric parameter.
If metric is "precomputed", X is assumed to be a distance matrix and
must be square. X may be a :term:`sparse graph <sparse graph>`, in which
case only "nonzero" elements may be considered neighbors for DBSCAN.
.. versionadded:: 0.17
metric *precomputed* to accept precomputed sparse matrix.
metric_params : dict, default=None
Additional keyword arguments for the metric function.
.. versionadded:: 0.19
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto'
The algorithm to be used by the NearestNeighbors module
to compute pointwise distances and find nearest neighbors.
See NearestNeighbors module documentation for details.
leaf_size : int, default=30
Leaf size passed to BallTree or cKDTree. This can affect the speed
of the construction and query, as well as the memory required
to store the tree. The optimal value depends
on the nature of the problem.
p : float, default=None
The power of the Minkowski metric to be used to calculate distance
between points.
n_jobs : int, default=None
The number of parallel jobs to run.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Attributes
----------
core_sample_indices_ : ndarray of shape (n_core_samples,)
Indices of core samples.
components_ : ndarray of shape (n_core_samples, n_features)
Copy of each core sample found by training.
labels_ : ndarray of shape (n_samples)
Cluster labels for each point in the dataset given to fit().
Noisy samples are given the label -1.
Examples
--------
>>> from sklearn.cluster import DBSCAN
>>> import numpy as np
>>> X = np.array([[1, 2], [2, 2], [2, 3],
... [8, 7], [8, 8], [25, 80]])
>>> clustering = DBSCAN(eps=3, min_samples=2).fit(X)
>>> clustering.labels_
array([ 0, 0, 0, 1, 1, -1])
>>> clustering
DBSCAN(eps=3, min_samples=2)
See also
--------
OPTICS
A similar clustering at multiple values of eps. Our implementation
is optimized for memory usage.
Notes
-----
For an example, see :ref:`examples/cluster/plot_dbscan.py
<sphx_glr_auto_examples_cluster_plot_dbscan.py>`.
This implementation bulk-computes all neighborhood queries, which increases
the memory complexity to O(n.d) where d is the average number of neighbors,
while original DBSCAN had memory complexity O(n). It may attract a higher
memory complexity when querying these nearest neighborhoods, depending
on the ``algorithm``.
One way to avoid the query complexity is to pre-compute sparse
neighborhoods in chunks using
:func:`NearestNeighbors.radius_neighbors_graph
<sklearn.neighbors.NearestNeighbors.radius_neighbors_graph>` with
``mode='distance'``, then using ``metric='precomputed'`` here.
Another way to reduce memory and computation time is to remove
(near-)duplicate points and use ``sample_weight`` instead.
:class:`cluster.OPTICS` provides a similar clustering with lower memory
usage.
References
----------
Ester, M., H. P. Kriegel, J. Sander, and X. Xu, "A Density-Based
Algorithm for Discovering Clusters in Large Spatial Databases with Noise".
In: Proceedings of the 2nd International Conference on Knowledge Discovery
and Data Mining, Portland, OR, AAAI Press, pp. 226-231. 1996
Schubert, E., Sander, J., Ester, M., Kriegel, H. P., & Xu, X. (2017).
DBSCAN revisited, revisited: why and how you should (still) use DBSCAN.
ACM Transactions on Database Systems (TODS), 42(3), 19.
"""
@_deprecate_positional_args
def __init__(self, eps=0.5, *, min_samples=5, metric='euclidean',
metric_params=None, algorithm='auto', leaf_size=30, p=None,
n_jobs=None):
self.eps = eps
self.min_samples = min_samples
self.metric = metric
self.metric_params = metric_params
self.algorithm = algorithm
self.leaf_size = leaf_size
self.p = p
self.n_jobs = n_jobs
def fit(self, X, y=None, sample_weight=None):
"""Perform DBSCAN clustering from features, or distance matrix.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features), or \
(n_samples, n_samples)
Training instances to cluster, or distances between instances if
``metric='precomputed'``. If a sparse matrix is provided, it will
be converted into a sparse ``csr_matrix``.
sample_weight : array-like of shape (n_samples,), default=None
Weight of each sample, such that a sample with a weight of at least
``min_samples`` is by itself a core sample; a sample with a
negative weight may inhibit its eps-neighbor from being core.
Note that weights are absolute, and default to 1.
y : Ignored
Not used, present here for API consistency by convention.
Returns
-------
self
"""
X = self._validate_data(X, accept_sparse='csr')
if not self.eps > 0.0:
raise ValueError("eps must be positive.")
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)
# Calculate neighborhood for all samples. This leaves the original
# point in, which needs to be considered later (i.e. point i is in the
# neighborhood of point i; while true, it is useless information).
if self.metric == 'precomputed' and sparse.issparse(X):
# set the diagonal to explicit values, as a point is its own
# neighbor
with warnings.catch_warnings():
warnings.simplefilter('ignore', sparse.SparseEfficiencyWarning)
X.setdiag(X.diagonal()) # XXX: modifies X's internals in-place
neighbors_model = NearestNeighbors(
radius=self.eps, algorithm=self.algorithm,
leaf_size=self.leaf_size, metric=self.metric,
metric_params=self.metric_params, p=self.p, n_jobs=self.n_jobs)
neighbors_model.fit(X)
# This has worst case O(n^2) memory complexity
neighborhoods = neighbors_model.radius_neighbors(X,
return_distance=False)
if sample_weight is None:
n_neighbors = np.array([len(neighbors)
for neighbors in neighborhoods])
else:
n_neighbors = np.array([np.sum(sample_weight[neighbors])
for neighbors in neighborhoods])
# Initially, all samples are noise.
labels = np.full(X.shape[0], -1, dtype=np.intp)
# A list of all core samples found.
core_samples = np.asarray(n_neighbors >= self.min_samples,
dtype=np.uint8)
dbscan_inner(core_samples, neighborhoods, labels)
self.core_sample_indices_ = np.where(core_samples)[0]
self.labels_ = labels
if len(self.core_sample_indices_):
# fix for scipy sparse indexing issue
self.components_ = X[self.core_sample_indices_].copy()
else:
# no core samples
self.components_ = np.empty((0, X.shape[1]))
return self
def fit_predict(self, X, y=None, sample_weight=None):
"""Perform DBSCAN clustering from features or distance matrix,
and return cluster labels.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features), or \
(n_samples, n_samples)
Training instances to cluster, or distances between instances if
``metric='precomputed'``. If a sparse matrix is provided, it will
be converted into a sparse ``csr_matrix``.
sample_weight : array-like of shape (n_samples,), default=None
Weight of each sample, such that a sample with a weight of at least
``min_samples`` is by itself a core sample; a sample with a
negative weight may inhibit its eps-neighbor from being core.
Note that weights are absolute, and default to 1.
y : Ignored
Not used, present here for API consistency by convention.
Returns
-------
labels : ndarray of shape (n_samples,)
Cluster labels. Noisy samples are given the label -1.
"""
self.fit(X, sample_weight=sample_weight)
return self.labels_
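# As suggested in the Notes above, memory use can be bounded by precomputing a
# sparse radius-neighbors graph and passing it with ``metric='precomputed'``.
# A rough sketch; the data, ``eps`` and ``min_samples`` below are illustrative,
# not tuned values.
#
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.neighbors import NearestNeighbors

rng = np.random.RandomState(0)
X = rng.rand(200, 2)
eps = 0.1  # illustrative radius

# Sparse CSR matrix holding only distances within eps; only stored entries are
# considered as candidate neighbors when metric='precomputed'.
graph = NearestNeighbors(radius=eps).fit(X).radius_neighbors_graph(
    X, mode='distance')
labels = DBSCAN(eps=eps, min_samples=5, metric='precomputed').fit_predict(graph)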

View file

@@ -0,0 +1,77 @@
"""
Feature agglomeration. Base classes and functions for performing feature
agglomeration.
"""
# Author: V. Michel, A. Gramfort
# License: BSD 3 clause
import numpy as np
from ..base import TransformerMixin
from ..utils import check_array
from ..utils.validation import check_is_fitted
from scipy.sparse import issparse
###############################################################################
# Mixin class for feature agglomeration.
class AgglomerationTransform(TransformerMixin):
"""
A class for feature agglomeration via the transform interface
"""
def transform(self, X):
"""
Transform a new matrix using the built clustering
Parameters
----------
X : array-like of shape (n_samples, n_features) or (n_samples,)
An M by N array of M observations in N dimensions or a length
M array of M one-dimensional observations.
Returns
-------
Y : array, shape = [n_samples, n_clusters] or [n_clusters]
The pooled values for each feature cluster.
"""
check_is_fitted(self)
X = check_array(X)
if len(self.labels_) != X.shape[1]:
raise ValueError("X has a different number of features than "
"during fitting.")
if self.pooling_func == np.mean and not issparse(X):
size = np.bincount(self.labels_)
n_samples = X.shape[0]
# a fast way to compute the mean of grouped features
nX = np.array([np.bincount(self.labels_, X[i, :]) / size
for i in range(n_samples)])
else:
nX = [self.pooling_func(X[:, self.labels_ == l], axis=1)
for l in np.unique(self.labels_)]
nX = np.array(nX).T
return nX
def inverse_transform(self, Xred):
"""
Inverse the transformation.
Return an array of size n_features with the values of Xred assigned
to each group of features.
Parameters
----------
Xred : array-like of shape (n_samples, n_clusters) or (n_clusters,)
The values to be assigned to each cluster of features.
Returns
-------
X : array, shape=[n_samples, n_features] or [n_features]
An array of shape [n_samples, n_features] (or a vector of size
n_features) with the values of Xred assigned to each group of features.
"""
check_is_fitted(self)
unil, inverse = np.unique(self.labels_, return_inverse=True)
return Xred[..., inverse]
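# This mixin is consumed by FeatureAgglomeration. A small sketch with toy
# values of the pooled transform and its inverse: each feature cluster is
# reduced to its mean and then broadcast back to the original feature layout.
#
import numpy as np
from sklearn.cluster import FeatureAgglomeration

X = np.array([[0., 1., 1., 4.],
              [2., 3., 3., 8.]])
agglo = FeatureAgglomeration(n_clusters=2).fit(X)
Xred = agglo.transform(X)              # shape (2, 2): one pooled value per feature cluster
Xback = agglo.inverse_transform(Xred)  # shape (2, 4): cluster means broadcast back to features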

View file

@@ -0,0 +1,23 @@
# cython: language_level=3
from cython cimport floating
cimport numpy as np
cdef floating _euclidean_dense_dense(floating*, floating*, int, bint) nogil
cdef floating _euclidean_sparse_dense(floating[::1], int[::1], floating[::1],
floating, bint) nogil
cpdef void _relocate_empty_clusters_dense(
np.ndarray[floating, ndim=2, mode='c'], floating[::1], floating[:, ::1],
floating[:, ::1], floating[::1], int[::1])
cpdef void _relocate_empty_clusters_sparse(
floating[::1], int[::1], int[::1], floating[::1], floating[:, ::1],
floating[:, ::1], floating[::1], int[::1])
cdef void _average_centers(floating[:, ::1], floating[::1])
cdef void _center_shift(floating[:, ::1], floating[:, ::1], floating[::1])

File diff suppressed because it is too large

View file

@@ -0,0 +1,465 @@
"""Mean shift clustering algorithm.
Mean shift clustering aims to discover *blobs* in a smooth density of
samples. It is a centroid based algorithm, which works by updating candidates
for centroids to be the mean of the points within a given region. These
candidates are then filtered in a post-processing stage to eliminate
near-duplicates to form the final set of centroids.
Seeding is performed using a binning technique for scalability.
"""
# Authors: Conrad Lee <conradlee@gmail.com>
# Alexandre Gramfort <alexandre.gramfort@inria.fr>
# Gael Varoquaux <gael.varoquaux@normalesup.org>
# Martino Sorbaro <martino.sorbaro@ed.ac.uk>
import numpy as np
import warnings
from joblib import Parallel, delayed
from collections import defaultdict
from ..utils.validation import check_is_fitted, _deprecate_positional_args
from ..utils import check_random_state, gen_batches, check_array
from ..base import BaseEstimator, ClusterMixin
from ..neighbors import NearestNeighbors
from ..metrics.pairwise import pairwise_distances_argmin
@_deprecate_positional_args
def estimate_bandwidth(X, *, quantile=0.3, n_samples=None, random_state=0,
n_jobs=None):
"""Estimate the bandwidth to use with the mean-shift algorithm.
Note that this function takes time at least quadratic in n_samples. For
large datasets, it is wise to set the n_samples parameter to a small value.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Input points.
quantile : float, default=0.3
Should be between [0, 1]; 0.5 means that the median of all pairwise
distances is used.
n_samples : int, default=None
The number of samples to use. If not given, all samples are used.
random_state : int, RandomState instance, default=0
The generator used to randomly select the samples from input points
for bandwidth estimation. Use an int to make the randomness
deterministic.
See :term:`Glossary <random_state>`.
n_jobs : int, default=None
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Returns
-------
bandwidth : float
The bandwidth parameter.
"""
X = check_array(X)
random_state = check_random_state(random_state)
if n_samples is not None:
idx = random_state.permutation(X.shape[0])[:n_samples]
X = X[idx]
n_neighbors = int(X.shape[0] * quantile)
if n_neighbors < 1: # cannot fit NearestNeighbors with n_neighbors = 0
n_neighbors = 1
nbrs = NearestNeighbors(n_neighbors=n_neighbors,
n_jobs=n_jobs)
nbrs.fit(X)
bandwidth = 0.
for batch in gen_batches(len(X), 500):
d, _ = nbrs.kneighbors(X[batch, :], return_distance=True)
bandwidth += np.max(d, axis=1).sum()
return bandwidth / X.shape[0]
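# A short sketch of this estimate on synthetic blobs; the quantile and
# subsample size are illustrative values, not recommendations.
#
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=500, centers=3, random_state=0)
# Subsampling keeps the (at least) quadratic cost of the estimate manageable.
bandwidth = estimate_bandwidth(X, quantile=0.2, n_samples=200, random_state=0)
labels = MeanShift(bandwidth=bandwidth, bin_seeding=True).fit_predict(X)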
# separate function for each seed's iterative loop
def _mean_shift_single_seed(my_mean, X, nbrs, max_iter):
# For each seed, climb gradient until convergence or max_iter
bandwidth = nbrs.get_params()['radius']
stop_thresh = 1e-3 * bandwidth # when mean has converged
completed_iterations = 0
while True:
# Find mean of points within bandwidth
i_nbrs = nbrs.radius_neighbors([my_mean], bandwidth,
return_distance=False)[0]
points_within = X[i_nbrs]
if len(points_within) == 0:
break # Depending on seeding strategy this condition may occur
my_old_mean = my_mean # save the old mean
my_mean = np.mean(points_within, axis=0)
# If converged or at max_iter, add the cluster
if (np.linalg.norm(my_mean - my_old_mean) < stop_thresh or
completed_iterations == max_iter):
break
completed_iterations += 1
return tuple(my_mean), len(points_within), completed_iterations
@_deprecate_positional_args
def mean_shift(X, *, bandwidth=None, seeds=None, bin_seeding=False,
min_bin_freq=1, cluster_all=True, max_iter=300,
n_jobs=None):
"""Perform mean shift clustering of data using a flat kernel.
Read more in the :ref:`User Guide <mean_shift>`.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Input data.
bandwidth : float, default=None
Kernel bandwidth.
If bandwidth is not given, it is determined using a heuristic based on
the median of all pairwise distances. This will take quadratic time in
the number of samples. The sklearn.cluster.estimate_bandwidth function
can be used to do this more efficiently.
seeds : array-like of shape (n_seeds, n_features) or None
Points used as initial kernel locations. If None and bin_seeding=False,
each data point is used as a seed. If None and bin_seeding=True,
see bin_seeding.
bin_seeding : boolean, default=False
If true, initial kernel locations are not locations of all
points, but rather the location of the discretized version of
points, where points are binned onto a grid whose coarseness
corresponds to the bandwidth. Setting this option to True will speed
up the algorithm because fewer seeds will be initialized.
Ignored if seeds argument is not None.
min_bin_freq : int, default=1
To speed up the algorithm, accept only those bins with at least
min_bin_freq points as seeds.
cluster_all : bool, default=True
If true, then all points are clustered, even those orphans that are
not within any kernel. Orphans are assigned to the nearest kernel.
If false, then orphans are given cluster label -1.
max_iter : int, default=300
Maximum number of iterations per seed point before the clustering
operation terminates (for that seed point), if it has not converged yet.
n_jobs : int, default=None
The number of jobs to use for the computation. This works by running
the search over seeds in parallel.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
.. versionadded:: 0.17
Parallel Execution using *n_jobs*.
Returns
-------
cluster_centers : array, shape=[n_clusters, n_features]
Coordinates of cluster centers.
labels : array, shape=[n_samples]
Cluster labels for each point.
Notes
-----
For an example, see :ref:`examples/cluster/plot_mean_shift.py
<sphx_glr_auto_examples_cluster_plot_mean_shift.py>`.
"""
model = MeanShift(bandwidth=bandwidth, seeds=seeds,
min_bin_freq=min_bin_freq,
bin_seeding=bin_seeding,
cluster_all=cluster_all, n_jobs=n_jobs,
max_iter=max_iter).fit(X)
return model.cluster_centers_, model.labels_
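# A minimal sketch of the functional interface, mirroring the toy data used in
# the MeanShift class example further down.
#
import numpy as np
from sklearn.cluster import mean_shift

X = np.array([[1, 1], [2, 1], [1, 0],
              [4, 7], [3, 5], [3, 6]])
centers, labels = mean_shift(X, bandwidth=2)
# labels -> array([1, 1, 1, 0, 0, 0]); centers holds one row per flat-kernel mode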
def get_bin_seeds(X, bin_size, min_bin_freq=1):
"""Finds seeds for mean_shift.
Finds seeds by first binning data onto a grid whose lines are
spaced bin_size apart, and then choosing those bins with at least
min_bin_freq points.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Input points, the same points that will be used in mean_shift.
bin_size : float
Controls the coarseness of the binning. Smaller values lead
to more seeding (which is computationally more expensive). If you're
not sure how to set this, set it to the value of the bandwidth used
in clustering.mean_shift.
min_bin_freq : int, default=1
Only bins with at least min_bin_freq will be selected as seeds.
Raising this value decreases the number of seeds found, which
makes mean_shift computationally cheaper.
Returns
-------
bin_seeds : array-like of shape (n_samples, n_features)
Points used as initial kernel positions in clustering.mean_shift.
"""
if bin_size == 0:
return X
# Bin points
bin_sizes = defaultdict(int)
for point in X:
binned_point = np.round(point / bin_size)
bin_sizes[tuple(binned_point)] += 1
# Select only those bins as seeds which have enough members
bin_seeds = np.array([point for point, freq in bin_sizes.items() if
freq >= min_bin_freq], dtype=np.float32)
if len(bin_seeds) == len(X):
warnings.warn("Binning data failed with provided bin_size=%f,"
" using data points as seeds." % bin_size)
return X
bin_seeds = bin_seeds * bin_size
return bin_seeds
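# A tiny sketch of the binning: with a hypothetical bin_size of 1, the four
# points below collapse onto two grid nodes, each with enough members to
# become a seed.
#
import numpy as np
from sklearn.cluster import get_bin_seeds

X = np.array([[1.0, 1.1], [1.2, 0.9],
              [5.0, 5.1], [4.9, 5.2]])
seeds = get_bin_seeds(X, bin_size=1, min_bin_freq=2)
# seeds -> roughly array([[1., 1.], [5., 5.]]) (row order may differ)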
class MeanShift(ClusterMixin, BaseEstimator):
"""Mean shift clustering using a flat kernel.
Mean shift clustering aims to discover "blobs" in a smooth density of
samples. It is a centroid-based algorithm, which works by updating
candidates for centroids to be the mean of the points within a given
region. These candidates are then filtered in a post-processing stage to
eliminate near-duplicates to form the final set of centroids.
Seeding is performed using a binning technique for scalability.
Read more in the :ref:`User Guide <mean_shift>`.
Parameters
----------
bandwidth : float, default=None
Bandwidth used in the flat kernel.
If not given, the bandwidth is estimated using
sklearn.cluster.estimate_bandwidth; see the documentation for that
function for hints on scalability (see also the Notes, below).
seeds : array-like of shape (n_samples, n_features), default=None
Seeds used to initialize kernels. If not set,
the seeds are calculated by clustering.get_bin_seeds
with bandwidth as the grid size and default values for
other parameters.
bin_seeding : bool, default=False
If true, initial kernel locations are not locations of all
points, but rather the location of the discretized version of
points, where points are binned onto a grid whose coarseness
corresponds to the bandwidth. Setting this option to True will speed
up the algorithm because fewer seeds will be initialized.
The default value is False.
Ignored if seeds argument is not None.
min_bin_freq : int, default=1
To speed up the algorithm, accept only those bins with at least
min_bin_freq points as seeds.
cluster_all : bool, default=True
If true, then all points are clustered, even those orphans that are
not within any kernel. Orphans are assigned to the nearest kernel.
If false, then orphans are given cluster label -1.
n_jobs : int, default=None
The number of jobs to use for the computation. This works by running
the search over seeds in parallel.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
max_iter : int, default=300
Maximum number of iterations per seed point before the clustering
operation terminates (for that seed point), if it has not converged yet.
.. versionadded:: 0.22
Attributes
----------
cluster_centers_ : array, [n_clusters, n_features]
Coordinates of cluster centers.
labels_ : array of shape (n_samples,)
Labels of each point.
n_iter_ : int
Maximum number of iterations performed on each seed.
.. versionadded:: 0.22
Examples
--------
>>> from sklearn.cluster import MeanShift
>>> import numpy as np
>>> X = np.array([[1, 1], [2, 1], [1, 0],
... [4, 7], [3, 5], [3, 6]])
>>> clustering = MeanShift(bandwidth=2).fit(X)
>>> clustering.labels_
array([1, 1, 1, 0, 0, 0])
>>> clustering.predict([[0, 0], [5, 5]])
array([1, 0])
>>> clustering
MeanShift(bandwidth=2)
Notes
-----
Scalability:
Because this implementation uses a flat kernel and
a Ball Tree to look up members of each kernel, the complexity will tend
towards O(T*n*log(n)) in lower dimensions, with n the number of samples
and T the number of points. In higher dimensions the complexity will
tend towards O(T*n^2).
Scalability can be boosted by using fewer seeds, for example by using
a higher value of min_bin_freq in the get_bin_seeds function.
Note that the estimate_bandwidth function is much less scalable than the
mean shift algorithm and will be the bottleneck if it is used.
References
----------
Dorin Comaniciu and Peter Meer, "Mean Shift: A robust approach toward
feature space analysis". IEEE Transactions on Pattern Analysis and
Machine Intelligence. 2002. pp. 603-619.
"""
@_deprecate_positional_args
def __init__(self, *, bandwidth=None, seeds=None, bin_seeding=False,
min_bin_freq=1, cluster_all=True, n_jobs=None, max_iter=300):
self.bandwidth = bandwidth
self.seeds = seeds
self.bin_seeding = bin_seeding
self.cluster_all = cluster_all
self.min_bin_freq = min_bin_freq
self.n_jobs = n_jobs
self.max_iter = max_iter
def fit(self, X, y=None):
"""Perform clustering.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Samples to cluster.
y : Ignored
"""
X = self._validate_data(X)
bandwidth = self.bandwidth
if bandwidth is None:
bandwidth = estimate_bandwidth(X, n_jobs=self.n_jobs)
elif bandwidth <= 0:
raise ValueError("bandwidth needs to be greater than zero or None,"
" got %f" % bandwidth)
seeds = self.seeds
if seeds is None:
if self.bin_seeding:
seeds = get_bin_seeds(X, bandwidth, self.min_bin_freq)
else:
seeds = X
n_samples, n_features = X.shape
center_intensity_dict = {}
# We use n_jobs=1 because this will be used in nested calls under
# parallel calls to _mean_shift_single_seed so there is no need
# for further parallelism.
nbrs = NearestNeighbors(radius=bandwidth, n_jobs=1).fit(X)
# execute iterations on all seeds in parallel
all_res = Parallel(n_jobs=self.n_jobs)(
delayed(_mean_shift_single_seed)
(seed, X, nbrs, self.max_iter) for seed in seeds)
# copy results in a dictionary
for i in range(len(seeds)):
if all_res[i][1]: # i.e. len(points_within) > 0
center_intensity_dict[all_res[i][0]] = all_res[i][1]
self.n_iter_ = max([x[2] for x in all_res])
if not center_intensity_dict:
# nothing near seeds
raise ValueError("No point was within bandwidth=%f of any seed."
" Try a different seeding strategy \
or increase the bandwidth."
% bandwidth)
# POST PROCESSING: remove near duplicate points
# If the distance between two kernels is less than the bandwidth,
# then we have to remove one because it is a duplicate. Remove the
# one with fewer points.
sorted_by_intensity = sorted(center_intensity_dict.items(),
key=lambda tup: (tup[1], tup[0]),
reverse=True)
sorted_centers = np.array([tup[0] for tup in sorted_by_intensity])
unique = np.ones(len(sorted_centers), dtype=bool)
nbrs = NearestNeighbors(radius=bandwidth,
n_jobs=self.n_jobs).fit(sorted_centers)
for i, center in enumerate(sorted_centers):
if unique[i]:
neighbor_idxs = nbrs.radius_neighbors([center],
return_distance=False)[0]
unique[neighbor_idxs] = 0
unique[i] = 1 # leave the current point as unique
cluster_centers = sorted_centers[unique]
# ASSIGN LABELS: a point belongs to the cluster that it is closest to
nbrs = NearestNeighbors(n_neighbors=1,
n_jobs=self.n_jobs).fit(cluster_centers)
labels = np.zeros(n_samples, dtype=int)
distances, idxs = nbrs.kneighbors(X)
if self.cluster_all:
labels = idxs.flatten()
else:
labels.fill(-1)
bool_selector = distances.flatten() <= bandwidth
labels[bool_selector] = idxs.flatten()[bool_selector]
self.cluster_centers_, self.labels_ = cluster_centers, labels
return self
def predict(self, X):
"""Predict the closest cluster each sample in X belongs to.
Parameters
----------
X : {array-like, sparse matrix}, shape=[n_samples, n_features]
New data to predict.
Returns
-------
labels : array, shape [n_samples,]
Index of the cluster each sample belongs to.
"""
check_is_fitted(self)
return pairwise_distances_argmin(X, self.cluster_centers_)

View file

@@ -0,0 +1,928 @@
# -*- coding: utf-8 -*-
"""Ordering Points To Identify the Clustering Structure (OPTICS)
These routines execute the OPTICS algorithm, and implement various
cluster extraction methods of the ordered list.
Authors: Shane Grigsby <refuge@rocktalus.com>
Adrin Jalali <adrinjalali@gmail.com>
Erich Schubert <erich@debian.org>
Hanmin Qin <qinhanmin2005@sina.com>
License: BSD 3 clause
"""
import warnings
import numpy as np
from ..utils import gen_batches, get_chunk_n_rows
from ..utils.validation import _deprecate_positional_args
from ..neighbors import NearestNeighbors
from ..base import BaseEstimator, ClusterMixin
from ..metrics import pairwise_distances
class OPTICS(ClusterMixin, BaseEstimator):
"""Estimate clustering structure from vector array.
OPTICS (Ordering Points To Identify the Clustering Structure), closely
related to DBSCAN, finds core samples of high density and expands clusters
from them [1]_. Unlike DBSCAN, it keeps the cluster hierarchy for a variable
neighborhood radius, and is better suited for usage on large datasets than
the current sklearn implementation of DBSCAN.
Clusters are then extracted using a DBSCAN-like method
(cluster_method = 'dbscan') or an automatic
technique proposed in [1]_ (cluster_method = 'xi').
This implementation deviates from the original OPTICS by first performing
k-nearest-neighborhood searches on all points to identify core sizes, then
computing only the distances to unprocessed points when constructing the
cluster order. Note that we do not employ a heap to manage the expansion
candidates, so the time complexity will be O(n^2).
Read more in the :ref:`User Guide <optics>`.
Parameters
----------
min_samples : int > 1 or float between 0 and 1 (default=5)
The number of samples in a neighborhood for a point to be considered as
a core point. Also, up and down steep regions can't have more than
``min_samples`` consecutive non-steep points. Expressed as an absolute
number or a fraction of the number of samples (rounded to be at least
2).
max_eps : float, optional (default=np.inf)
The maximum distance between two samples for one to be considered as
in the neighborhood of the other. Default value of ``np.inf`` will
identify clusters across all scales; reducing ``max_eps`` will result
in shorter run times.
metric : str or callable, optional (default='minkowski')
Metric to use for distance computation. Any metric from scikit-learn
or scipy.spatial.distance can be used.
If metric is a callable function, it is called on each
pair of instances (rows) and the resulting value recorded. The callable
should take two arrays as input and return one value indicating the
distance between them. This works for Scipy's metrics, but is less
efficient than passing the metric name as a string. If metric is
"precomputed", X is assumed to be a distance matrix and must be square.
Valid values for metric are:
- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',
'manhattan']
- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',
'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',
'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',
'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',
'yule']
See the documentation for scipy.spatial.distance for details on these
metrics.
p : int, optional (default=2)
Parameter for the Minkowski metric from
:func:`sklearn.metrics.pairwise_distances`. When p = 1, this is
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric_params : dict, optional (default=None)
Additional keyword arguments for the metric function.
cluster_method : str, optional (default='xi')
The extraction method used to extract clusters using the calculated
reachability and ordering. Possible values are "xi" and "dbscan".
eps : float, optional (default=None)
The maximum distance between two samples for one to be considered as
in the neighborhood of the other. By default it assumes the same value
as ``max_eps``.
Used only when ``cluster_method='dbscan'``.
xi : float, between 0 and 1, optional (default=0.05)
Determines the minimum steepness on the reachability plot that
constitutes a cluster boundary. For example, an upwards point in the
reachability plot is defined by the ratio from one point to its
successor being at most 1-xi.
Used only when ``cluster_method='xi'``.
predecessor_correction : bool, optional (default=True)
Correct clusters according to the predecessors calculated by OPTICS
[2]_. This parameter has minimal effect on most datasets.
Used only when ``cluster_method='xi'``.
min_cluster_size : int > 1 or float between 0 and 1 (default=None)
Minimum number of samples in an OPTICS cluster, expressed as an
absolute number or a fraction of the number of samples (rounded to be
at least 2). If ``None``, the value of ``min_samples`` is used instead.
Used only when ``cluster_method='xi'``.
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional
Algorithm used to compute the nearest neighbors:
- 'ball_tree' will use :class:`BallTree`
- 'kd_tree' will use :class:`KDTree`
- 'brute' will use a brute-force search.
- 'auto' will attempt to decide the most appropriate algorithm
based on the values passed to :meth:`fit` method. (default)
Note: fitting on sparse input will override the setting of
this parameter, using brute force.
leaf_size : int, optional (default=30)
Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can
affect the speed of the construction and query, as well as the memory
required to store the tree. The optimal value depends on the
nature of the problem.
n_jobs : int or None, optional (default=None)
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Attributes
----------
labels_ : array, shape (n_samples,)
Cluster labels for each point in the dataset given to fit().
Noisy samples and points which are not included in a leaf cluster
of ``cluster_hierarchy_`` are labeled as -1.
reachability_ : array, shape (n_samples,)
Reachability distances per sample, indexed by object order. Use
``clust.reachability_[clust.ordering_]`` to access in cluster order.
ordering_ : array, shape (n_samples,)
The cluster ordered list of sample indices.
core_distances_ : array, shape (n_samples,)
Distance at which each sample becomes a core point, indexed by object
order. Points which will never be core have a distance of inf. Use
``clust.core_distances_[clust.ordering_]`` to access in cluster order.
predecessor_ : array, shape (n_samples,)
Point that a sample was reached from, indexed by object order.
Seed points have a predecessor of -1.
cluster_hierarchy_ : array, shape (n_clusters, 2)
The list of clusters in the form of ``[start, end]`` in each row, with
all indices inclusive. The clusters are ordered according to
``(end, -start)`` (ascending) so that larger clusters encompassing
smaller clusters come after those smaller ones. Since ``labels_`` does
not reflect the hierarchy, usually
``len(cluster_hierarchy_) > len(np.unique(optics.labels_))``. Please also
note that these indices are of the ``ordering_``, i.e.
``X[ordering_][start:end + 1]`` form a cluster.
Only available when ``cluster_method='xi'``.
See Also
--------
DBSCAN
A similar clustering for a specified neighborhood radius (eps).
Our implementation is optimized for runtime.
References
----------
.. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,
and Jörg Sander. "OPTICS: ordering points to identify the clustering
structure." ACM SIGMOD Record 28, no. 2 (1999): 49-60.
.. [2] Schubert, Erich, Michael Gertz.
"Improving the Cluster Structure Extracted from OPTICS Plots." Proc. of
the Conference "Lernen, Wissen, Daten, Analysen" (LWDA) (2018): 318-329.
Examples
--------
>>> from sklearn.cluster import OPTICS
>>> import numpy as np
>>> X = np.array([[1, 2], [2, 5], [3, 6],
... [8, 7], [8, 8], [7, 3]])
>>> clustering = OPTICS(min_samples=2).fit(X)
>>> clustering.labels_
array([0, 0, 0, 1, 1, 1])
"""
@_deprecate_positional_args
def __init__(self, *, min_samples=5, max_eps=np.inf, metric='minkowski',
p=2, metric_params=None, cluster_method='xi', eps=None,
xi=0.05, predecessor_correction=True, min_cluster_size=None,
algorithm='auto', leaf_size=30, n_jobs=None):
self.max_eps = max_eps
self.min_samples = min_samples
self.min_cluster_size = min_cluster_size
self.algorithm = algorithm
self.metric = metric
self.metric_params = metric_params
self.p = p
self.leaf_size = leaf_size
self.cluster_method = cluster_method
self.eps = eps
self.xi = xi
self.predecessor_correction = predecessor_correction
self.n_jobs = n_jobs
def fit(self, X, y=None):
"""Perform OPTICS clustering.
Extracts an ordered list of points and reachability distances, and
performs initial clustering using ``max_eps`` distance specified at
OPTICS object instantiation.
Parameters
----------
X : array, shape (n_samples, n_features), or (n_samples, n_samples) \
if metric=precomputed
A feature array, or array of distances between samples if
metric='precomputed'.
y : ignored
Ignored.
Returns
-------
self : instance of OPTICS
The instance.
"""
X = self._validate_data(X, dtype=float)
if self.cluster_method not in ['dbscan', 'xi']:
raise ValueError("cluster_method should be one of"
" 'dbscan' or 'xi' but is %s" %
self.cluster_method)
(self.ordering_, self.core_distances_, self.reachability_,
self.predecessor_) = compute_optics_graph(
X=X, min_samples=self.min_samples, algorithm=self.algorithm,
leaf_size=self.leaf_size, metric=self.metric,
metric_params=self.metric_params, p=self.p, n_jobs=self.n_jobs,
max_eps=self.max_eps)
# Extract clusters from the calculated orders and reachability
if self.cluster_method == 'xi':
labels_, clusters_ = cluster_optics_xi(
reachability=self.reachability_,
predecessor=self.predecessor_,
ordering=self.ordering_,
min_samples=self.min_samples,
min_cluster_size=self.min_cluster_size,
xi=self.xi,
predecessor_correction=self.predecessor_correction)
self.cluster_hierarchy_ = clusters_
elif self.cluster_method == 'dbscan':
if self.eps is None:
eps = self.max_eps
else:
eps = self.eps
if eps > self.max_eps:
raise ValueError('Specify an epsilon smaller than %s. Got %s.'
% (self.max_eps, eps))
labels_ = cluster_optics_dbscan(
reachability=self.reachability_,
core_distances=self.core_distances_,
ordering=self.ordering_, eps=eps)
self.labels_ = labels_
return self
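# A sketch of fitting with the DBSCAN-like extraction instead of the default
# xi method; the data and eps value are illustrative only.
#
import numpy as np
from sklearn.cluster import OPTICS

X = np.array([[1, 2], [2, 5], [3, 6],
              [8, 7], [8, 8], [7, 3]])
clust = OPTICS(min_samples=2, cluster_method='dbscan', eps=2).fit(X)
# clust.labels_ holds the eps=2 extraction, while reachability_, ordering_
# and core_distances_ remain available for re-extraction at other eps values.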
def _validate_size(size, n_samples, param_name):
if size <= 0 or (size != int(size) and size > 1):
raise ValueError('%s must be a positive integer '
'or a float between 0 and 1. Got %r' %
(param_name, size))
elif size > n_samples:
raise ValueError('%s must be no greater than the'
' number of samples (%d). Got %d' %
(param_name, n_samples, size))
# OPTICS helper functions
def _compute_core_distances_(X, neighbors, min_samples, working_memory):
"""Compute the k-th nearest neighbor of each sample
Equivalent to neighbors.kneighbors(X, min_samples)[0][:, -1]
but with more memory efficiency.
Parameters
----------
X : array, shape (n_samples, n_features)
The data.
neighbors : NearestNeighbors instance
The fitted nearest neighbors estimator.
working_memory : int, optional
The sought maximum memory for temporary distance matrix chunks.
When None (default), the value of
``sklearn.get_config()['working_memory']`` is used.
Returns
-------
core_distances : array, shape (n_samples,)
Distance at which each sample becomes a core point.
Points which will never be core have a distance of inf.
"""
n_samples = X.shape[0]
core_distances = np.empty(n_samples)
core_distances.fill(np.nan)
chunk_n_rows = get_chunk_n_rows(row_bytes=16 * min_samples,
max_n_rows=n_samples,
working_memory=working_memory)
slices = gen_batches(n_samples, chunk_n_rows)
for sl in slices:
core_distances[sl] = neighbors.kneighbors(
X[sl], min_samples)[0][:, -1]
return core_distances
@_deprecate_positional_args
def compute_optics_graph(X, *, min_samples, max_eps, metric, p, metric_params,
algorithm, leaf_size, n_jobs):
"""Computes the OPTICS reachability graph.
Read more in the :ref:`User Guide <optics>`.
Parameters
----------
X : array, shape (n_samples, n_features), or (n_samples, n_samples) \
if metric=precomputed.
A feature array, or array of distances between samples if
metric='precomputed'
min_samples : int > 1 or float between 0 and 1
The number of samples in a neighborhood for a point to be considered
as a core point. Expressed as an absolute number or a fraction of the
number of samples (rounded to be at least 2).
max_eps : float, optional (default=np.inf)
The maximum distance between two samples for one to be considered as
in the neighborhood of the other. Default value of ``np.inf`` will
identify clusters across all scales; reducing ``max_eps`` will result
in shorter run times.
metric : string or callable, optional (default='minkowski')
Metric to use for distance computation. Any metric from scikit-learn
or scipy.spatial.distance can be used.
If metric is a callable function, it is called on each
pair of instances (rows) and the resulting value recorded. The callable
should take two arrays as input and return one value indicating the
distance between them. This works for Scipy's metrics, but is less
efficient than passing the metric name as a string. If metric is
"precomputed", X is assumed to be a distance matrix and must be square.
Valid values for metric are:
- from scikit-learn: ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',
'manhattan']
- from scipy.spatial.distance: ['braycurtis', 'canberra', 'chebyshev',
'correlation', 'dice', 'hamming', 'jaccard', 'kulsinski',
'mahalanobis', 'minkowski', 'rogerstanimoto', 'russellrao',
'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean',
'yule']
See the documentation for scipy.spatial.distance for details on these
metrics.
p : integer, optional (default=2)
Parameter for the Minkowski metric from
:func:`sklearn.metrics.pairwise_distances`. When p = 1, this is
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
metric_params : dict, optional (default=None)
Additional keyword arguments for the metric function.
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional
Algorithm used to compute the nearest neighbors:
- 'ball_tree' will use :class:`BallTree`
- 'kd_tree' will use :class:`KDTree`
- 'brute' will use a brute-force search.
- 'auto' will attempt to decide the most appropriate algorithm
based on the values passed to :meth:`fit` method. (default)
Note: fitting on sparse input will override the setting of
this parameter, using brute force.
leaf_size : int, optional (default=30)
Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can
affect the speed of the construction and query, as well as the memory
required to store the tree. The optimal value depends on the
nature of the problem.
n_jobs : int or None, optional (default=None)
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Returns
-------
ordering_ : array, shape (n_samples,)
The cluster ordered list of sample indices.
core_distances_ : array, shape (n_samples,)
Distance at which each sample becomes a core point, indexed by object
order. Points which will never be core have a distance of inf. Use
``clust.core_distances_[clust.ordering_]`` to access in cluster order.
reachability_ : array, shape (n_samples,)
Reachability distances per sample, indexed by object order. Use
``clust.reachability_[clust.ordering_]`` to access in cluster order.
predecessor_ : array, shape (n_samples,)
Point that a sample was reached from, indexed by object order.
Seed points have a predecessor of -1.
References
----------
.. [1] Ankerst, Mihael, Markus M. Breunig, Hans-Peter Kriegel,
and Jörg Sander. "OPTICS: ordering points to identify the clustering
structure." ACM SIGMOD Record 28, no. 2 (1999): 49-60.
"""
n_samples = X.shape[0]
_validate_size(min_samples, n_samples, 'min_samples')
if min_samples <= 1:
min_samples = max(2, int(min_samples * n_samples))
# Start all points as 'unprocessed' ##
reachability_ = np.empty(n_samples)
reachability_.fill(np.inf)
predecessor_ = np.empty(n_samples, dtype=int)
predecessor_.fill(-1)
nbrs = NearestNeighbors(n_neighbors=min_samples,
algorithm=algorithm,
leaf_size=leaf_size,
metric=metric,
metric_params=metric_params,
p=p,
n_jobs=n_jobs)
nbrs.fit(X)
# Here we first do a kNN query for each point, this differs from
# the original OPTICS that only used epsilon range queries.
# TODO: handle working_memory somehow?
core_distances_ = _compute_core_distances_(X=X, neighbors=nbrs,
min_samples=min_samples,
working_memory=None)
# OPTICS puts an upper limit on these, use inf for undefined.
core_distances_[core_distances_ > max_eps] = np.inf
# Main OPTICS loop. Not parallelizable. The order that entries are
# written to the 'ordering_' list is important!
# Note that this implementation is O(n^2) theoretically, but
# supposedly with very low constant factors.
processed = np.zeros(X.shape[0], dtype=bool)
ordering = np.zeros(X.shape[0], dtype=int)
for ordering_idx in range(X.shape[0]):
# Choose next based on smallest reachability distance
# (And prefer smaller ids on ties, possibly np.inf!)
index = np.where(processed == 0)[0]
point = index[np.argmin(reachability_[index])]
processed[point] = True
ordering[ordering_idx] = point
if core_distances_[point] != np.inf:
_set_reach_dist(core_distances_=core_distances_,
reachability_=reachability_,
predecessor_=predecessor_,
point_index=point,
processed=processed, X=X, nbrs=nbrs,
metric=metric, metric_params=metric_params,
p=p, max_eps=max_eps)
if np.all(np.isinf(reachability_)):
warnings.warn("All reachability values are inf. Set a larger"
" max_eps or all data will be considered outliers.",
UserWarning)
return ordering, core_distances_, reachability_, predecessor_
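# A sketch of calling the graph computation directly; every parameter after X
# is keyword-only and must be supplied explicitly (the values below are
# illustrative defaults, the data is a toy array).
#
import numpy as np
from sklearn.cluster import compute_optics_graph

X = np.array([[1., 2.], [2., 5.], [3., 6.],
              [8., 7.], [8., 8.], [7., 3.]])
ordering, core_distances, reachability, predecessor = compute_optics_graph(
    X, min_samples=2, max_eps=np.inf, metric='minkowski', p=2,
    metric_params=None, algorithm='auto', leaf_size=30, n_jobs=None)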
def _set_reach_dist(core_distances_, reachability_, predecessor_,
point_index, processed, X, nbrs, metric, metric_params,
p, max_eps):
P = X[point_index:point_index + 1]
# Assume that radius_neighbors is faster without distances
# and we don't need all distances, nevertheless, this means
# we may be doing some work twice.
indices = nbrs.radius_neighbors(P, radius=max_eps,
return_distance=False)[0]
# Getting indices of neighbors that have not been processed
unproc = np.compress(~np.take(processed, indices), indices)
# Neighbors of current point are already processed.
if not unproc.size:
return
# Only compute distances to unprocessed neighbors:
if metric == 'precomputed':
dists = X[point_index, unproc]
else:
_params = dict() if metric_params is None else metric_params.copy()
if metric == 'minkowski' and 'p' not in _params:
# the same logic as neighbors, p is ignored if explicitly set
# in the dict params
_params['p'] = p
dists = pairwise_distances(P, np.take(X, unproc, axis=0),
metric=metric, n_jobs=None,
**_params).ravel()
rdists = np.maximum(dists, core_distances_[point_index])
improved = np.where(rdists < np.take(reachability_, unproc))
reachability_[unproc[improved]] = rdists[improved]
predecessor_[unproc[improved]] = point_index
@_deprecate_positional_args
def cluster_optics_dbscan(*, reachability, core_distances, ordering, eps):
"""Performs DBSCAN extraction for an arbitrary epsilon.
Extracting the clusters runs in linear time. Note that this results in
``labels_`` which are close to a :class:`~sklearn.cluster.DBSCAN` with
similar settings and ``eps``, only if ``eps`` is close to ``max_eps``.
Parameters
----------
reachability : array, shape (n_samples,)
Reachability distances calculated by OPTICS (``reachability_``)
core_distances : array, shape (n_samples,)
Distances at which points become core (``core_distances_``)
ordering : array, shape (n_samples,)
OPTICS ordered point indices (``ordering_``)
eps : float
DBSCAN ``eps`` parameter. Must be set to < ``max_eps``. Results
will be close to DBSCAN algorithm if ``eps`` and ``max_eps`` are close
to one another.
Returns
-------
labels_ : array, shape (n_samples,)
The estimated labels.
"""
n_samples = len(core_distances)
labels = np.zeros(n_samples, dtype=int)
far_reach = reachability > eps
near_core = core_distances <= eps
labels[ordering] = np.cumsum(far_reach[ordering] & near_core[ordering]) - 1
labels[far_reach & ~near_core] = -1
return labels
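# Because extraction is decoupled from the graph computation, flat labels at a
# smaller eps can be re-derived from an already fitted OPTICS model without
# recomputing neighborhoods; a sketch with illustrative data and eps.
#
import numpy as np
from sklearn.cluster import OPTICS, cluster_optics_dbscan

X = np.array([[1, 2], [2, 5], [3, 6],
              [8, 7], [8, 8], [7, 3]])
clust = OPTICS(min_samples=2).fit(X)
labels_eps2 = cluster_optics_dbscan(reachability=clust.reachability_,
                                    core_distances=clust.core_distances_,
                                    ordering=clust.ordering_, eps=2)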
def cluster_optics_xi(*, reachability, predecessor, ordering, min_samples,
min_cluster_size=None, xi=0.05,
predecessor_correction=True):
"""Automatically extract clusters according to the Xi-steep method.
Parameters
----------
reachability : array, shape (n_samples,)
Reachability distances calculated by OPTICS (`reachability_`)
predecessor : array, shape (n_samples,)
Predecessors calculated by OPTICS.
ordering : array, shape (n_samples,)
OPTICS ordered point indices (`ordering_`)
min_samples : int > 1 or float between 0 and 1
The same as the min_samples given to OPTICS. Up and down steep regions
can't have more than ``min_samples`` consecutive non-steep points.
Expressed as an absolute number or a fraction of the number of samples
(rounded to be at least 2).
min_cluster_size : int > 1 or float between 0 and 1 (default=None)
Minimum number of samples in an OPTICS cluster, expressed as an
absolute number or a fraction of the number of samples (rounded to be
at least 2). If ``None``, the value of ``min_samples`` is used instead.
xi : float, between 0 and 1, optional (default=0.05)
Determines the minimum steepness on the reachability plot that
constitutes a cluster boundary. For example, an upwards point in the
reachability plot is defined by the ratio from one point to its
successor being at most 1-xi.
predecessor_correction : bool, optional (default=True)
Correct clusters based on the calculated predecessors.
Returns
-------
labels : array, shape (n_samples)
The labels assigned to samples. Points which are not included
in any cluster are labeled as -1.
clusters : array, shape (n_clusters, 2)
The list of clusters in the form of ``[start, end]`` in each row, with
all indices inclusive. The clusters are ordered according to ``(end,
-start)`` (ascending) so that larger clusters encompassing smaller
clusters come after such nested smaller clusters. Since ``labels`` does
not reflect the hierarchy, usually
``len(clusters) > len(np.unique(labels))``.
"""
n_samples = len(reachability)
_validate_size(min_samples, n_samples, 'min_samples')
if min_samples <= 1:
min_samples = max(2, int(min_samples * n_samples))
if min_cluster_size is None:
min_cluster_size = min_samples
_validate_size(min_cluster_size, n_samples, 'min_cluster_size')
if min_cluster_size <= 1:
min_cluster_size = max(2, int(min_cluster_size * n_samples))
clusters = _xi_cluster(reachability[ordering], predecessor[ordering],
ordering, xi,
min_samples, min_cluster_size,
predecessor_correction)
labels = _extract_xi_labels(ordering, clusters)
return labels, clusters
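# The xi extraction can likewise be re-run on a fitted OPTICS model with a
# different steepness threshold; a sketch with illustrative data and xi.
#
import numpy as np
from sklearn.cluster import OPTICS, cluster_optics_xi

X = np.array([[1, 2], [2, 5], [3, 6],
              [8, 7], [8, 8], [7, 3]])
clust = OPTICS(min_samples=2).fit(X)
labels, hierarchy = cluster_optics_xi(reachability=clust.reachability_,
                                      predecessor=clust.predecessor_,
                                      ordering=clust.ordering_,
                                      min_samples=2, xi=0.1)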
def _extend_region(steep_point, xward_point, start, min_samples):
"""Extend the area until it's maximal.
It's the same function for both upward and downward regions, depending on
the given input parameters. Assuming:
- steep_{upward/downward}: bool array indicating whether a point is a
steep {upward/downward};
- upward/downward: bool array indicating whether a point is
upward/downward;
To extend an upward region, ``steep_point=steep_upward`` and
``xward_point=downward`` are expected, and to extend a downward region,
``steep_point=steep_downward`` and ``xward_point=upward``.
Parameters
----------
steep_point : bool array, shape (n_samples)
True if the point is steep downward (upward).
xward_point : bool array, shape (n_samples)
True if the point is an upward (respectively downward) point.
start : integer
The start of the xward region.
min_samples : integer
The same as the min_samples given to OPTICS. Up and down steep
regions can't have more than ``min_samples`` consecutive non-steep
points.
Returns
-------
index : integer
The current index iterating over all the samples, i.e. where we are up
to in our search.
end : integer
The end of the region, which can be behind the index. The region
includes the ``end`` index.
"""
n_samples = len(steep_point)
non_xward_points = 0
index = start
end = start
# find a maximal area
while index < n_samples:
if steep_point[index]:
non_xward_points = 0
end = index
elif not xward_point[index]:
# it's not a steep point, but still goes up.
non_xward_points += 1
# region should include no more than min_samples consecutive
# non steep xward points.
if non_xward_points > min_samples:
break
else:
return end
index += 1
return end
def _update_filter_sdas(sdas, mib, xi_complement, reachability_plot):
"""Update steep down areas (SDAs) using the new maximum in between (mib)
value, and the given complement of xi, i.e. ``1 - xi``.
"""
if np.isinf(mib):
return []
res = [sda for sda in sdas
if mib <= reachability_plot[sda['start']] * xi_complement]
for sda in res:
sda['mib'] = max(sda['mib'], mib)
return res
def _correct_predecessor(reachability_plot, predecessor_plot, ordering, s, e):
"""Correct for predecessors.
Applies Algorithm 2 of [1]_.
Input parameters are ordered by the computed OPTICS ordering.
.. [1] Schubert, Erich, Michael Gertz.
"Improving the Cluster Structure Extracted from OPTICS Plots." Proc. of
the Conference "Lernen, Wissen, Daten, Analysen" (LWDA) (2018): 318-329.
"""
while s < e:
if reachability_plot[s] > reachability_plot[e]:
return s, e
p_e = ordering[predecessor_plot[e]]
for i in range(s, e):
if p_e == ordering[i]:
return s, e
e -= 1
return None, None
def _xi_cluster(reachability_plot, predecessor_plot, ordering, xi, min_samples,
min_cluster_size, predecessor_correction):
"""Automatically extract clusters according to the Xi-steep method.
This is roughly an implementation of Figure 19 of the OPTICS paper.
Parameters
----------
reachability_plot : array, shape (n_samples)
The reachability plot, i.e. reachability ordered according to
the calculated ordering, all computed by OPTICS.
predecessor_plot : array, shape (n_samples)
Predecessors ordered according to the calculated ordering.
xi : float, between 0 and 1
Determines the minimum steepness on the reachability plot that
constitutes a cluster boundary. For example, an upwards point in the
reachability plot is defined by the ratio from one point to its
successor being at most 1-xi.
min_samples : int > 1
The same as the min_samples given to OPTICS. Up and down steep regions
can't have more than ``min_samples`` consecutive non-steep points.
min_cluster_size : int > 1
Minimum number of samples in an OPTICS cluster.
predecessor_correction : bool
Correct clusters based on the calculated predecessors.
Returns
-------
clusters : array, shape (n_clusters, 2)
The list of clusters in the form of [start, end] in each row, with all
indices inclusive. The clusters are ordered in a way that larger
clusters encompassing smaller clusters come after those smaller
clusters.
"""
# Our implementation adds an inf to the end of the reachability plot;
# this helps to find potential clusters at the end of the
# reachability plot even if there's no upward region at the end of it.
reachability_plot = np.hstack((reachability_plot, np.inf))
xi_complement = 1 - xi
sdas = [] # steep down areas, introduced in section 4.3.2 of the paper
clusters = []
index = 0
mib = 0. # maximum in between, section 4.3.2
# Our implementation corrects a mistake in the original
# paper, i.e., in Definition 9 steep downward point,
# r(p) * (1 - xi) <= r(p + 1) should be
# r(p) * (1 - xi) >= r(p + 1)
with np.errstate(invalid='ignore'):
ratio = reachability_plot[:-1] / reachability_plot[1:]
steep_upward = ratio <= xi_complement
steep_downward = ratio >= 1 / xi_complement
downward = ratio > 1
upward = ratio < 1
# the following loop is almost exactly as in Figure 19 of the paper.
# it jumps over the areas which are not either steep down or up areas
for steep_index in iter(np.flatnonzero(steep_upward | steep_downward)):
# just continue if steep_index has been a part of a discovered xward
# area.
if steep_index < index:
continue
mib = max(mib, np.max(reachability_plot[index:steep_index + 1]))
# steep downward areas
if steep_downward[steep_index]:
sdas = _update_filter_sdas(sdas, mib, xi_complement,
reachability_plot)
D_start = steep_index
D_end = _extend_region(steep_downward, upward,
D_start, min_samples)
D = {'start': D_start, 'end': D_end, 'mib': 0.}
sdas.append(D)
index = D_end + 1
mib = reachability_plot[index]
# steep upward areas
else:
sdas = _update_filter_sdas(sdas, mib, xi_complement,
reachability_plot)
U_start = steep_index
U_end = _extend_region(steep_upward, downward, U_start,
min_samples)
index = U_end + 1
mib = reachability_plot[index]
U_clusters = []
for D in sdas:
c_start = D['start']
c_end = U_end
# line (**), sc2*
if reachability_plot[c_end + 1] * xi_complement < D['mib']:
continue
# Definition 11: criterion 4
D_max = reachability_plot[D['start']]
if D_max * xi_complement >= reachability_plot[c_end + 1]:
# Find the first index from the left side which is almost
# at the same level as the end of the detected cluster.
while (reachability_plot[c_start + 1] >
reachability_plot[c_end + 1]
and c_start < D['end']):
c_start += 1
elif reachability_plot[c_end + 1] * xi_complement >= D_max:
# Find the first index from the right side which is almost
# at the same level as the beginning of the detected
# cluster.
# Our implementation corrects a mistake in the original
# paper, i.e., in Definition 11 4c, r(x) < r(sD) should be
# r(x) > r(sD).
while (reachability_plot[c_end - 1] > D_max
and c_end > U_start):
c_end -= 1
# predecessor correction
if predecessor_correction:
c_start, c_end = _correct_predecessor(reachability_plot,
predecessor_plot,
ordering,
c_start,
c_end)
if c_start is None:
continue
# Definition 11: criterion 3.a
if c_end - c_start + 1 < min_cluster_size:
continue
# Definition 11: criterion 1
if c_start > D['end']:
continue
# Definition 11: criterion 2
if c_end < U_start:
continue
U_clusters.append((c_start, c_end))
# add smaller clusters first.
U_clusters.reverse()
clusters.extend(U_clusters)
return np.array(clusters)
def _extract_xi_labels(ordering, clusters):
"""Extracts the labels from the clusters returned by `_xi_cluster`.
We rely on the fact that clusters are stored
with the smaller clusters coming before the larger ones.
Parameters
----------
ordering : array, shape (n_samples)
The ordering of points calculated by OPTICS
clusters : array, shape (n_clusters, 2)
List of clusters i.e. (start, end) tuples,
as returned by `_xi_cluster`.
Returns
-------
labels : array, shape (n_samples)
"""
labels = np.full(len(ordering), -1, dtype=int)
label = 0
for c in clusters:
if not np.any(labels[c[0]:(c[1] + 1)] != -1):
labels[c[0]:(c[1] + 1)] = label
label += 1
labels[ordering] = labels.copy()
return labels
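# NOTE (illustrative addition, not part of this file): a minimal, hedged usage
# sketch of the xi extraction implemented above. OPTICS with the default
# cluster_method='xi' runs _xi_cluster/_extract_xi_labels internally; the
# sample data and parameter values below are assumptions chosen only for
# demonstration.
import numpy as np
from sklearn.cluster import OPTICS

rng = np.random.RandomState(0)
X_demo = np.vstack([rng.randn(50, 2), rng.randn(50, 2) + 6])
opt = OPTICS(min_samples=10, xi=0.05, min_cluster_size=0.1).fit(X_demo)
# labels_ are derived from the (start, end) intervals found by _xi_cluster;
# -1 marks points that fall in no xi-cluster.
print(np.unique(opt.labels_))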

View file

@ -0,0 +1,552 @@
# -*- coding: utf-8 -*-
"""Algorithms for spectral clustering"""
# Author: Gael Varoquaux gael.varoquaux@normalesup.org
# Brian Cheung
# Wei LI <kuantkid@gmail.com>
# License: BSD 3 clause
import warnings
import numpy as np
from ..base import BaseEstimator, ClusterMixin
from ..utils import check_random_state, as_float_array
from ..utils.validation import _deprecate_positional_args
from ..metrics.pairwise import pairwise_kernels
from ..neighbors import kneighbors_graph, NearestNeighbors
from ..manifold import spectral_embedding
from ._kmeans import k_means
@_deprecate_positional_args
def discretize(vectors, *, copy=True, max_svd_restarts=30, n_iter_max=20,
random_state=None):
"""Search for a partition matrix (clustering) which is closest to the
eigenvector embedding.
Parameters
----------
vectors : array-like, shape: (n_samples, n_clusters)
The embedding space of the samples.
copy : boolean, optional, default: True
Whether to copy vectors, or perform in-place normalization.
max_svd_restarts : int, optional, default: 30
Maximum number of attempts to restart SVD if convergence fails
n_iter_max : int, optional, default: 20
Maximum number of iterations to attempt in rotation and partition
matrix search if machine precision convergence is not reached
random_state : int, RandomState instance, default=None
Determines random number generation for rotation matrix initialization.
Use an int to make the randomness deterministic.
See :term:`Glossary <random_state>`.
Returns
-------
labels : array of integers, shape: n_samples
The labels of the clusters.
References
----------
- Multiclass spectral clustering, 2003
Stella X. Yu, Jianbo Shi
https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf
Notes
-----
The eigenvector embedding is used to iteratively search for the
closest discrete partition. First, the eigenvector embedding is
normalized to the space of partition matrices. An optimal discrete
partition matrix closest to this normalized embedding multiplied by
an initial rotation is calculated. Fixing this discrete partition
matrix, an optimal rotation matrix is calculated. These two
calculations are performed until convergence. The discrete partition
matrix is returned as the clustering solution. Used in spectral
clustering, this method tends to be faster and more robust to random
initialization than k-means.
"""
from scipy.sparse import csc_matrix
from scipy.linalg import LinAlgError
random_state = check_random_state(random_state)
vectors = as_float_array(vectors, copy=copy)
eps = np.finfo(float).eps
n_samples, n_components = vectors.shape
# Normalize the eigenvectors to an equal length of a vector of ones.
# Reorient the eigenvectors to point in the negative direction with respect
# to the first element. This may have to do with constraining the
# eigenvectors to lie in a specific quadrant to make the discretization
# search easier.
norm_ones = np.sqrt(n_samples)
for i in range(vectors.shape[1]):
vectors[:, i] = (vectors[:, i] / np.linalg.norm(vectors[:, i])) \
* norm_ones
if vectors[0, i] != 0:
vectors[:, i] = -1 * vectors[:, i] * np.sign(vectors[0, i])
# Normalize the rows of the eigenvectors. Samples should lie on the unit
# hypersphere centered at the origin. This transforms the samples in the
# embedding space to the space of partition matrices.
vectors = vectors / np.sqrt((vectors ** 2).sum(axis=1))[:, np.newaxis]
svd_restarts = 0
has_converged = False
# If there is an exception we try to randomize and rerun SVD again
# do this max_svd_restarts times.
while (svd_restarts < max_svd_restarts) and not has_converged:
# Initialize first column of rotation matrix with a row of the
# eigenvectors
rotation = np.zeros((n_components, n_components))
rotation[:, 0] = vectors[random_state.randint(n_samples), :].T
# To initialize the rest of the rotation matrix, find the rows
# of the eigenvectors that are as orthogonal to each other as
# possible
c = np.zeros(n_samples)
for j in range(1, n_components):
# Accumulate c to ensure row is as orthogonal as possible to
# previous picks as well as current one
c += np.abs(np.dot(vectors, rotation[:, j - 1]))
rotation[:, j] = vectors[c.argmin(), :].T
last_objective_value = 0.0
n_iter = 0
while not has_converged:
n_iter += 1
t_discrete = np.dot(vectors, rotation)
labels = t_discrete.argmax(axis=1)
vectors_discrete = csc_matrix(
(np.ones(len(labels)), (np.arange(0, n_samples), labels)),
shape=(n_samples, n_components))
t_svd = vectors_discrete.T * vectors
try:
    U, S, Vh = np.linalg.svd(t_svd)
except LinAlgError:
    # count the restart and retry with a new random rotation
    svd_restarts += 1
    print("SVD did not converge, randomizing and trying again")
    break
ncut_value = 2.0 * (n_samples - S.sum())
if ((abs(ncut_value - last_objective_value) < eps) or
(n_iter > n_iter_max)):
has_converged = True
else:
# otherwise calculate rotation and continue
last_objective_value = ncut_value
rotation = np.dot(Vh.T, U.T)
if not has_converged:
raise LinAlgError('SVD did not converge')
return labels
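# NOTE (illustrative addition, not part of this file): a hedged sketch showing
# discretize applied directly to a tiny embedding. The private import path and
# the toy matrix are assumptions; in normal use this helper is called by
# spectral_clustering with assign_labels='discretize'.
import numpy as np
from sklearn.cluster._spectral import discretize

# Two well-separated groups in a 2-component embedding space.
embedding = np.array([[1.0, 0.0],
                      [0.9, 0.1],
                      [0.0, 1.0],
                      [0.1, 0.9]])
demo_labels = discretize(embedding, random_state=0)
print(demo_labels)  # two groups, e.g. [0 0 1 1] (label ids may be swapped)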
@_deprecate_positional_args
def spectral_clustering(affinity, *, n_clusters=8, n_components=None,
eigen_solver=None, random_state=None, n_init=10,
eigen_tol=0.0, assign_labels='kmeans'):
"""Apply clustering to a projection of the normalized Laplacian.
In practice Spectral Clustering is very useful when the structure of
the individual clusters is highly non-convex or more generally when
a measure of the center and spread of the cluster is not a suitable
description of the complete cluster. For instance, when clusters are
nested circles on the 2D plane.
If affinity is the adjacency matrix of a graph, this method can be
used to find normalized graph cuts.
Read more in the :ref:`User Guide <spectral_clustering>`.
Parameters
----------
affinity : array-like or sparse matrix, shape: (n_samples, n_samples)
The affinity matrix describing the relationship of the samples to
embed. **Must be symmetric**.
Possible examples:
- adjacency matrix of a graph,
- heat kernel of the pairwise distance matrix of the samples,
- symmetric k-nearest neighbours connectivity matrix of the samples.
n_clusters : integer, optional
Number of clusters to extract.
n_components : integer, optional, default is n_clusters
Number of eigen vectors to use for the spectral embedding
eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}
The eigenvalue decomposition strategy to use. AMG requires pyamg
to be installed. It can be faster on very large, sparse problems,
but may also lead to instabilities
random_state : int, RandomState instance, default=None
A pseudo random number generator used for the initialization of the
lobpcg eigen vectors decomposition when eigen_solver == 'amg' and by
the K-Means initialization. Use an int to make the randomness
deterministic.
See :term:`Glossary <random_state>`.
n_init : int, optional, default: 10
Number of time the k-means algorithm will be run with different
centroid seeds. The final results will be the best output of
n_init consecutive runs in terms of inertia.
eigen_tol : float, optional, default: 0.0
Stopping criterion for eigendecomposition of the Laplacian matrix
when using arpack eigen_solver.
assign_labels : {'kmeans', 'discretize'}, default: 'kmeans'
The strategy to use to assign labels in the embedding
space. There are two ways to assign labels after the laplacian
embedding. k-means can be applied and is a popular choice. But it can
also be sensitive to initialization. Discretization is another
approach which is less sensitive to random initialization. See
the 'Multiclass spectral clustering' paper referenced below for
more details on the discretization approach.
Returns
-------
labels : array of integers, shape: n_samples
The labels of the clusters.
References
----------
- Normalized cuts and image segmentation, 2000
Jianbo Shi, Jitendra Malik
http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324
- A Tutorial on Spectral Clustering, 2007
Ulrike von Luxburg
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323
- Multiclass spectral clustering, 2003
Stella X. Yu, Jianbo Shi
https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf
Notes
-----
The graph should contain only one connected component; otherwise
the results make little sense.
This algorithm solves the normalized cut for k=2: it is a
normalized spectral clustering.
"""
if assign_labels not in ('kmeans', 'discretize'):
raise ValueError("The 'assign_labels' parameter should be "
"'kmeans' or 'discretize', but '%s' was given"
% assign_labels)
random_state = check_random_state(random_state)
n_components = n_clusters if n_components is None else n_components
# The first eigen vector is constant only for fully connected graphs
# and should be kept for spectral clustering (drop_first = False)
# See spectral_embedding documentation.
maps = spectral_embedding(affinity, n_components=n_components,
eigen_solver=eigen_solver,
random_state=random_state,
eigen_tol=eigen_tol, drop_first=False)
if assign_labels == 'kmeans':
_, labels, _ = k_means(maps, n_clusters, random_state=random_state,
n_init=n_init)
else:
labels = discretize(maps, random_state=random_state)
return labels
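# NOTE (illustrative addition, not part of this file): a hedged usage sketch of
# the function above with a precomputed, symmetric affinity built from an RBF
# kernel. The data, gamma and n_clusters values are illustrative assumptions.
import numpy as np
from sklearn.cluster import spectral_clustering
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X_demo = np.vstack([rng.randn(30, 2), rng.randn(30, 2) + 5])
affinity_demo = rbf_kernel(X_demo, gamma=1.0)  # symmetric, non-negative
labels_demo = spectral_clustering(affinity_demo, n_clusters=2, random_state=0)
print(np.bincount(labels_demo))  # roughly two groups of 30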
class SpectralClustering(ClusterMixin, BaseEstimator):
"""Apply clustering to a projection of the normalized Laplacian.
In practice Spectral Clustering is very useful when the structure of
the individual clusters is highly non-convex or more generally when
a measure of the center and spread of the cluster is not a suitable
description of the complete cluster. For instance when clusters are
nested circles on the 2D plane.
If affinity is the adjacency matrix of a graph, this method can be
used to find normalized graph cuts.
When calling ``fit``, an affinity matrix is constructed using either
a kernel function such as the Gaussian (aka RBF) kernel with the
euclidean distance ``d(X, X)``::
np.exp(-gamma * d(X,X) ** 2)
or a k-nearest neighbors connectivity matrix.
Alternatively, using ``precomputed``, a user-provided affinity
matrix can be used.
Read more in the :ref:`User Guide <spectral_clustering>`.
Parameters
----------
n_clusters : integer, optional
The dimension of the projection subspace.
eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}
The eigenvalue decomposition strategy to use. AMG requires pyamg
to be installed. It can be faster on very large, sparse problems,
but may also lead to instabilities.
n_components : integer, optional, default=n_clusters
Number of eigen vectors to use for the spectral embedding
random_state : int, RandomState instance, default=None
A pseudo random number generator used for the initialization of the
lobpcg eigen vectors decomposition when ``eigen_solver='amg'`` and by
the K-Means initialization. Use an int to make the randomness
deterministic.
See :term:`Glossary <random_state>`.
n_init : int, optional, default: 10
Number of time the k-means algorithm will be run with different
centroid seeds. The final results will be the best output of
n_init consecutive runs in terms of inertia.
gamma : float, default=1.0
Kernel coefficient for rbf, poly, sigmoid, laplacian and chi2 kernels.
Ignored for ``affinity='nearest_neighbors'``.
affinity : string or callable, default 'rbf'
How to construct the affinity matrix.
- 'nearest_neighbors' : construct the affinity matrix by computing a
graph of nearest neighbors.
- 'rbf' : construct the affinity matrix using a radial basis function
(RBF) kernel.
- 'precomputed' : interpret ``X`` as a precomputed affinity matrix.
- 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph
of precomputed nearest neighbors, and constructs the affinity matrix
by selecting the ``n_neighbors`` nearest neighbors.
- one of the kernels supported by
:func:`~sklearn.metrics.pairwise_kernels`.
Only kernels that produce similarity scores (non-negative values that
increase with similarity) should be used. This property is not checked
by the clustering algorithm.
n_neighbors : integer
Number of neighbors to use when constructing the affinity matrix using
the nearest neighbors method. Ignored for ``affinity='rbf'``.
eigen_tol : float, optional, default: 0.0
Stopping criterion for eigendecomposition of the Laplacian matrix
when ``eigen_solver='arpack'``.
assign_labels : {'kmeans', 'discretize'}, default: 'kmeans'
The strategy to use to assign labels in the embedding
space. There are two ways to assign labels after the laplacian
embedding. k-means can be applied and is a popular choice. But it can
also be sensitive to initialization. Discretization is another approach
which is less sensitive to random initialization.
degree : float, default=3
Degree of the polynomial kernel. Ignored by other kernels.
coef0 : float, default=1
Zero coefficient for polynomial and sigmoid kernels.
Ignored by other kernels.
kernel_params : dictionary of string to any, optional
Parameters (keyword arguments) and values for kernel passed as
callable object. Ignored by other kernels.
n_jobs : int or None, optional (default=None)
The number of parallel jobs to run.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Attributes
----------
affinity_matrix_ : array-like, shape (n_samples, n_samples)
Affinity matrix used for clustering. Available only after calling
``fit``.
labels_ : array, shape (n_samples,)
Labels of each point
Examples
--------
>>> from sklearn.cluster import SpectralClustering
>>> import numpy as np
>>> X = np.array([[1, 1], [2, 1], [1, 0],
... [4, 7], [3, 5], [3, 6]])
>>> clustering = SpectralClustering(n_clusters=2,
... assign_labels="discretize",
... random_state=0).fit(X)
>>> clustering.labels_
array([1, 1, 1, 0, 0, 0])
>>> clustering
SpectralClustering(assign_labels='discretize', n_clusters=2,
random_state=0)
Notes
-----
If you have an affinity matrix, such as a distance matrix,
for which 0 means identical elements, and high values mean
very dissimilar elements, it can be transformed into a
similarity matrix that is well suited for the algorithm by
applying the Gaussian (RBF, heat) kernel::
np.exp(- dist_matrix ** 2 / (2. * delta ** 2))
Where ``delta`` is a free parameter representing the width of the Gaussian
kernel.
Another alternative is to take a symmetric version of the k
nearest neighbors connectivity matrix of the points.
If the pyamg package is installed, it is used: this greatly
speeds up computation.
References
----------
- Normalized cuts and image segmentation, 2000
Jianbo Shi, Jitendra Malik
http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324
- A Tutorial on Spectral Clustering, 2007
Ulrike von Luxburg
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323
- Multiclass spectral clustering, 2003
Stella X. Yu, Jianbo Shi
https://www1.icsi.berkeley.edu/~stellayu/publication/doc/2003kwayICCV.pdf
"""
@_deprecate_positional_args
def __init__(self, n_clusters=8, *, eigen_solver=None, n_components=None,
random_state=None, n_init=10, gamma=1., affinity='rbf',
n_neighbors=10, eigen_tol=0.0, assign_labels='kmeans',
degree=3, coef0=1, kernel_params=None, n_jobs=None):
self.n_clusters = n_clusters
self.eigen_solver = eigen_solver
self.n_components = n_components
self.random_state = random_state
self.n_init = n_init
self.gamma = gamma
self.affinity = affinity
self.n_neighbors = n_neighbors
self.eigen_tol = eigen_tol
self.assign_labels = assign_labels
self.degree = degree
self.coef0 = coef0
self.kernel_params = kernel_params
self.n_jobs = n_jobs
def fit(self, X, y=None):
"""Perform spectral clustering from features, or affinity matrix.
Parameters
----------
X : array-like or sparse matrix, shape (n_samples, n_features), or \
array-like, shape (n_samples, n_samples)
Training instances to cluster, or similarities / affinities between
instances if ``affinity='precomputed'``. If a sparse matrix is
provided in a format other than ``csr_matrix``, ``csc_matrix``,
or ``coo_matrix``, it will be converted into a sparse
``csr_matrix``.
y : Ignored
Not used, present here for API consistency by convention.
Returns
-------
self
"""
X = self._validate_data(X, accept_sparse=['csr', 'csc', 'coo'],
dtype=np.float64, ensure_min_samples=2)
allow_squared = self.affinity in ["precomputed",
"precomputed_nearest_neighbors"]
if X.shape[0] == X.shape[1] and not allow_squared:
warnings.warn("The spectral clustering API has changed. ``fit``"
"now constructs an affinity matrix from data. To use"
" a custom affinity matrix, "
"set ``affinity=precomputed``.")
if self.affinity == 'nearest_neighbors':
connectivity = kneighbors_graph(X, n_neighbors=self.n_neighbors,
include_self=True,
n_jobs=self.n_jobs)
self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)
elif self.affinity == 'precomputed_nearest_neighbors':
estimator = NearestNeighbors(n_neighbors=self.n_neighbors,
n_jobs=self.n_jobs,
metric="precomputed").fit(X)
connectivity = estimator.kneighbors_graph(X=X, mode='connectivity')
self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)
elif self.affinity == 'precomputed':
self.affinity_matrix_ = X
else:
params = self.kernel_params
if params is None:
params = {}
if not callable(self.affinity):
params['gamma'] = self.gamma
params['degree'] = self.degree
params['coef0'] = self.coef0
self.affinity_matrix_ = pairwise_kernels(X, metric=self.affinity,
filter_params=True,
**params)
random_state = check_random_state(self.random_state)
self.labels_ = spectral_clustering(self.affinity_matrix_,
n_clusters=self.n_clusters,
n_components=self.n_components,
eigen_solver=self.eigen_solver,
random_state=random_state,
n_init=self.n_init,
eigen_tol=self.eigen_tol,
assign_labels=self.assign_labels)
return self
def fit_predict(self, X, y=None):
"""Perform spectral clustering from features, or affinity matrix,
and return cluster labels.
Parameters
----------
X : array-like or sparse matrix, shape (n_samples, n_features), or \
array-like, shape (n_samples, n_samples)
Training instances to cluster, or similarities / affinities between
instances if ``affinity='precomputed'``. If a sparse matrix is
provided in a format other than ``csr_matrix``, ``csc_matrix``,
or ``coo_matrix``, it will be converted into a sparse
``csr_matrix``.
y : Ignored
Not used, present here for API consistency by convention.
Returns
-------
labels : ndarray, shape (n_samples,)
Cluster labels.
"""
return super().fit_predict(X, y)
@property
def _pairwise(self):
return self.affinity in ["precomputed",
"precomputed_nearest_neighbors"]

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _affinity_propagation # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.cluster.affinity_propagation_'
correct_import_path = 'sklearn.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_affinity_propagation, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)
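# NOTE (illustrative addition, not part of this file): the generated shims in
# this package rely on the PEP 562 module-level __getattr__ hook. Below is a
# hypothetical, self-contained sketch of the same forwarding pattern; the
# stand-in module and attribute names are made up for illustration.
import types
import warnings

_new_impl = types.SimpleNamespace(some_function=lambda: 42)  # stand-in target

def __getattr__(name):
    # Warn on access, then forward the lookup to the new implementation.
    warnings.warn("this import path is deprecated; import from the new "
                  "location instead", FutureWarning)
    return getattr(_new_impl, name)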

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _bicluster # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.cluster.bicluster'
correct_import_path = 'sklearn.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_bicluster, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _birch # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.cluster.birch'
correct_import_path = 'sklearn.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_birch, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _dbscan # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.cluster.dbscan_'
correct_import_path = 'sklearn.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_dbscan, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _agglomerative # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.cluster.hierarchical'
correct_import_path = 'sklearn.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_agglomerative, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _kmeans # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.cluster.k_means_'
correct_import_path = 'sklearn.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_kmeans, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _mean_shift # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.cluster.mean_shift_'
correct_import_path = 'sklearn.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_mean_shift, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _optics # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.cluster.optics_'
correct_import_path = 'sklearn.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_optics, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,50 @@
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
# License: BSD 3 clause
import os
import numpy
def configuration(parent_package='', top_path=None):
from numpy.distutils.misc_util import Configuration
libraries = []
if os.name == 'posix':
libraries.append('m')
config = Configuration('cluster', parent_package, top_path)
config.add_extension('_dbscan_inner',
sources=['_dbscan_inner.pyx'],
include_dirs=[numpy.get_include()],
language="c++")
config.add_extension('_hierarchical_fast',
sources=['_hierarchical_fast.pyx'],
language="c++",
include_dirs=[numpy.get_include()],
libraries=libraries)
config.add_extension('_k_means_fast',
sources=['_k_means_fast.pyx'],
include_dirs=[numpy.get_include()],
libraries=libraries)
config.add_extension('_k_means_lloyd',
sources=['_k_means_lloyd.pyx'],
include_dirs=[numpy.get_include()],
libraries=libraries)
config.add_extension('_k_means_elkan',
sources=['_k_means_elkan.pyx'],
include_dirs=[numpy.get_include()],
libraries=libraries)
config.add_subpackage('tests')
return config
if __name__ == '__main__':
from numpy.distutils.core import setup
setup(**configuration(top_path='').todict())

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _spectral # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.cluster.spectral'
correct_import_path = 'sklearn.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_spectral, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,28 @@
"""
Common utilities for testing clustering.
"""
import numpy as np
###############################################################################
# Generate sample data
def generate_clustered_data(seed=0, n_clusters=3, n_features=2,
n_samples_per_cluster=20, std=.4):
prng = np.random.RandomState(seed)
# the data is voluntarily shifted away from zero to check clustering
# algorithm robustness with regard to non-centered data
means = np.array([[1, 1, 1, 0],
[-1, -1, 0, 1],
[1, -1, 1, 1],
[-1, 1, 1, 0],
]) + 10
X = np.empty((0, n_features))
for i in range(n_clusters):
X = np.r_[X, means[i][:n_features]
+ std * prng.randn(n_samples_per_cluster, n_features)]
return X
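# NOTE (illustrative addition, not part of this file): hedged usage sketch of
# the helper above; with the defaults it returns 3 clusters of 20
# two-dimensional points each.
from sklearn.cluster.tests.common import generate_clustered_data

X_demo = generate_clustered_data(seed=0, n_clusters=3, n_features=2,
                                 n_samples_per_cluster=20, std=.4)
print(X_demo.shape)  # (60, 2)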

View file

@ -0,0 +1,246 @@
"""
Testing for Clustering methods
"""
import numpy as np
import pytest
from scipy.sparse import csr_matrix
from sklearn.exceptions import ConvergenceWarning
from sklearn.utils._testing import (
assert_array_equal, assert_warns,
assert_warns_message, assert_no_warnings)
from sklearn.cluster import AffinityPropagation
from sklearn.cluster._affinity_propagation import (
_equal_similarities_and_preferences
)
from sklearn.cluster import affinity_propagation
from sklearn.datasets import make_blobs
from sklearn.metrics import euclidean_distances
n_clusters = 3
centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10
X, _ = make_blobs(n_samples=60, n_features=2, centers=centers,
cluster_std=0.4, shuffle=True, random_state=0)
def test_affinity_propagation():
# Affinity Propagation algorithm
# Compute similarities
S = -euclidean_distances(X, squared=True)
preference = np.median(S) * 10
# Compute Affinity Propagation
cluster_centers_indices, labels = affinity_propagation(
S, preference=preference, random_state=39)
n_clusters_ = len(cluster_centers_indices)
assert n_clusters == n_clusters_
af = AffinityPropagation(preference=preference, affinity="precomputed",
random_state=28)
labels_precomputed = af.fit(S).labels_
af = AffinityPropagation(preference=preference, verbose=True,
random_state=37)
labels = af.fit(X).labels_
assert_array_equal(labels, labels_precomputed)
cluster_centers_indices = af.cluster_centers_indices_
n_clusters_ = len(cluster_centers_indices)
assert np.unique(labels).size == n_clusters_
assert n_clusters == n_clusters_
# Test also with no copy
_, labels_no_copy = affinity_propagation(S, preference=preference,
copy=False, random_state=74)
assert_array_equal(labels, labels_no_copy)
# Test input validation
with pytest.raises(ValueError):
affinity_propagation(S[:, :-1])
with pytest.raises(ValueError):
affinity_propagation(S, damping=0)
af = AffinityPropagation(affinity="unknown", random_state=78)
with pytest.raises(ValueError):
af.fit(X)
af_2 = AffinityPropagation(affinity='precomputed', random_state=21)
with pytest.raises(TypeError):
af_2.fit(csr_matrix((3, 3)))
def test_affinity_propagation_predict():
# Test AffinityPropagation.predict
af = AffinityPropagation(affinity="euclidean", random_state=63)
labels = af.fit_predict(X)
labels2 = af.predict(X)
assert_array_equal(labels, labels2)
def test_affinity_propagation_predict_error():
# Test exception in AffinityPropagation.predict
# Not fitted.
af = AffinityPropagation(affinity="euclidean")
with pytest.raises(ValueError):
af.predict(X)
# Predict not supported when affinity="precomputed".
S = np.dot(X, X.T)
af = AffinityPropagation(affinity="precomputed", random_state=57)
af.fit(S)
with pytest.raises(ValueError):
af.predict(X)
def test_affinity_propagation_fit_non_convergence():
# In case of non-convergence of affinity_propagation(), the cluster
# centers should be an empty array and training samples should be labelled
# as noise (-1)
X = np.array([[0, 0], [1, 1], [-2, -2]])
# Force non-convergence by allowing only a single iteration
af = AffinityPropagation(preference=-10, max_iter=1, random_state=82)
assert_warns(ConvergenceWarning, af.fit, X)
assert_array_equal(np.empty((0, 2)), af.cluster_centers_)
assert_array_equal(np.array([-1, -1, -1]), af.labels_)
def test_affinity_propagation_equal_mutual_similarities():
X = np.array([[-1, 1], [1, -1]])
S = -euclidean_distances(X, squared=True)
# setting preference > similarity
cluster_center_indices, labels = assert_warns_message(
UserWarning, "mutually equal", affinity_propagation, S, preference=0)
# expect every sample to become an exemplar
assert_array_equal([0, 1], cluster_center_indices)
assert_array_equal([0, 1], labels)
# setting preference < similarity
cluster_center_indices, labels = assert_warns_message(
UserWarning, "mutually equal", affinity_propagation, S, preference=-10)
# expect one cluster, with arbitrary (first) sample as exemplar
assert_array_equal([0], cluster_center_indices)
assert_array_equal([0, 0], labels)
# setting different preferences
cluster_center_indices, labels = assert_no_warnings(
affinity_propagation, S, preference=[-20, -10], random_state=37)
# expect one cluster, with highest-preference sample as exemplar
assert_array_equal([1], cluster_center_indices)
assert_array_equal([0, 0], labels)
def test_affinity_propagation_predict_non_convergence():
# In case of non-convergence of affinity_propagation(), the cluster
# centers should be an empty array
X = np.array([[0, 0], [1, 1], [-2, -2]])
# Force non-convergence by allowing only a single iteration
af = assert_warns(ConvergenceWarning,
AffinityPropagation(preference=-10,
max_iter=1, random_state=75).fit, X)
# At prediction time, consider new samples as noise since there are no
# clusters
to_predict = np.array([[2, 2], [3, 3], [4, 4]])
y = assert_warns(ConvergenceWarning, af.predict, to_predict)
assert_array_equal(np.array([-1, -1, -1]), y)
def test_affinity_propagation_non_convergence_regressiontest():
X = np.array([[1, 0, 0, 0, 0, 0],
[0, 1, 1, 1, 0, 0],
[0, 0, 1, 0, 0, 1]])
af = AffinityPropagation(affinity='euclidean',
max_iter=2, random_state=34).fit(X)
assert_array_equal(np.array([-1, -1, -1]), af.labels_)
def test_equal_similarities_and_preferences():
# Unequal distances
X = np.array([[0, 0], [1, 1], [-2, -2]])
S = -euclidean_distances(X, squared=True)
assert not _equal_similarities_and_preferences(S, np.array(0))
assert not _equal_similarities_and_preferences(S, np.array([0, 0]))
assert not _equal_similarities_and_preferences(S, np.array([0, 1]))
# Equal distances
X = np.array([[0, 0], [1, 1]])
S = -euclidean_distances(X, squared=True)
# Different preferences
assert not _equal_similarities_and_preferences(S, np.array([0, 1]))
# Same preferences
assert _equal_similarities_and_preferences(S, np.array([0, 0]))
assert _equal_similarities_and_preferences(S, np.array(0))
def test_affinity_propagation_random_state():
# Significance of random_state parameter
# Generate sample data
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=300, centers=centers,
cluster_std=0.5, random_state=0)
# random_state = 0
ap = AffinityPropagation(convergence_iter=1, max_iter=2, random_state=0)
ap.fit(X)
centers0 = ap.cluster_centers_
# random_state = 76
ap = AffinityPropagation(convergence_iter=1, max_iter=2, random_state=76)
ap.fit(X)
centers76 = ap.cluster_centers_
assert np.mean((centers0 - centers76) ** 2) > 1
# FIXME: to be removed in 0.25
def test_affinity_propagation_random_state_warning():
# test that a warning is raised when random_state is not defined.
X = np.array([[0, 0], [1, 1], [-2, -2]])
match = ("'random_state' has been introduced in 0.23. "
"It will be set to None starting from 0.25 which "
"means that results will differ at every function "
"call. Set 'random_state' to None to silence this "
"warning, or to 0 to keep the behavior of versions "
"<0.23.")
with pytest.warns(FutureWarning, match=match):
AffinityPropagation().fit(X)
@pytest.mark.parametrize('centers', [csr_matrix(np.zeros((1, 10))),
np.zeros((1, 10))])
def test_affinity_propagation_convergence_warning_dense_sparse(centers):
"""Non-regression, see #13334"""
rng = np.random.RandomState(42)
X = rng.rand(40, 10)
y = (4 * rng.rand(40)).astype(int)
ap = AffinityPropagation(random_state=46)
ap.fit(X, y)
ap.cluster_centers_ = centers
with pytest.warns(None) as record:
assert_array_equal(ap.predict(X),
np.zeros(X.shape[0], dtype=int))
assert len(record) == 0
def test_affinity_propagation_float32():
# Test to fix incorrect clusters due to dtype change
# (non-regression test for issue #10832)
X = np.array([[1, 0, 0, 0],
[0, 1, 1, 0],
[0, 1, 1, 0],
[0, 0, 0, 1]], dtype='float32')
afp = AffinityPropagation(preference=1, affinity='precomputed',
random_state=0).fit(X)
expected = np.array([0, 1, 1, 2])
assert_array_equal(afp.labels_, expected)

View file

@ -0,0 +1,277 @@
"""Testing for Spectral Biclustering methods"""
import numpy as np
import pytest
from scipy.sparse import csr_matrix, issparse
from sklearn.model_selection import ParameterGrid
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.base import BaseEstimator, BiclusterMixin
from sklearn.cluster import SpectralCoclustering
from sklearn.cluster import SpectralBiclustering
from sklearn.cluster._bicluster import _scale_normalize
from sklearn.cluster._bicluster import _bistochastic_normalize
from sklearn.cluster._bicluster import _log_normalize
from sklearn.metrics import (consensus_score, v_measure_score)
from sklearn.datasets import make_biclusters, make_checkerboard
class MockBiclustering(BiclusterMixin, BaseEstimator):
# Mock object for testing get_submatrix.
def __init__(self):
pass
def get_indices(self, i):
# Overridden to reproduce old get_submatrix test.
return (np.where([True, True, False, False, True])[0],
np.where([False, False, True, True])[0])
def test_get_submatrix():
data = np.arange(20).reshape(5, 4)
model = MockBiclustering()
for X in (data, csr_matrix(data), data.tolist()):
submatrix = model.get_submatrix(0, X)
if issparse(submatrix):
submatrix = submatrix.toarray()
assert_array_equal(submatrix, [[2, 3],
[6, 7],
[18, 19]])
submatrix[:] = -1
if issparse(X):
X = X.toarray()
assert np.all(X != -1)
def _test_shape_indices(model):
# Test get_shape and get_indices on fitted model.
for i in range(model.n_clusters):
m, n = model.get_shape(i)
i_ind, j_ind = model.get_indices(i)
assert len(i_ind) == m
assert len(j_ind) == n
def test_spectral_coclustering():
# Test Dhillon's Spectral CoClustering on a simple problem.
param_grid = {'svd_method': ['randomized', 'arpack'],
'n_svd_vecs': [None, 20],
'mini_batch': [False, True],
'init': ['k-means++'],
'n_init': [10]}
random_state = 0
S, rows, cols = make_biclusters((30, 30), 3, noise=0.5,
random_state=random_state)
S -= S.min() # needs to be nonnegative before making it sparse
S = np.where(S < 1, 0, S) # threshold some values
for mat in (S, csr_matrix(S)):
for kwargs in ParameterGrid(param_grid):
model = SpectralCoclustering(n_clusters=3,
random_state=random_state,
**kwargs)
model.fit(mat)
assert model.rows_.shape == (3, 30)
assert_array_equal(model.rows_.sum(axis=0), np.ones(30))
assert_array_equal(model.columns_.sum(axis=0), np.ones(30))
assert consensus_score(model.biclusters_,
(rows, cols)) == 1
_test_shape_indices(model)
def test_spectral_biclustering():
# Test Kluger methods on a checkerboard dataset.
S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5,
random_state=0)
non_default_params = {'method': ['scale', 'log'],
'svd_method': ['arpack'],
'n_svd_vecs': [20],
'mini_batch': [True]}
for mat in (S, csr_matrix(S)):
for param_name, param_values in non_default_params.items():
for param_value in param_values:
model = SpectralBiclustering(
n_clusters=3,
n_init=3,
init='k-means++',
random_state=0,
)
model.set_params(**{param_name: param_value})
if issparse(mat) and model.get_params().get('method') == 'log':
# cannot take log of sparse matrix
with pytest.raises(ValueError):
model.fit(mat)
continue
else:
model.fit(mat)
assert model.rows_.shape == (9, 30)
assert model.columns_.shape == (9, 30)
assert_array_equal(model.rows_.sum(axis=0),
np.repeat(3, 30))
assert_array_equal(model.columns_.sum(axis=0),
np.repeat(3, 30))
assert consensus_score(model.biclusters_,
(rows, cols)) == 1
_test_shape_indices(model)
def _do_scale_test(scaled):
"""Check that rows sum to one constant, and columns to another."""
row_sum = scaled.sum(axis=1)
col_sum = scaled.sum(axis=0)
if issparse(scaled):
row_sum = np.asarray(row_sum).squeeze()
col_sum = np.asarray(col_sum).squeeze()
assert_array_almost_equal(row_sum, np.tile(row_sum.mean(), 100),
decimal=1)
assert_array_almost_equal(col_sum, np.tile(col_sum.mean(), 100),
decimal=1)
def _do_bistochastic_test(scaled):
"""Check that rows and columns sum to the same constant."""
_do_scale_test(scaled)
assert_almost_equal(scaled.sum(axis=0).mean(),
scaled.sum(axis=1).mean(),
decimal=1)
def test_scale_normalize():
generator = np.random.RandomState(0)
X = generator.rand(100, 100)
for mat in (X, csr_matrix(X)):
scaled, _, _ = _scale_normalize(mat)
_do_scale_test(scaled)
if issparse(mat):
assert issparse(scaled)
def test_bistochastic_normalize():
generator = np.random.RandomState(0)
X = generator.rand(100, 100)
for mat in (X, csr_matrix(X)):
scaled = _bistochastic_normalize(mat)
_do_bistochastic_test(scaled)
if issparse(mat):
assert issparse(scaled)
def test_log_normalize():
# adding any constant to a log-scaled matrix should make it
# bistochastic
generator = np.random.RandomState(0)
mat = generator.rand(100, 100)
scaled = _log_normalize(mat) + 1
_do_bistochastic_test(scaled)
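# NOTE (illustrative addition, not part of this file): a hedged numeric check
# of the property the helpers above verify, using the private
# _bistochastic_normalize imported at the top of this module: after
# normalization every row sum and every column sum is close to one constant.
import numpy as np
from sklearn.cluster._bicluster import _bistochastic_normalize

rng = np.random.RandomState(0)
scaled_demo = _bistochastic_normalize(rng.rand(50, 50))
row_sums = scaled_demo.sum(axis=1)
col_sums = scaled_demo.sum(axis=0)
print(np.allclose(row_sums, row_sums.mean(), atol=0.1),
      np.allclose(col_sums, row_sums.mean(), atol=0.1))  # True True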
def test_fit_best_piecewise():
model = SpectralBiclustering(random_state=0)
vectors = np.array([[0, 0, 0, 1, 1, 1],
[2, 2, 2, 3, 3, 3],
[0, 1, 2, 3, 4, 5]])
best = model._fit_best_piecewise(vectors, n_best=2, n_clusters=2)
assert_array_equal(best, vectors[:2])
def test_project_and_cluster():
model = SpectralBiclustering(random_state=0)
data = np.array([[1, 1, 1],
[1, 1, 1],
[3, 6, 3],
[3, 6, 3]])
vectors = np.array([[1, 0],
[0, 1],
[0, 0]])
for mat in (data, csr_matrix(data)):
labels = model._project_and_cluster(mat, vectors,
n_clusters=2)
assert_almost_equal(v_measure_score(labels, [0, 0, 1, 1]), 1.0)
def test_perfect_checkerboard():
# XXX Previously failed on build bot (not reproducible)
model = SpectralBiclustering(3, svd_method="arpack", random_state=0)
S, rows, cols = make_checkerboard((30, 30), 3, noise=0,
random_state=0)
model.fit(S)
assert consensus_score(model.biclusters_,
(rows, cols)) == 1
S, rows, cols = make_checkerboard((40, 30), 3, noise=0,
random_state=0)
model.fit(S)
assert consensus_score(model.biclusters_,
(rows, cols)) == 1
S, rows, cols = make_checkerboard((30, 40), 3, noise=0,
random_state=0)
model.fit(S)
assert consensus_score(model.biclusters_,
(rows, cols)) == 1
@pytest.mark.parametrize(
"args",
[{'n_clusters': (3, 3, 3)},
{'n_clusters': 'abc'},
{'n_clusters': (3, 'abc')},
{'method': 'unknown'},
{'n_components': 0},
{'n_best': 0},
{'svd_method': 'unknown'},
{'n_components': 3, 'n_best': 4}]
)
def test_errors(args):
data = np.arange(25).reshape((5, 5))
model = SpectralBiclustering(**args)
with pytest.raises(ValueError):
model.fit(data)
def test_wrong_shape():
model = SpectralBiclustering()
data = np.arange(27).reshape((3, 3, 3))
with pytest.raises(ValueError):
model.fit(data)
@pytest.mark.parametrize('est',
(SpectralBiclustering(), SpectralCoclustering()))
def test_n_features_in_(est):
X, _, _ = make_biclusters((3, 3), 3, random_state=0)
assert not hasattr(est, 'n_features_in_')
est.fit(X)
assert est.n_features_in_ == 3
@pytest.mark.parametrize("klass", [SpectralBiclustering, SpectralCoclustering])
@pytest.mark.parametrize("n_jobs", [None, 1])
def test_n_jobs_deprecated(klass, n_jobs):
# FIXME: remove in 0.25
depr_msg = ("'n_jobs' was deprecated in version 0.23 and will be removed "
"in 0.25.")
S, _, _ = make_biclusters((30, 30), 3, noise=0.5, random_state=0)
est = klass(random_state=0, n_jobs=n_jobs)
with pytest.warns(FutureWarning, match=depr_msg):
est.fit(S)

View file

@ -0,0 +1,169 @@
"""
Tests for the birch clustering algorithm.
"""
from scipy import sparse
import numpy as np
import pytest
from sklearn.cluster.tests.common import generate_clustered_data
from sklearn.cluster import Birch
from sklearn.cluster import AgglomerativeClustering
from sklearn.datasets import make_blobs
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import ElasticNet
from sklearn.metrics import pairwise_distances_argmin, v_measure_score
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_warns
def test_n_samples_leaves_roots():
# Sanity check for the number of samples in leaves and roots
X, y = make_blobs(n_samples=10)
brc = Birch()
brc.fit(X)
n_samples_root = sum([sc.n_samples_ for sc in brc.root_.subclusters_])
n_samples_leaves = sum([sc.n_samples_ for leaf in brc._get_leaves()
for sc in leaf.subclusters_])
assert n_samples_leaves == X.shape[0]
assert n_samples_root == X.shape[0]
def test_partial_fit():
# Test that fit is equivalent to calling partial_fit multiple times
X, y = make_blobs(n_samples=100)
brc = Birch(n_clusters=3)
brc.fit(X)
brc_partial = Birch(n_clusters=None)
brc_partial.partial_fit(X[:50])
brc_partial.partial_fit(X[50:])
assert_array_almost_equal(brc_partial.subcluster_centers_,
brc.subcluster_centers_)
# Test that same global labels are obtained after calling partial_fit
# with None
brc_partial.set_params(n_clusters=3)
brc_partial.partial_fit(None)
assert_array_equal(brc_partial.subcluster_labels_, brc.subcluster_labels_)
def test_birch_predict():
# Test the predict method predicts the nearest centroid.
rng = np.random.RandomState(0)
X = generate_clustered_data(n_clusters=3, n_features=3,
n_samples_per_cluster=10)
# n_clusters * n_samples_per_cluster
shuffle_indices = np.arange(30)
rng.shuffle(shuffle_indices)
X_shuffle = X[shuffle_indices, :]
brc = Birch(n_clusters=4, threshold=1.)
brc.fit(X_shuffle)
centroids = brc.subcluster_centers_
assert_array_equal(brc.labels_, brc.predict(X_shuffle))
nearest_centroid = pairwise_distances_argmin(X_shuffle, centroids)
assert_almost_equal(v_measure_score(nearest_centroid, brc.labels_), 1.0)
def test_n_clusters():
# Test that n_clusters param works properly
X, y = make_blobs(n_samples=100, centers=10)
brc1 = Birch(n_clusters=10)
brc1.fit(X)
assert len(brc1.subcluster_centers_) > 10
assert len(np.unique(brc1.labels_)) == 10
# Test that passing an AgglomerativeClustering instance as n_clusters
# gives the same results.
gc = AgglomerativeClustering(n_clusters=10)
brc2 = Birch(n_clusters=gc)
brc2.fit(X)
assert_array_equal(brc1.subcluster_labels_, brc2.subcluster_labels_)
assert_array_equal(brc1.labels_, brc2.labels_)
# Test that the wrong global clustering step raises an Error.
clf = ElasticNet()
brc3 = Birch(n_clusters=clf)
with pytest.raises(ValueError):
brc3.fit(X)
# Test that a small number of clusters raises a warning.
brc4 = Birch(threshold=10000.)
assert_warns(ConvergenceWarning, brc4.fit, X)
def test_sparse_X():
# Test that sparse and dense data give same results
X, y = make_blobs(n_samples=100, centers=10)
brc = Birch(n_clusters=10)
brc.fit(X)
csr = sparse.csr_matrix(X)
brc_sparse = Birch(n_clusters=10)
brc_sparse.fit(csr)
assert_array_equal(brc.labels_, brc_sparse.labels_)
assert_array_almost_equal(brc.subcluster_centers_,
brc_sparse.subcluster_centers_)
def check_branching_factor(node, branching_factor):
subclusters = node.subclusters_
assert branching_factor >= len(subclusters)
for cluster in subclusters:
if cluster.child_:
check_branching_factor(cluster.child_, branching_factor)
def test_branching_factor():
# Test that nodes have at max branching_factor number of subclusters
X, y = make_blobs()
branching_factor = 9
# Purposefully set a low threshold to maximize the subclusters.
brc = Birch(n_clusters=None, branching_factor=branching_factor,
threshold=0.01)
brc.fit(X)
check_branching_factor(brc.root_, branching_factor)
brc = Birch(n_clusters=3, branching_factor=branching_factor,
threshold=0.01)
brc.fit(X)
check_branching_factor(brc.root_, branching_factor)
# Raises error when branching_factor is set to one.
brc = Birch(n_clusters=None, branching_factor=1, threshold=0.01)
with pytest.raises(ValueError):
brc.fit(X)
def check_threshold(birch_instance, threshold):
"""Use the leaf linked list for traversal"""
current_leaf = birch_instance.dummy_leaf_.next_leaf_
while current_leaf:
subclusters = current_leaf.subclusters_
for sc in subclusters:
assert threshold >= sc.radius
current_leaf = current_leaf.next_leaf_
def test_threshold():
# Test that the leaf subclusters have a radius no greater than the threshold
X, y = make_blobs(n_samples=80, centers=4)
brc = Birch(threshold=0.5, n_clusters=None)
brc.fit(X)
check_threshold(brc, 0.5)
brc = Birch(threshold=5.0, n_clusters=None)
brc.fit(X)
check_threshold(brc, 5.)
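# NOTE (illustrative addition, not part of this file): a hedged sketch of the
# leaf-linked-list traversal used by check_threshold above. It relies on
# Birch's private CF-tree attributes (dummy_leaf_, next_leaf_, subclusters_);
# the total number of leaf subclusters should match subcluster_centers_.
import numpy as np
from sklearn.cluster import Birch

rng = np.random.RandomState(0)
X_demo = rng.randn(200, 2)
brc_demo = Birch(threshold=0.5, n_clusters=None).fit(X_demo)

n_leaf_subclusters = 0
leaf = brc_demo.dummy_leaf_.next_leaf_
while leaf is not None:
    n_leaf_subclusters += len(leaf.subclusters_)
    leaf = leaf.next_leaf_
print(n_leaf_subclusters, len(brc_demo.subcluster_centers_))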
def test_birch_n_clusters_long_int():
# Check that birch supports n_clusters with np.int64 dtype, for instance
# coming from np.arange. #16484
X, _ = make_blobs(random_state=0)
n_clusters = np.int64(5)
Birch(n_clusters=n_clusters).fit(X)

View file

@ -0,0 +1,395 @@
"""
Tests for DBSCAN clustering algorithm
"""
import pickle
import numpy as np
from scipy.spatial import distance
from scipy import sparse
import pytest
from sklearn.utils._testing import assert_array_equal
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import DBSCAN
from sklearn.cluster import dbscan
from sklearn.cluster.tests.common import generate_clustered_data
from sklearn.metrics.pairwise import pairwise_distances
n_clusters = 3
X = generate_clustered_data(n_clusters=n_clusters)
def test_dbscan_similarity():
# Tests the DBSCAN algorithm with a similarity array.
# Parameters chosen specifically for this task.
eps = 0.15
min_samples = 10
# Compute similarities
D = distance.squareform(distance.pdist(X))
D /= np.max(D)
# Compute DBSCAN
core_samples, labels = dbscan(D, metric="precomputed", eps=eps,
min_samples=min_samples)
# number of clusters, ignoring noise if present
n_clusters_1 = len(set(labels)) - (1 if -1 in labels else 0)
assert n_clusters_1 == n_clusters
db = DBSCAN(metric="precomputed", eps=eps, min_samples=min_samples)
labels = db.fit(D).labels_
n_clusters_2 = len(set(labels)) - int(-1 in labels)
assert n_clusters_2 == n_clusters
def test_dbscan_feature():
# Tests the DBSCAN algorithm with a feature vector array.
# Parameters chosen specifically for this task.
# Different eps from the other test, because the distance is not normalised.
eps = 0.8
min_samples = 10
metric = 'euclidean'
# Compute DBSCAN
# parameters chosen for task
core_samples, labels = dbscan(X, metric=metric, eps=eps,
min_samples=min_samples)
# number of clusters, ignoring noise if present
n_clusters_1 = len(set(labels)) - int(-1 in labels)
assert n_clusters_1 == n_clusters
db = DBSCAN(metric=metric, eps=eps, min_samples=min_samples)
labels = db.fit(X).labels_
n_clusters_2 = len(set(labels)) - int(-1 in labels)
assert n_clusters_2 == n_clusters
def test_dbscan_sparse():
core_sparse, labels_sparse = dbscan(sparse.lil_matrix(X), eps=.8,
min_samples=10)
core_dense, labels_dense = dbscan(X, eps=.8, min_samples=10)
assert_array_equal(core_dense, core_sparse)
assert_array_equal(labels_dense, labels_sparse)
@pytest.mark.parametrize('include_self', [False, True])
def test_dbscan_sparse_precomputed(include_self):
D = pairwise_distances(X)
nn = NearestNeighbors(radius=.9).fit(X)
X_ = X if include_self else None
D_sparse = nn.radius_neighbors_graph(X=X_, mode='distance')
# Ensure it is sparse not merely on diagonals:
assert D_sparse.nnz < D.shape[0] * (D.shape[0] - 1)
core_sparse, labels_sparse = dbscan(D_sparse,
eps=.8,
min_samples=10,
metric='precomputed')
core_dense, labels_dense = dbscan(D, eps=.8, min_samples=10,
metric='precomputed')
assert_array_equal(core_dense, core_sparse)
assert_array_equal(labels_dense, labels_sparse)
def test_dbscan_sparse_precomputed_different_eps():
# test that precomputed neighbors graph is filtered if computed with
# a radius larger than DBSCAN's eps.
lower_eps = 0.2
nn = NearestNeighbors(radius=lower_eps).fit(X)
D_sparse = nn.radius_neighbors_graph(X, mode='distance')
dbscan_lower = dbscan(D_sparse, eps=lower_eps, metric='precomputed')
higher_eps = lower_eps + 0.7
nn = NearestNeighbors(radius=higher_eps).fit(X)
D_sparse = nn.radius_neighbors_graph(X, mode='distance')
dbscan_higher = dbscan(D_sparse, eps=lower_eps, metric='precomputed')
assert_array_equal(dbscan_lower[0], dbscan_higher[0])
assert_array_equal(dbscan_lower[1], dbscan_higher[1])
@pytest.mark.parametrize('use_sparse', [True, False])
@pytest.mark.parametrize('metric', ['precomputed', 'minkowski'])
def test_dbscan_input_not_modified(use_sparse, metric):
# test that the input is not modified by dbscan
X = np.random.RandomState(0).rand(10, 10)
X = sparse.csr_matrix(X) if use_sparse else X
X_copy = X.copy()
dbscan(X, metric=metric)
if use_sparse:
assert_array_equal(X.toarray(), X_copy.toarray())
else:
assert_array_equal(X, X_copy)
def test_dbscan_no_core_samples():
rng = np.random.RandomState(0)
X = rng.rand(40, 10)
X[X < .8] = 0
for X_ in [X, sparse.csr_matrix(X)]:
db = DBSCAN(min_samples=6).fit(X_)
assert_array_equal(db.components_, np.empty((0, X_.shape[1])))
assert_array_equal(db.labels_, -1)
assert db.core_sample_indices_.shape == (0,)
def test_dbscan_callable():
# Tests the DBSCAN algorithm with a callable metric.
# Parameters chosen specifically for this task.
# Different eps from the other test, because the distance is not normalised.
eps = 0.8
min_samples = 10
# metric is the function reference, not the string key.
metric = distance.euclidean
# Compute DBSCAN
# parameters chosen for task
core_samples, labels = dbscan(X, metric=metric, eps=eps,
min_samples=min_samples,
algorithm='ball_tree')
# number of clusters, ignoring noise if present
n_clusters_1 = len(set(labels)) - int(-1 in labels)
assert n_clusters_1 == n_clusters
db = DBSCAN(metric=metric, eps=eps, min_samples=min_samples,
algorithm='ball_tree')
labels = db.fit(X).labels_
n_clusters_2 = len(set(labels)) - int(-1 in labels)
assert n_clusters_2 == n_clusters
def test_dbscan_metric_params():
# Tests that DBSCAN works with the metrics_params argument.
eps = 0.8
min_samples = 10
p = 1
# Compute DBSCAN with metric_params arg
db = DBSCAN(metric='minkowski', metric_params={'p': p}, eps=eps,
min_samples=min_samples, algorithm='ball_tree').fit(X)
core_sample_1, labels_1 = db.core_sample_indices_, db.labels_
# Test that sample labels are the same as passing Minkowski 'p' directly
db = DBSCAN(metric='minkowski', eps=eps, min_samples=min_samples,
algorithm='ball_tree', p=p).fit(X)
core_sample_2, labels_2 = db.core_sample_indices_, db.labels_
assert_array_equal(core_sample_1, core_sample_2)
assert_array_equal(labels_1, labels_2)
# Minkowski with p=1 should be equivalent to Manhattan distance
db = DBSCAN(metric='manhattan', eps=eps, min_samples=min_samples,
algorithm='ball_tree').fit(X)
core_sample_3, labels_3 = db.core_sample_indices_, db.labels_
assert_array_equal(core_sample_1, core_sample_3)
assert_array_equal(labels_1, labels_3)
def test_dbscan_balltree():
# Tests the DBSCAN algorithm with balltree for neighbor calculation.
eps = 0.8
min_samples = 10
D = pairwise_distances(X)
core_samples, labels = dbscan(D, metric="precomputed", eps=eps,
min_samples=min_samples)
# number of clusters, ignoring noise if present
n_clusters_1 = len(set(labels)) - int(-1 in labels)
assert n_clusters_1 == n_clusters
db = DBSCAN(p=2.0, eps=eps, min_samples=min_samples, algorithm='ball_tree')
labels = db.fit(X).labels_
n_clusters_2 = len(set(labels)) - int(-1 in labels)
assert n_clusters_2 == n_clusters
db = DBSCAN(p=2.0, eps=eps, min_samples=min_samples, algorithm='kd_tree')
labels = db.fit(X).labels_
n_clusters_3 = len(set(labels)) - int(-1 in labels)
assert n_clusters_3 == n_clusters
db = DBSCAN(p=1.0, eps=eps, min_samples=min_samples, algorithm='ball_tree')
labels = db.fit(X).labels_
n_clusters_4 = len(set(labels)) - int(-1 in labels)
assert n_clusters_4 == n_clusters
db = DBSCAN(leaf_size=20, eps=eps, min_samples=min_samples,
algorithm='ball_tree')
labels = db.fit(X).labels_
n_clusters_5 = len(set(labels)) - int(-1 in labels)
assert n_clusters_5 == n_clusters
def test_input_validation():
# DBSCAN.fit should accept a list of lists.
X = [[1., 2.], [3., 4.]]
DBSCAN().fit(X) # must not raise exception
@pytest.mark.parametrize(
"args",
[{'eps': -1.0}, {'algorithm': 'blah'}, {'metric': 'blah'},
{'leaf_size': -1}, {'p': -1}]
)
def test_dbscan_badargs(args):
# Test bad argument values: these should all raise ValueErrors
with pytest.raises(ValueError):
dbscan(X, **args)
def test_pickle():
obj = DBSCAN()
s = pickle.dumps(obj)
assert type(pickle.loads(s)) == obj.__class__
def test_boundaries():
# ensure min_samples is inclusive of core point
core, _ = dbscan([[0], [1]], eps=2, min_samples=2)
assert 0 in core
# ensure eps is inclusive of circumference
core, _ = dbscan([[0], [1], [1]], eps=1, min_samples=2)
assert 0 in core
core, _ = dbscan([[0], [1], [1]], eps=.99, min_samples=2)
assert 0 not in core
def test_weighted_dbscan():
# ensure sample_weight is validated
with pytest.raises(ValueError):
dbscan([[0], [1]], sample_weight=[2])
with pytest.raises(ValueError):
dbscan([[0], [1]], sample_weight=[2, 3, 4])
# ensure sample_weight has an effect
assert_array_equal([], dbscan([[0], [1]], sample_weight=None,
min_samples=6)[0])
assert_array_equal([], dbscan([[0], [1]], sample_weight=[5, 5],
min_samples=6)[0])
assert_array_equal([0], dbscan([[0], [1]], sample_weight=[6, 5],
min_samples=6)[0])
assert_array_equal([0, 1], dbscan([[0], [1]], sample_weight=[6, 6],
min_samples=6)[0])
# points within eps of each other:
assert_array_equal([0, 1], dbscan([[0], [1]], eps=1.5,
sample_weight=[5, 1], min_samples=6)[0])
# and effect of non-positive and non-integer sample_weight:
assert_array_equal([], dbscan([[0], [1]], sample_weight=[5, 0],
eps=1.5, min_samples=6)[0])
assert_array_equal([0, 1], dbscan([[0], [1]], sample_weight=[5.9, 0.1],
eps=1.5, min_samples=6)[0])
assert_array_equal([0, 1], dbscan([[0], [1]], sample_weight=[6, 0],
eps=1.5, min_samples=6)[0])
assert_array_equal([], dbscan([[0], [1]], sample_weight=[6, -1],
eps=1.5, min_samples=6)[0])
# for non-negative sample_weight, cores should be identical to repetition
rng = np.random.RandomState(42)
sample_weight = rng.randint(0, 5, X.shape[0])
core1, label1 = dbscan(X, sample_weight=sample_weight)
assert len(label1) == len(X)
X_repeated = np.repeat(X, sample_weight, axis=0)
core_repeated, label_repeated = dbscan(X_repeated)
core_repeated_mask = np.zeros(X_repeated.shape[0], dtype=bool)
core_repeated_mask[core_repeated] = True
core_mask = np.zeros(X.shape[0], dtype=bool)
core_mask[core1] = True
assert_array_equal(np.repeat(core_mask, sample_weight), core_repeated_mask)
# sample_weight should work with precomputed distance matrix
D = pairwise_distances(X)
core3, label3 = dbscan(D, sample_weight=sample_weight,
metric='precomputed')
assert_array_equal(core1, core3)
assert_array_equal(label1, label3)
# sample_weight should work with estimator
est = DBSCAN().fit(X, sample_weight=sample_weight)
core4 = est.core_sample_indices_
label4 = est.labels_
assert_array_equal(core1, core4)
assert_array_equal(label1, label4)
est = DBSCAN()
label5 = est.fit_predict(X, sample_weight=sample_weight)
core5 = est.core_sample_indices_
assert_array_equal(core1, core5)
assert_array_equal(label1, label5)
assert_array_equal(label1, est.labels_)
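# NOTE (illustrative addition, not part of this file): a hedged mini-sketch of
# the equivalence checked above: with non-negative integer sample_weight, the
# core-sample mask matches the one obtained by repeating each sample
# sample_weight times. The points and weights below are illustrative
# assumptions.
import numpy as np
from sklearn.cluster import dbscan

pts = np.array([[0.], [1.]])
weights = np.array([5, 1])
core_w, _ = dbscan(pts, sample_weight=weights, eps=1.5, min_samples=6)
core_rep, _ = dbscan(np.repeat(pts, weights, axis=0), eps=1.5, min_samples=6)

mask_w = np.zeros(len(pts), dtype=bool)
mask_w[core_w] = True
mask_rep = np.zeros(weights.sum(), dtype=bool)
mask_rep[core_rep] = True
print(np.array_equal(np.repeat(mask_w, weights), mask_rep))  # True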
@pytest.mark.parametrize('algorithm', ['brute', 'kd_tree', 'ball_tree'])
def test_dbscan_core_samples_toy(algorithm):
X = [[0], [2], [3], [4], [6], [8], [10]]
n_samples = len(X)
# Degenerate case: every sample is a core sample, either with its own
# cluster or including other close core samples.
core_samples, labels = dbscan(X, algorithm=algorithm, eps=1,
min_samples=1)
assert_array_equal(core_samples, np.arange(n_samples))
assert_array_equal(labels, [0, 1, 1, 1, 2, 3, 4])
# With eps=1 and min_samples=2 only the 3 samples from the denser area
# are core samples. All other points are isolated and considered noise.
core_samples, labels = dbscan(X, algorithm=algorithm, eps=1,
min_samples=2)
assert_array_equal(core_samples, [1, 2, 3])
assert_array_equal(labels, [-1, 0, 0, 0, -1, -1, -1])
# Only the sample in the middle of the dense area is core. Its two
# neighbors are edge samples. Remaining samples are noise.
core_samples, labels = dbscan(X, algorithm=algorithm, eps=1,
min_samples=3)
assert_array_equal(core_samples, [2])
assert_array_equal(labels, [-1, 0, 0, 0, -1, -1, -1])
# It's no longer possible to extract core samples with eps=1:
# everything is noise.
core_samples, labels = dbscan(X, algorithm=algorithm, eps=1,
min_samples=4)
assert_array_equal(core_samples, [])
assert_array_equal(labels, np.full(n_samples, -1.))
def test_dbscan_precomputed_metric_with_degenerate_input_arrays():
# see https://github.com/scikit-learn/scikit-learn/issues/4641 for
# more details
X = np.eye(10)
labels = DBSCAN(eps=0.5, metric='precomputed').fit(X).labels_
assert len(set(labels)) == 1
X = np.zeros((10, 10))
labels = DBSCAN(eps=0.5, metric='precomputed').fit(X).labels_
assert len(set(labels)) == 1
def test_dbscan_precomputed_metric_with_initial_rows_zero():
# sample matrix with initial two row all zero
ar = np.array([
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0],
[0.0, 0.0, 0.1, 0.1, 0.0, 0.0, 0.3],
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1],
[0.0, 0.0, 0.0, 0.0, 0.3, 0.1, 0.0]
])
matrix = sparse.csr_matrix(ar)
labels = DBSCAN(eps=0.2, metric='precomputed',
min_samples=2).fit(matrix).labels_
assert_array_equal(labels, [-1, -1, 0, 0, 0, 1, 1])

View file

@ -0,0 +1,43 @@
"""
Tests for sklearn.cluster._feature_agglomeration
"""
# Authors: Sergul Aydore 2017
import numpy as np
from sklearn.cluster import FeatureAgglomeration
from sklearn.utils._testing import assert_no_warnings
from sklearn.utils._testing import assert_array_almost_equal
def test_feature_agglomeration():
n_clusters = 1
X = np.array([0, 0, 1]).reshape(1, 3) # (n_samples, n_features)
agglo_mean = FeatureAgglomeration(n_clusters=n_clusters,
pooling_func=np.mean)
agglo_median = FeatureAgglomeration(n_clusters=n_clusters,
pooling_func=np.median)
assert_no_warnings(agglo_mean.fit, X)
assert_no_warnings(agglo_median.fit, X)
assert np.size(np.unique(agglo_mean.labels_)) == n_clusters
assert np.size(np.unique(agglo_median.labels_)) == n_clusters
assert np.size(agglo_mean.labels_) == X.shape[1]
assert np.size(agglo_median.labels_) == X.shape[1]
# Test transform
Xt_mean = agglo_mean.transform(X)
Xt_median = agglo_median.transform(X)
assert Xt_mean.shape[1] == n_clusters
assert Xt_median.shape[1] == n_clusters
assert Xt_mean == np.array([1 / 3.])
assert Xt_median == np.array([0.])
# Test inverse transform
X_full_mean = agglo_mean.inverse_transform(Xt_mean)
X_full_median = agglo_median.inverse_transform(Xt_median)
assert np.unique(X_full_mean[0]).size == n_clusters
assert np.unique(X_full_median[0]).size == n_clusters
assert_array_almost_equal(agglo_mean.transform(X_full_mean),
Xt_mean)
assert_array_almost_equal(agglo_median.transform(X_full_median),
Xt_median)
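# Sketch of the pooling that transform performs (hypothetical helper): the
# columns of X are grouped by labels_ and pooling_func is applied within each
# group, which is why one cluster over [0, 0, 1] gives 1/3 with np.mean and
# 0 with np.median.
def _pool_features(X, labels, pooling_func=np.mean):
    return np.column_stack([pooling_func(X[:, labels == k], axis=1)
                            for k in np.unique(labels)])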

View file

@ -0,0 +1,765 @@
"""
Several basic tests for hierarchical clustering procedures
"""
# Authors: Vincent Michel, 2010, Gael Varoquaux 2012,
# Matteo Visconti di Oleggio Castello 2014
# License: BSD 3 clause
from tempfile import mkdtemp
import shutil
import pytest
from functools import partial
import numpy as np
from scipy import sparse
from scipy.cluster import hierarchy
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_raise_message
from sklearn.utils._testing import ignore_warnings
from sklearn.cluster import ward_tree
from sklearn.cluster import AgglomerativeClustering, FeatureAgglomeration
from sklearn.cluster._agglomerative import (_hc_cut, _TREE_BUILDERS,
linkage_tree,
_fix_connectivity)
from sklearn.feature_extraction.image import grid_to_graph
from sklearn.metrics.pairwise import PAIRED_DISTANCES, cosine_distances,\
manhattan_distances, pairwise_distances
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.neighbors import kneighbors_graph
from sklearn.cluster._hierarchical_fast import average_merge, max_merge
from sklearn.utils._fast_dict import IntFloatDict
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_warns
from sklearn.datasets import make_moons, make_circles
def test_linkage_misc():
# Misc tests on linkage
rng = np.random.RandomState(42)
X = rng.normal(size=(5, 5))
with pytest.raises(ValueError):
AgglomerativeClustering(linkage='foo').fit(X)
with pytest.raises(ValueError):
linkage_tree(X, linkage='foo')
with pytest.raises(ValueError):
linkage_tree(X, connectivity=np.ones((4, 4)))
# Smoke test FeatureAgglomeration
FeatureAgglomeration().fit(X)
# test hierarchical clustering on a precomputed distances matrix
dis = cosine_distances(X)
res = linkage_tree(dis, affinity="precomputed")
assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])
# test hierarchical clustering on a precomputed distances matrix
res = linkage_tree(X, affinity=manhattan_distances)
assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
def test_structured_linkage_tree():
# Check that we obtain the correct solution for structured linkage trees.
rng = np.random.RandomState(0)
mask = np.ones([10, 10], dtype=np.bool)
# Avoiding a mask with only 'True' entries
mask[4:7, 4:7] = 0
X = rng.randn(50, 100)
connectivity = grid_to_graph(*mask.shape)
for tree_builder in _TREE_BUILDERS.values():
children, n_components, n_leaves, parent = \
tree_builder(X.T, connectivity=connectivity)
n_nodes = 2 * X.shape[1] - 1
assert len(children) + n_leaves == n_nodes
# Check that ward_tree raises a ValueError with a connectivity matrix
# of the wrong shape
with pytest.raises(ValueError):
tree_builder(X.T, connectivity=np.ones((4, 4)))
# Check that fitting with no samples raises an error
with pytest.raises(ValueError):
tree_builder(X.T[:0], connectivity=connectivity)
def test_unstructured_linkage_tree():
# Check that we obtain the correct solution for unstructured linkage trees.
rng = np.random.RandomState(0)
X = rng.randn(50, 100)
for this_X in (X, X[0]):
# With a specified number of clusters, just for the sake of
# raising a warning and testing the warning code
with ignore_warnings():
children, n_nodes, n_leaves, parent = assert_warns(
UserWarning, ward_tree, this_X.T, n_clusters=10)
n_nodes = 2 * X.shape[1] - 1
assert len(children) + n_leaves == n_nodes
for tree_builder in _TREE_BUILDERS.values():
for this_X in (X, X[0]):
with ignore_warnings():
children, n_nodes, n_leaves, parent = assert_warns(
UserWarning, tree_builder, this_X.T, n_clusters=10)
n_nodes = 2 * X.shape[1] - 1
assert len(children) + n_leaves == n_nodes
def test_height_linkage_tree():
# Check that the height of the results of linkage tree is sorted.
rng = np.random.RandomState(0)
mask = np.ones([10, 10], dtype=np.bool)
X = rng.randn(50, 100)
connectivity = grid_to_graph(*mask.shape)
for linkage_func in _TREE_BUILDERS.values():
children, n_nodes, n_leaves, parent = linkage_func(
X.T, connectivity=connectivity)
n_nodes = 2 * X.shape[1] - 1
assert len(children) + n_leaves == n_nodes
def test_agglomerative_clustering_wrong_arg_memory():
# Test that an error is raised when memory is neither a str
# nor a joblib.Memory instance
rng = np.random.RandomState(0)
n_samples = 100
X = rng.randn(n_samples, 50)
memory = 5
clustering = AgglomerativeClustering(memory=memory)
with pytest.raises(ValueError):
clustering.fit(X)
def test_zero_cosine_linkage_tree():
# Check that zero vectors in X produce an error when
# 'cosine' affinity is used
X = np.array([[0, 1],
[0, 0]])
msg = 'Cosine affinity cannot be used when X contains zero vectors'
assert_raise_message(ValueError, msg, linkage_tree, X, affinity='cosine')
def test_agglomerative_clustering():
# Check that we obtain the correct number of clusters with
# agglomerative clustering.
rng = np.random.RandomState(0)
mask = np.ones([10, 10], dtype=np.bool)
n_samples = 100
X = rng.randn(n_samples, 50)
connectivity = grid_to_graph(*mask.shape)
for linkage in ("ward", "complete", "average", "single"):
clustering = AgglomerativeClustering(n_clusters=10,
connectivity=connectivity,
linkage=linkage)
clustering.fit(X)
# test caching
try:
tempdir = mkdtemp()
clustering = AgglomerativeClustering(
n_clusters=10, connectivity=connectivity,
memory=tempdir,
linkage=linkage)
clustering.fit(X)
labels = clustering.labels_
assert np.size(np.unique(labels)) == 10
finally:
shutil.rmtree(tempdir)
# Turn caching off now
clustering = AgglomerativeClustering(
n_clusters=10, connectivity=connectivity, linkage=linkage)
# Check that we obtain the same solution with early-stopping of the
# tree building
clustering.compute_full_tree = False
clustering.fit(X)
assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
labels), 1)
clustering.connectivity = None
clustering.fit(X)
assert np.size(np.unique(clustering.labels_)) == 10
# Check that we raise a TypeError on dense matrices
clustering = AgglomerativeClustering(
n_clusters=10,
connectivity=sparse.lil_matrix(
connectivity.toarray()[:10, :10]),
linkage=linkage)
with pytest.raises(ValueError):
clustering.fit(X)
# Test that using ward with another metric than euclidean raises an
# exception
clustering = AgglomerativeClustering(
n_clusters=10,
connectivity=connectivity.toarray(),
affinity="manhattan",
linkage="ward")
with pytest.raises(ValueError):
clustering.fit(X)
# Test using another metric than euclidean works with linkage complete
for affinity in PAIRED_DISTANCES.keys():
# Compare our (structured) implementation to scipy
clustering = AgglomerativeClustering(
n_clusters=10,
connectivity=np.ones((n_samples, n_samples)),
affinity=affinity,
linkage="complete")
clustering.fit(X)
clustering2 = AgglomerativeClustering(
n_clusters=10,
connectivity=None,
affinity=affinity,
linkage="complete")
clustering2.fit(X)
assert_almost_equal(normalized_mutual_info_score(clustering2.labels_,
clustering.labels_),
1)
# Test that using a distance matrix (affinity = 'precomputed') has same
# results (with connectivity constraints)
clustering = AgglomerativeClustering(n_clusters=10,
connectivity=connectivity,
linkage="complete")
clustering.fit(X)
X_dist = pairwise_distances(X)
clustering2 = AgglomerativeClustering(n_clusters=10,
connectivity=connectivity,
affinity='precomputed',
linkage="complete")
clustering2.fit(X_dist)
assert_array_equal(clustering.labels_, clustering2.labels_)
def test_ward_agglomeration():
# Check that we obtain the correct solution in a simplistic case
rng = np.random.RandomState(0)
mask = np.ones([10, 10], dtype=np.bool)
X = rng.randn(50, 100)
connectivity = grid_to_graph(*mask.shape)
agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
agglo.fit(X)
assert np.size(np.unique(agglo.labels_)) == 5
X_red = agglo.transform(X)
assert X_red.shape[1] == 5
X_full = agglo.inverse_transform(X_red)
assert np.unique(X_full[0]).size == 5
assert_array_almost_equal(agglo.transform(X_full), X_red)
# Check that fitting with no samples raises a ValueError
with pytest.raises(ValueError):
agglo.fit(X[:0])
def test_single_linkage_clustering():
# Check that we get the correct result in two emblematic cases
moons, moon_labels = make_moons(noise=0.05, random_state=42)
clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
clustering.fit(moons)
assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
moon_labels), 1)
circles, circle_labels = make_circles(factor=0.5, noise=0.025,
random_state=42)
clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
clustering.fit(circles)
assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
circle_labels), 1)
def assess_same_labelling(cut1, cut2):
"""Util for comparison with scipy"""
co_clust = []
for cut in [cut1, cut2]:
n = len(cut)
k = cut.max() + 1
ecut = np.zeros((n, k))
ecut[np.arange(n), cut] = 1
co_clust.append(np.dot(ecut, ecut.T))
assert (co_clust[0] == co_clust[1]).all()
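# Informal note on the helper above: ecut is a one-hot encoding of the cut,
# so np.dot(ecut, ecut.T) is the co-membership matrix (entry (i, j) is 1 when
# samples i and j share a cluster). Two cuts agree up to a relabelling of the
# clusters exactly when these matrices are equal, which is what the assertion
# checks.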
def test_sparse_scikit_vs_scipy():
# Test scikit linkage with full connectivity (i.e. unstructured) vs scipy
n, p, k = 10, 5, 3
rng = np.random.RandomState(0)
# Not using a lil_matrix here, just to check that non-sparse
# matrices are handled well
connectivity = np.ones((n, n))
for linkage in _TREE_BUILDERS.keys():
for i in range(5):
X = .1 * rng.normal(size=(n, p))
X -= 4. * np.arange(n)[:, np.newaxis]
X -= X.mean(axis=1)[:, np.newaxis]
out = hierarchy.linkage(X, method=linkage)
children_ = out[:, :2].astype(np.int, copy=False)
children, _, n_leaves, _ = _TREE_BUILDERS[linkage](
X, connectivity=connectivity)
# Sort the order of child nodes per row for consistency
children.sort(axis=1)
assert_array_equal(children, children_, 'linkage tree differs'
' from scipy impl for'
' linkage: ' + linkage)
cut = _hc_cut(k, children, n_leaves)
cut_ = _hc_cut(k, children_, n_leaves)
assess_same_labelling(cut, cut_)
# Test error management in _hc_cut
with pytest.raises(ValueError):
_hc_cut(n_leaves + 1, children, n_leaves)
# Make sure our custom mst_linkage_core gives
# the same results as scipy's builtin
@pytest.mark.parametrize('seed', range(5))
def test_vector_scikit_single_vs_scipy_single(seed):
n_samples, n_features, n_clusters = 10, 5, 3
rng = np.random.RandomState(seed)
X = .1 * rng.normal(size=(n_samples, n_features))
X -= 4. * np.arange(n_samples)[:, np.newaxis]
X -= X.mean(axis=1)[:, np.newaxis]
out = hierarchy.linkage(X, method='single')
children_scipy = out[:, :2].astype(np.int)
children, _, n_leaves, _ = _TREE_BUILDERS['single'](X)
# Sort the order of child nodes per row for consistency
children.sort(axis=1)
assert_array_equal(children, children_scipy,
'linkage tree differs'
' from scipy impl for'
' single linkage.')
cut = _hc_cut(n_clusters, children, n_leaves)
cut_scipy = _hc_cut(n_clusters, children_scipy, n_leaves)
assess_same_labelling(cut, cut_scipy)
def test_identical_points():
# Ensure identical points are handled correctly when using mst with
# a sparse connectivity matrix
X = np.array([[0, 0, 0], [0, 0, 0],
[1, 1, 1], [1, 1, 1],
[2, 2, 2], [2, 2, 2]])
true_labels = np.array([0, 0, 1, 1, 2, 2])
connectivity = kneighbors_graph(X, n_neighbors=3, include_self=False)
connectivity = 0.5 * (connectivity + connectivity.T)
connectivity, n_components = _fix_connectivity(X,
connectivity,
'euclidean')
for linkage in ('single', 'average', 'ward'):
clustering = AgglomerativeClustering(n_clusters=3,
linkage=linkage,
connectivity=connectivity)
clustering.fit(X)
assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
true_labels), 1)
def test_connectivity_propagation():
# Check that connectivity in the ward tree is propagated correctly during
# merging.
X = np.array([(.014, .120), (.014, .099), (.014, .097),
(.017, .153), (.017, .153), (.018, .153),
(.018, .153), (.018, .153), (.018, .153),
(.018, .153), (.018, .153), (.018, .153),
(.018, .152), (.018, .149), (.018, .144)])
connectivity = kneighbors_graph(X, 10, include_self=False)
ward = AgglomerativeClustering(
n_clusters=4, connectivity=connectivity, linkage='ward')
# If changes are not propagated correctly, fit crashes with an
# IndexError
ward.fit(X)
def test_ward_tree_children_order():
# Check that children are ordered in the same way for both structured and
# unstructured versions of ward_tree.
# test on five random datasets
n, p = 10, 5
rng = np.random.RandomState(0)
connectivity = np.ones((n, n))
for i in range(5):
X = .1 * rng.normal(size=(n, p))
X -= 4. * np.arange(n)[:, np.newaxis]
X -= X.mean(axis=1)[:, np.newaxis]
out_unstructured = ward_tree(X)
out_structured = ward_tree(X, connectivity=connectivity)
assert_array_equal(out_unstructured[0], out_structured[0])
def test_ward_linkage_tree_return_distance():
# Test return_distance option on linkage and ward trees
# test that return_distance, when set to True, gives the same
# output for both structured and unstructured clustering.
n, p = 10, 5
rng = np.random.RandomState(0)
connectivity = np.ones((n, n))
for i in range(5):
X = .1 * rng.normal(size=(n, p))
X -= 4. * np.arange(n)[:, np.newaxis]
X -= X.mean(axis=1)[:, np.newaxis]
out_unstructured = ward_tree(X, return_distance=True)
out_structured = ward_tree(X, connectivity=connectivity,
return_distance=True)
# get children
children_unstructured = out_unstructured[0]
children_structured = out_structured[0]
# check if we got the same clusters
assert_array_equal(children_unstructured, children_structured)
# check if the distances are the same
dist_unstructured = out_unstructured[-1]
dist_structured = out_structured[-1]
assert_array_almost_equal(dist_unstructured, dist_structured)
for linkage in ['average', 'complete', 'single']:
structured_items = linkage_tree(
X, connectivity=connectivity, linkage=linkage,
return_distance=True)[-1]
unstructured_items = linkage_tree(
X, linkage=linkage, return_distance=True)[-1]
structured_dist = structured_items[-1]
unstructured_dist = unstructured_items[-1]
structured_children = structured_items[0]
unstructured_children = unstructured_items[0]
assert_array_almost_equal(structured_dist, unstructured_dist)
assert_array_almost_equal(
structured_children, unstructured_children)
# test on the following dataset where we know the truth
# taken from scipy/cluster/tests/hierarchy_test_data.py
X = np.array([[1.43054825, -7.5693489],
[6.95887839, 6.82293382],
[2.87137846, -9.68248579],
[7.87974764, -6.05485803],
[8.24018364, -6.09495602],
[7.39020262, 8.54004355]])
# truth
linkage_X_ward = np.array([[3., 4., 0.36265956, 2.],
[1., 5., 1.77045373, 2.],
[0., 2., 2.55760419, 2.],
[6., 8., 9.10208346, 4.],
[7., 9., 24.7784379, 6.]])
linkage_X_complete = np.array(
[[3., 4., 0.36265956, 2.],
[1., 5., 1.77045373, 2.],
[0., 2., 2.55760419, 2.],
[6., 8., 6.96742194, 4.],
[7., 9., 18.77445997, 6.]])
linkage_X_average = np.array(
[[3., 4., 0.36265956, 2.],
[1., 5., 1.77045373, 2.],
[0., 2., 2.55760419, 2.],
[6., 8., 6.55832839, 4.],
[7., 9., 15.44089605, 6.]])
n_samples, n_features = np.shape(X)
connectivity_X = np.ones((n_samples, n_samples))
out_X_unstructured = ward_tree(X, return_distance=True)
out_X_structured = ward_tree(X, connectivity=connectivity_X,
return_distance=True)
# check that the labels are the same
assert_array_equal(linkage_X_ward[:, :2], out_X_unstructured[0])
assert_array_equal(linkage_X_ward[:, :2], out_X_structured[0])
# check that the distances are correct
assert_array_almost_equal(linkage_X_ward[:, 2], out_X_unstructured[4])
assert_array_almost_equal(linkage_X_ward[:, 2], out_X_structured[4])
linkage_options = ['complete', 'average', 'single']
X_linkage_truth = [linkage_X_complete, linkage_X_average]
for (linkage, X_truth) in zip(linkage_options, X_linkage_truth):
out_X_unstructured = linkage_tree(
X, return_distance=True, linkage=linkage)
out_X_structured = linkage_tree(
X, connectivity=connectivity_X, linkage=linkage,
return_distance=True)
# check that the labels are the same
assert_array_equal(X_truth[:, :2], out_X_unstructured[0])
assert_array_equal(X_truth[:, :2], out_X_structured[0])
# check that the distances are correct
assert_array_almost_equal(X_truth[:, 2], out_X_unstructured[4])
assert_array_almost_equal(X_truth[:, 2], out_X_structured[4])
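# The truth arrays above follow scipy's linkage-matrix convention
# [idx_a, idx_b, distance, sample_count]; as a sketch, they can be
# regenerated (up to numerical precision) with:
#     from scipy.cluster import hierarchy
#     hierarchy.linkage(X, method='ward')      # ~ linkage_X_ward
#     hierarchy.linkage(X, method='complete')  # ~ linkage_X_complete
#     hierarchy.linkage(X, method='average')   # ~ linkage_X_average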
def test_connectivity_fixing_non_lil():
# Non-regression check for a bug when a connectivity matrix that does not
# support item assignment is provided with more than one component.
# create dummy data
x = np.array([[0, 0], [1, 1]])
# create a mask with several components to force connectivity fixing
m = np.array([[True, False], [False, True]])
c = grid_to_graph(n_x=2, n_y=2, mask=m)
w = AgglomerativeClustering(connectivity=c, linkage='ward')
assert_warns(UserWarning, w.fit, x)
def test_int_float_dict():
rng = np.random.RandomState(0)
keys = np.unique(rng.randint(100, size=10).astype(np.intp, copy=False))
values = rng.rand(len(keys))
d = IntFloatDict(keys, values)
for key, value in zip(keys, values):
assert d[key] == value
other_keys = np.arange(50, dtype=np.intp)[::2]
other_values = np.full(50, 0.5)[::2]
other = IntFloatDict(other_keys, other_values)
# Complete smoke test
max_merge(d, other, mask=np.ones(100, dtype=np.intp), n_a=1, n_b=1)
average_merge(d, other, mask=np.ones(100, dtype=np.intp), n_a=1, n_b=1)
def test_connectivity_callable():
rng = np.random.RandomState(0)
X = rng.rand(20, 5)
connectivity = kneighbors_graph(X, 3, include_self=False)
aglc1 = AgglomerativeClustering(connectivity=connectivity)
aglc2 = AgglomerativeClustering(
connectivity=partial(kneighbors_graph, n_neighbors=3,
include_self=False))
aglc1.fit(X)
aglc2.fit(X)
assert_array_equal(aglc1.labels_, aglc2.labels_)
def test_connectivity_ignores_diagonal():
rng = np.random.RandomState(0)
X = rng.rand(20, 5)
connectivity = kneighbors_graph(X, 3, include_self=False)
connectivity_include_self = kneighbors_graph(X, 3, include_self=True)
aglc1 = AgglomerativeClustering(connectivity=connectivity)
aglc2 = AgglomerativeClustering(connectivity=connectivity_include_self)
aglc1.fit(X)
aglc2.fit(X)
assert_array_equal(aglc1.labels_, aglc2.labels_)
def test_compute_full_tree():
# Test that the full tree is computed if n_clusters is small
rng = np.random.RandomState(0)
X = rng.randn(10, 2)
connectivity = kneighbors_graph(X, 5, include_self=False)
# When n_clusters is small, the full tree should be built,
# that is, the number of merges should be n_samples - 1
agc = AgglomerativeClustering(n_clusters=2, connectivity=connectivity)
agc.fit(X)
n_samples = X.shape[0]
n_nodes = agc.children_.shape[0]
assert n_nodes == n_samples - 1
# When n_clusters is large (greater than max(100, 0.02 * n_samples)),
# we should stop once n_clusters clusters remain.
n_clusters = 101
X = rng.randn(200, 2)
connectivity = kneighbors_graph(X, 10, include_self=False)
agc = AgglomerativeClustering(n_clusters=n_clusters,
connectivity=connectivity)
agc.fit(X)
n_samples = X.shape[0]
n_nodes = agc.children_.shape[0]
assert n_nodes == n_samples - n_clusters
def test_n_components():
# Test the n_components returned by the ward and linkage tree builders
rng = np.random.RandomState(0)
X = rng.rand(5, 5)
# Connectivity matrix having five components.
connectivity = np.eye(5)
for linkage_func in _TREE_BUILDERS.values():
assert ignore_warnings(linkage_func)(X, connectivity)[1] == 5
def test_agg_n_clusters():
# Test that an error is raised when n_clusters <= 0
rng = np.random.RandomState(0)
X = rng.rand(20, 10)
for n_clus in [-1, 0]:
agc = AgglomerativeClustering(n_clusters=n_clus)
msg = ("n_clusters should be an integer greater than 0."
" %s was provided." % str(agc.n_clusters))
assert_raise_message(ValueError, msg, agc.fit, X)
def test_affinity_passed_to_fix_connectivity():
# Test that the affinity parameter is actually passed to the pairwise
# function
size = 2
rng = np.random.RandomState(0)
X = rng.randn(size, size)
mask = np.array([True, False, False, True])
connectivity = grid_to_graph(n_x=size, n_y=size,
mask=mask, return_as=np.ndarray)
class FakeAffinity:
def __init__(self):
self.counter = 0
def increment(self, *args, **kwargs):
self.counter += 1
return self.counter
fa = FakeAffinity()
linkage_tree(X, connectivity=connectivity, affinity=fa.increment)
assert fa.counter == 3
@pytest.mark.parametrize('linkage', ['ward', 'complete', 'average'])
def test_agglomerative_clustering_with_distance_threshold(linkage):
# Check that we obtain the correct number of clusters with
# agglomerative clustering with distance_threshold.
rng = np.random.RandomState(0)
mask = np.ones([10, 10], dtype=np.bool)
n_samples = 100
X = rng.randn(n_samples, 50)
connectivity = grid_to_graph(*mask.shape)
# test when distance threshold is set to 10
distance_threshold = 10
for conn in [None, connectivity]:
clustering = AgglomerativeClustering(
n_clusters=None,
distance_threshold=distance_threshold,
connectivity=conn, linkage=linkage)
clustering.fit(X)
clusters_produced = clustering.labels_
num_clusters_produced = len(np.unique(clustering.labels_))
# test if the clusters produced match the point in the linkage tree
# where the distance exceeds the threshold
tree_builder = _TREE_BUILDERS[linkage]
children, n_components, n_leaves, parent, distances = \
tree_builder(X, connectivity=conn, n_clusters=None,
return_distance=True)
num_clusters_at_threshold = np.count_nonzero(
distances >= distance_threshold) + 1
# test number of clusters produced
assert num_clusters_at_threshold == num_clusters_produced
# test clusters produced
clusters_at_threshold = _hc_cut(n_clusters=num_clusters_produced,
children=children,
n_leaves=n_leaves)
assert np.array_equiv(clusters_produced,
clusters_at_threshold)
def test_small_distance_threshold():
rng = np.random.RandomState(0)
n_samples = 10
X = rng.randint(-300, 300, size=(n_samples, 3))
# this should result in all data in their own clusters, given that
# their pairwise distances are bigger than .1 (which may not be the case
# with a different random seed).
clustering = AgglomerativeClustering(
n_clusters=None,
distance_threshold=1.,
linkage="single").fit(X)
# check that the pairwise distances are indeed all larger than .1
all_distances = pairwise_distances(X, metric='minkowski', p=2)
np.fill_diagonal(all_distances, np.inf)
assert np.all(all_distances > .1)
assert clustering.n_clusters_ == n_samples
def test_cluster_distances_with_distance_threshold():
rng = np.random.RandomState(0)
n_samples = 100
X = rng.randint(-10, 10, size=(n_samples, 3))
# check the distances within the clusters and with other clusters
distance_threshold = 4
clustering = AgglomerativeClustering(
n_clusters=None,
distance_threshold=distance_threshold,
linkage="single").fit(X)
labels = clustering.labels_
D = pairwise_distances(X, metric="minkowski", p=2)
# to avoid taking the 0 diagonal in min()
np.fill_diagonal(D, np.inf)
for label in np.unique(labels):
in_cluster_mask = labels == label
max_in_cluster_distance = (D[in_cluster_mask][:, in_cluster_mask]
.min(axis=0).max())
min_out_cluster_distance = (D[in_cluster_mask][:, ~in_cluster_mask]
.min(axis=0).min())
# single data point clusters only have that inf diagonal here
if in_cluster_mask.sum() > 1:
assert max_in_cluster_distance < distance_threshold
assert min_out_cluster_distance >= distance_threshold
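# Informal note: with single linkage, a cluster cut at distance_threshold is
# a connected component of the graph whose edges are the pairs closer than
# the threshold, so every point in a multi-point cluster has some in-cluster
# neighbour closer than the threshold (the max of the row-wise minima above),
# while its nearest point outside the cluster is at least distance_threshold
# away.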
@pytest.mark.parametrize('linkage', ['ward', 'complete', 'average'])
@pytest.mark.parametrize(('threshold', 'y_true'),
[(0.5, [1, 0]), (1.0, [1, 0]), (1.5, [0, 0])])
def test_agglomerative_clustering_with_distance_threshold_edge_case(
linkage, threshold, y_true):
# test boundary case of distance_threshold matching the distance
X = [[0], [1]]
clusterer = AgglomerativeClustering(
n_clusters=None,
distance_threshold=threshold,
linkage=linkage)
y_pred = clusterer.fit_predict(X)
assert adjusted_rand_score(y_true, y_pred) == 1
def test_dist_threshold_invalid_parameters():
X = [[0], [1]]
with pytest.raises(ValueError, match="Exactly one of "):
AgglomerativeClustering(n_clusters=None,
distance_threshold=None).fit(X)
with pytest.raises(ValueError, match="Exactly one of "):
AgglomerativeClustering(n_clusters=2,
distance_threshold=1).fit(X)
X = [[0], [1]]
with pytest.raises(ValueError, match="compute_full_tree must be True if"):
AgglomerativeClustering(n_clusters=None,
distance_threshold=1,
compute_full_tree=False).fit(X)
def test_invalid_shape_precomputed_dist_matrix():
# Check that an error is raised when affinity='precomputed'
# and a non square matrix is passed (PR #16257).
rng = np.random.RandomState(0)
X = rng.rand(5, 3)
with pytest.raises(ValueError, match="Distance matrix should be square, "):
AgglomerativeClustering(affinity='precomputed',
linkage='complete').fit(X)

File diff suppressed because it is too large

View file

@ -0,0 +1,194 @@
"""
Testing for mean shift clustering methods
"""
import numpy as np
import warnings
import pytest
from scipy import sparse
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_raise_message
from sklearn.utils._testing import assert_allclose
from sklearn.cluster import MeanShift
from sklearn.cluster import mean_shift
from sklearn.cluster import estimate_bandwidth
from sklearn.cluster import get_bin_seeds
from sklearn.datasets import make_blobs
from sklearn.metrics import v_measure_score
n_clusters = 3
centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10
X, _ = make_blobs(n_samples=300, n_features=2, centers=centers,
cluster_std=0.4, shuffle=True, random_state=11)
def test_estimate_bandwidth():
# Test estimate_bandwidth
bandwidth = estimate_bandwidth(X, n_samples=200)
assert 0.9 <= bandwidth <= 1.5
def test_estimate_bandwidth_1sample():
# Test estimate_bandwidth when n_samples=1 and quantile<1, so that
# n_neighbors is set to 1.
bandwidth = estimate_bandwidth(X, n_samples=1, quantile=0.3)
assert bandwidth == pytest.approx(0., abs=1e-5)
@pytest.mark.parametrize("bandwidth, cluster_all, expected, "
"first_cluster_label",
[(1.2, True, 3, 0), (1.2, False, 4, -1)])
def test_mean_shift(bandwidth, cluster_all, expected, first_cluster_label):
# Test MeanShift algorithm
ms = MeanShift(bandwidth=bandwidth, cluster_all=cluster_all)
labels = ms.fit(X).labels_
labels_unique = np.unique(labels)
n_clusters_ = len(labels_unique)
assert n_clusters_ == expected
assert labels_unique[0] == first_cluster_label
cluster_centers, labels_mean_shift = mean_shift(X, cluster_all=cluster_all)
labels_mean_shift_unique = np.unique(labels_mean_shift)
n_clusters_mean_shift = len(labels_mean_shift_unique)
assert n_clusters_mean_shift == expected
assert labels_mean_shift_unique[0] == first_cluster_label
def test_mean_shift_negative_bandwidth():
bandwidth = -1
ms = MeanShift(bandwidth=bandwidth)
msg = (r"bandwidth needs to be greater than zero or None,"
r" got -1\.000000")
with pytest.raises(ValueError, match=msg):
ms.fit(X)
def test_estimate_bandwidth_with_sparse_matrix():
# Test estimate_bandwidth with sparse matrix
X = sparse.lil_matrix((1000, 1000))
msg = "A sparse matrix was passed, but dense data is required."
assert_raise_message(TypeError, msg, estimate_bandwidth, X)
def test_parallel():
centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10
X, _ = make_blobs(n_samples=50, n_features=2, centers=centers,
cluster_std=0.4, shuffle=True, random_state=11)
ms1 = MeanShift(n_jobs=2)
ms1.fit(X)
ms2 = MeanShift()
ms2.fit(X)
assert_array_almost_equal(ms1.cluster_centers_, ms2.cluster_centers_)
assert_array_equal(ms1.labels_, ms2.labels_)
def test_meanshift_predict():
# Test MeanShift.predict
ms = MeanShift(bandwidth=1.2)
labels = ms.fit_predict(X)
labels2 = ms.predict(X)
assert_array_equal(labels, labels2)
def test_meanshift_all_orphans():
# init away from the data; fail with a sensible error message
ms = MeanShift(bandwidth=0.1, seeds=[[-9, -9], [-10, -10]])
msg = "No point was within bandwidth=0.1"
assert_raise_message(ValueError, msg, ms.fit, X,)
def test_unfitted():
# Non-regression: before fit, there should be no fitted attributes.
ms = MeanShift()
assert not hasattr(ms, "cluster_centers_")
assert not hasattr(ms, "labels_")
def test_cluster_intensity_tie():
X = np.array([[1, 1], [2, 1], [1, 0],
[4, 7], [3, 5], [3, 6]])
c1 = MeanShift(bandwidth=2).fit(X)
X = np.array([[4, 7], [3, 5], [3, 6],
[1, 1], [2, 1], [1, 0]])
c2 = MeanShift(bandwidth=2).fit(X)
assert_array_equal(c1.labels_, [1, 1, 1, 0, 0, 0])
assert_array_equal(c2.labels_, [0, 0, 0, 1, 1, 1])
def test_bin_seeds():
# Test the bin seeding technique which can be used in the mean shift
# algorithm
# Data is just 6 points in the plane
X = np.array([[1., 1.], [1.4, 1.4], [1.8, 1.2],
[2., 1.], [2.1, 1.1], [0., 0.]])
# With a bin coarseness of 1.0 and min_bin_freq of 1, 3 bins should be
# found
ground_truth = {(1., 1.), (2., 1.), (0., 0.)}
test_bins = get_bin_seeds(X, 1, 1)
test_result = set(tuple(p) for p in test_bins)
assert len(ground_truth.symmetric_difference(test_result)) == 0
# With a bin coarseness of 1.0 and min_bin_freq of 2, 2 bins should be
# found
ground_truth = {(1., 1.), (2., 1.)}
test_bins = get_bin_seeds(X, 1, 2)
test_result = set(tuple(p) for p in test_bins)
assert len(ground_truth.symmetric_difference(test_result)) == 0
# With a bin size of 0.01 and min_bin_freq of 1, 6 bins should be found;
# in that case we bail out and use the whole dataset as seeds.
with warnings.catch_warnings(record=True):
test_bins = get_bin_seeds(X, 0.01, 1)
assert_array_almost_equal(test_bins, X)
# tight clusters around [0, 0] and [1, 1], only get two bins
X, _ = make_blobs(n_samples=100, n_features=2, centers=[[0, 0], [1, 1]],
cluster_std=0.1, random_state=0)
test_bins = get_bin_seeds(X, 1)
assert_array_equal(test_bins, [[0, 0], [1, 1]])
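# Minimal sketch of the binning rule exercised above (hypothetical helper):
# points are snapped to a grid of spacing bin_size and the bins holding at
# least min_bin_freq points are returned as seeds.
def _bin_seeds_sketch(X, bin_size, min_bin_freq=1):
    bins, counts = np.unique(np.round(np.asarray(X) / bin_size), axis=0,
                             return_counts=True)
    return bins[counts >= min_bin_freq] * bin_size
# e.g. _bin_seeds_sketch(np.array([[1., 1.], [1.4, 1.4], [1.8, 1.2],
#                                  [2., 1.], [2.1, 1.1], [0., 0.]]), 1, 2)
# yields the two seeds {(1, 1), (2, 1)} from the ground truth above.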
@pytest.mark.parametrize('max_iter', [1, 100])
def test_max_iter(max_iter):
clusters1, _ = mean_shift(X, max_iter=max_iter)
ms = MeanShift(max_iter=max_iter).fit(X)
clusters2 = ms.cluster_centers_
assert ms.n_iter_ <= ms.max_iter
assert len(clusters1) == len(clusters2)
for c1, c2 in zip(clusters1, clusters2):
assert np.allclose(c1, c2)
def test_mean_shift_zero_bandwidth():
# Check that mean shift works when the estimated bandwidth is 0.
X = np.array([1, 1, 1, 2, 2, 2, 3, 3]).reshape(-1, 1)
# estimate_bandwidth with default args returns 0 on this dataset
bandwidth = estimate_bandwidth(X)
assert bandwidth == 0
# get_bin_seeds with a 0 bin_size should return the dataset itself
assert get_bin_seeds(X, bin_size=bandwidth) is X
# MeanShift with binning and a 0 estimated bandwidth should be equivalent
# to no binning.
ms_binning = MeanShift(bin_seeding=True, bandwidth=None).fit(X)
ms_nobinning = MeanShift(bin_seeding=False).fit(X)
expected_labels = np.array([0, 0, 0, 1, 1, 1, 2, 2])
assert v_measure_score(ms_binning.labels_, expected_labels) == 1
assert v_measure_score(ms_nobinning.labels_, expected_labels) == 1
assert_allclose(ms_binning.cluster_centers_, ms_nobinning.cluster_centers_)

View file

@ -0,0 +1,429 @@
# Authors: Shane Grigsby <refuge@rocktalus.com>
# Adrin Jalali <adrin.jalali@gmail.com>
# License: BSD 3 clause
import numpy as np
import pytest
from sklearn.datasets import make_blobs
from sklearn.cluster import OPTICS
from sklearn.cluster._optics import _extend_region, _extract_xi_labels
from sklearn.metrics.cluster import contingency_matrix
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.cluster import DBSCAN
from sklearn.utils import shuffle
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_raise_message
from sklearn.utils._testing import assert_allclose
from sklearn.cluster.tests.common import generate_clustered_data
rng = np.random.RandomState(0)
n_points_per_cluster = 10
C1 = [-5, -2] + .8 * rng.randn(n_points_per_cluster, 2)
C2 = [4, -1] + .1 * rng.randn(n_points_per_cluster, 2)
C3 = [1, -2] + .2 * rng.randn(n_points_per_cluster, 2)
C4 = [-2, 3] + .3 * rng.randn(n_points_per_cluster, 2)
C5 = [3, -2] + 1.6 * rng.randn(n_points_per_cluster, 2)
C6 = [5, 6] + 2 * rng.randn(n_points_per_cluster, 2)
X = np.vstack((C1, C2, C3, C4, C5, C6))
@pytest.mark.parametrize(
('r_plot', 'end'),
[[[10, 8.9, 8.8, 8.7, 7, 10], 3],
[[10, 8.9, 8.8, 8.7, 8.6, 7, 10], 0],
[[10, 8.9, 8.8, 8.7, 7, 6, np.inf], 4],
[[10, 8.9, 8.8, 8.7, 7, 6, np.inf], 4],
])
def test_extend_downward(r_plot, end):
r_plot = np.array(r_plot)
ratio = r_plot[:-1] / r_plot[1:]
steep_downward = ratio >= 1 / .9
upward = ratio < 1
e = _extend_region(steep_downward, upward, 0, 2)
assert e == end
@pytest.mark.parametrize(
('r_plot', 'end'),
[[[1, 2, 2.1, 2.2, 4, 8, 8, np.inf], 6],
[[1, 2, 2.1, 2.2, 2.3, 4, 8, 8, np.inf], 0],
[[1, 2, 2.1, 2, np.inf], 0],
[[1, 2, 2.1, np.inf], 2],
])
def test_extend_upward(r_plot, end):
r_plot = np.array(r_plot)
ratio = r_plot[:-1] / r_plot[1:]
steep_upward = ratio <= .9
downward = ratio > 1
e = _extend_region(steep_upward, downward, 0, 2)
assert e == end
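# Note on the fixtures above (informal): these thresholds correspond to
# xi = 0.1. Writing ratio = r[i] / r[i + 1], "steep downward"
# (ratio >= 1 / 0.9) is equivalent to r[i + 1] <= (1 - xi) * r[i], and
# "steep upward" (ratio <= 0.9) to r[i] <= (1 - xi) * r[i + 1].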
@pytest.mark.parametrize(
('ordering', 'clusters', 'expected'),
[[[0, 1, 2, 3], [[0, 1], [2, 3]], [0, 0, 1, 1]],
[[0, 1, 2, 3], [[0, 1], [3, 3]], [0, 0, -1, 1]],
[[0, 1, 2, 3], [[0, 1], [3, 3], [0, 3]], [0, 0, -1, 1]],
[[3, 1, 2, 0], [[0, 1], [3, 3], [0, 3]], [1, 0, -1, 0]],
])
def test_the_extract_xi_labels(ordering, clusters, expected):
labels = _extract_xi_labels(ordering, clusters)
assert_array_equal(labels, expected)
def test_extract_xi():
# small and easy test (no clusters around other clusters)
# but with clear noise data.
rng = np.random.RandomState(0)
n_points_per_cluster = 5
C1 = [-5, -2] + .8 * rng.randn(n_points_per_cluster, 2)
C2 = [4, -1] + .1 * rng.randn(n_points_per_cluster, 2)
C3 = [1, -2] + .2 * rng.randn(n_points_per_cluster, 2)
C4 = [-2, 3] + .3 * rng.randn(n_points_per_cluster, 2)
C5 = [3, -2] + .6 * rng.randn(n_points_per_cluster, 2)
C6 = [5, 6] + .2 * rng.randn(n_points_per_cluster, 2)
X = np.vstack((C1, C2, C3, C4, C5, np.array([[100, 100]]), C6))
expected_labels = np.r_[[2] * 5, [0] * 5, [1] * 5, [3] * 5, [1] * 5,
-1, [4] * 5]
X, expected_labels = shuffle(X, expected_labels, random_state=rng)
clust = OPTICS(min_samples=3, min_cluster_size=2,
max_eps=20, cluster_method='xi',
xi=0.4).fit(X)
assert_array_equal(clust.labels_, expected_labels)
# check float min_samples and min_cluster_size
clust = OPTICS(min_samples=0.1, min_cluster_size=0.08,
max_eps=20, cluster_method='xi',
xi=0.4).fit(X)
assert_array_equal(clust.labels_, expected_labels)
X = np.vstack((C1, C2, C3, C4, C5, np.array([[100, 100]] * 2), C6))
expected_labels = np.r_[[1] * 5, [3] * 5, [2] * 5, [0] * 5, [2] * 5,
-1, -1, [4] * 5]
X, expected_labels = shuffle(X, expected_labels, random_state=rng)
clust = OPTICS(min_samples=3, min_cluster_size=3,
max_eps=20, cluster_method='xi',
xi=0.3).fit(X)
# this may fail if the predecessor correction is not at work!
assert_array_equal(clust.labels_, expected_labels)
C1 = [[0, 0], [0, 0.1], [0, -.1], [0.1, 0]]
C2 = [[10, 10], [10, 9], [10, 11], [9, 10]]
C3 = [[100, 100], [100, 90], [100, 110], [90, 100]]
X = np.vstack((C1, C2, C3))
expected_labels = np.r_[[0] * 4, [1] * 4, [2] * 4]
X, expected_labels = shuffle(X, expected_labels, random_state=rng)
clust = OPTICS(min_samples=2, min_cluster_size=2,
max_eps=np.inf, cluster_method='xi',
xi=0.04).fit(X)
assert_array_equal(clust.labels_, expected_labels)
def test_cluster_hierarchy_():
rng = np.random.RandomState(0)
n_points_per_cluster = 100
C1 = [0, 0] + 2 * rng.randn(n_points_per_cluster, 2)
C2 = [0, 0] + 50 * rng.randn(n_points_per_cluster, 2)
X = np.vstack((C1, C2))
X = shuffle(X, random_state=0)
clusters = OPTICS(min_samples=20, xi=.1).fit(X).cluster_hierarchy_
assert clusters.shape == (2, 2)
diff = np.sum(clusters - np.array([[0, 99], [0, 199]]))
assert diff / len(X) < 0.05
def test_correct_number_of_clusters():
# in 'auto' mode
n_clusters = 3
X = generate_clustered_data(n_clusters=n_clusters)
# Parameters chosen specifically for this task.
# Compute OPTICS
clust = OPTICS(max_eps=5.0 * 6.0, min_samples=4, xi=.1)
clust.fit(X)
# number of clusters, ignoring noise if present
n_clusters_1 = len(set(clust.labels_)) - int(-1 in clust.labels_)
assert n_clusters_1 == n_clusters
# check attribute types and sizes
assert clust.labels_.shape == (len(X),)
assert clust.labels_.dtype.kind == 'i'
assert clust.reachability_.shape == (len(X),)
assert clust.reachability_.dtype.kind == 'f'
assert clust.core_distances_.shape == (len(X),)
assert clust.core_distances_.dtype.kind == 'f'
assert clust.ordering_.shape == (len(X),)
assert clust.ordering_.dtype.kind == 'i'
assert set(clust.ordering_) == set(range(len(X)))
def test_minimum_number_of_sample_check():
# test that we check a minimum number of samples
msg = "min_samples must be no greater than"
# Compute OPTICS
X = [[1, 1]]
clust = OPTICS(max_eps=5.0 * 0.3, min_samples=10, min_cluster_size=1)
# Run the fit
assert_raise_message(ValueError, msg, clust.fit, X)
def test_bad_extract():
# Test an extraction of eps too close to original eps
msg = "Specify an epsilon smaller than 0.15. Got 0.3."
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers,
cluster_std=0.4, random_state=0)
# Compute OPTICS
clust = OPTICS(max_eps=5.0 * 0.03,
cluster_method='dbscan',
eps=0.3, min_samples=10)
assert_raise_message(ValueError, msg, clust.fit, X)
def test_bad_reachability():
msg = "All reachability values are inf. Set a larger max_eps."
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers,
cluster_std=0.4, random_state=0)
with pytest.warns(UserWarning, match=msg):
clust = OPTICS(max_eps=5.0 * 0.003, min_samples=10, eps=0.015)
clust.fit(X)
def test_close_extract():
# Test extract where extraction eps is close to scaled max_eps
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers,
cluster_std=0.4, random_state=0)
# Compute OPTICS
clust = OPTICS(max_eps=1.0, cluster_method='dbscan',
eps=0.3, min_samples=10).fit(X)
# Cluster labels start at 0, so a maximum label of 2 means 3 clusters
assert max(clust.labels_) == 2
@pytest.mark.parametrize('eps', [0.1, .3, .5])
@pytest.mark.parametrize('min_samples', [3, 10, 20])
def test_dbscan_optics_parity(eps, min_samples):
# Test that OPTICS clustering labels differ from DBSCAN labels by <= 5%
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers,
cluster_std=0.4, random_state=0)
# calculate optics with dbscan extract at 0.3 epsilon
op = OPTICS(min_samples=min_samples, cluster_method='dbscan',
eps=eps).fit(X)
# calculate dbscan labels
db = DBSCAN(eps=eps, min_samples=min_samples).fit(X)
contingency = contingency_matrix(db.labels_, op.labels_)
agree = min(np.sum(np.max(contingency, axis=0)),
np.sum(np.max(contingency, axis=1)))
disagree = X.shape[0] - agree
percent_mismatch = np.round((disagree - 1) / X.shape[0], 2)
# verify label mismatch is <= 5% labels
assert percent_mismatch <= 0.05
def test_min_samples_edge_case():
C1 = [[0, 0], [0, 0.1], [0, -.1]]
C2 = [[10, 10], [10, 9], [10, 11]]
C3 = [[100, 100], [100, 96], [100, 106]]
X = np.vstack((C1, C2, C3))
expected_labels = np.r_[[0] * 3, [1] * 3, [2] * 3]
clust = OPTICS(min_samples=3,
max_eps=7, cluster_method='xi',
xi=0.04).fit(X)
assert_array_equal(clust.labels_, expected_labels)
expected_labels = np.r_[[0] * 3, [1] * 3, [-1] * 3]
clust = OPTICS(min_samples=3,
max_eps=3, cluster_method='xi',
xi=0.04).fit(X)
assert_array_equal(clust.labels_, expected_labels)
expected_labels = np.r_[[-1] * 9]
with pytest.warns(UserWarning, match="All reachability values"):
clust = OPTICS(min_samples=4,
max_eps=3, cluster_method='xi',
xi=0.04).fit(X)
assert_array_equal(clust.labels_, expected_labels)
# try arbitrary minimum sizes
@pytest.mark.parametrize('min_cluster_size', range(2, X.shape[0] // 10, 23))
def test_min_cluster_size(min_cluster_size):
redX = X[::2] # reduce for speed
clust = OPTICS(min_samples=9, min_cluster_size=min_cluster_size).fit(redX)
cluster_sizes = np.bincount(clust.labels_[clust.labels_ != -1])
if cluster_sizes.size:
assert min(cluster_sizes) >= min_cluster_size
# check behaviour is the same when min_cluster_size is a fraction
clust_frac = OPTICS(min_samples=9,
min_cluster_size=min_cluster_size / redX.shape[0])
clust_frac.fit(redX)
assert_array_equal(clust.labels_, clust_frac.labels_)
@pytest.mark.parametrize('min_cluster_size', [0, -1, 1.1, 2.2])
def test_min_cluster_size_invalid(min_cluster_size):
clust = OPTICS(min_cluster_size=min_cluster_size)
with pytest.raises(ValueError, match="must be a positive integer or a "):
clust.fit(X)
def test_min_cluster_size_invalid2():
clust = OPTICS(min_cluster_size=len(X) + 1)
with pytest.raises(ValueError, match="must be no greater than the "):
clust.fit(X)
def test_processing_order():
# Ensure that we consider all unprocessed points, not only direct
# neighbors, when picking the next point.
Y = [[0], [10], [-10], [25]]
clust = OPTICS(min_samples=3, max_eps=15).fit(Y)
assert_array_equal(clust.reachability_, [np.inf, 10, 10, 15])
assert_array_equal(clust.core_distances_, [10, 15, np.inf, np.inf])
assert_array_equal(clust.ordering_, [0, 1, 2, 3])
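# Informal walk-through of the expected values above (Euclidean distances,
# min_samples=3, so the core distance is the distance to the 3rd-nearest
# neighbour, the point itself included): core(0) = 10 and core(10) = 15,
# while -10 and 25 have their 3rd neighbour beyond max_eps=15, hence inf.
# Expanding from 0 gives both 10 and -10 a reachability of
# max(core(0), distance) = 10; expanding from 10 then sets the reachability
# of 25 to max(core(10), 15) = 15, i.e. 25 is reached through 10 rather than
# through a direct neighbour of the start point.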
def test_compare_to_ELKI():
# Expected values, computed with (future) ELKI 0.7.5 using:
# java -jar elki.jar cli -dbc.in csv -dbc.filter FixedDBIDsFilter
# -algorithm clustering.optics.OPTICSHeap -optics.minpts 5
# where the FixedDBIDsFilter gives 0-indexed ids.
r1 = [np.inf, 1.0574896366427478, 0.7587934993548423, 0.7290174038973836,
0.7290174038973836, 0.7290174038973836, 0.6861627576116127,
0.7587934993548423, 0.9280118450166668, 1.1748022534146194,
3.3355455741292257, 0.49618389254482587, 0.2552805046961355,
0.2552805046961355, 0.24944622248445714, 0.24944622248445714,
0.24944622248445714, 0.2552805046961355, 0.2552805046961355,
0.3086779122185853, 4.163024452756142, 1.623152630340929,
0.45315840475822655, 0.25468325192031926, 0.2254004358159971,
0.18765711877083036, 0.1821471333893275, 0.1821471333893275,
0.18765711877083036, 0.18765711877083036, 0.2240202988740153,
1.154337614548715, 1.342604473837069, 1.323308536402633,
0.8607514948648837, 0.27219111215810565, 0.13260875220533205,
0.13260875220533205, 0.09890587675958984, 0.09890587675958984,
0.13548790801634494, 0.1575483940837384, 0.17515137170530226,
0.17575920159442388, 0.27219111215810565, 0.6101447895405373,
1.3189208094864302, 1.323308536402633, 2.2509184159764577,
2.4517810628594527, 3.675977064404973, 3.8264795626020365,
2.9130735341510614, 2.9130735341510614, 2.9130735341510614,
2.9130735341510614, 2.8459300127258036, 2.8459300127258036,
2.8459300127258036, 3.0321982337972537]
o1 = [0, 3, 6, 4, 7, 8, 2, 9, 5, 1, 31, 30, 32, 34, 33, 38, 39, 35, 37, 36,
44, 21, 23, 24, 22, 25, 27, 29, 26, 28, 20, 40, 45, 46, 10, 15, 11,
13, 17, 19, 18, 12, 16, 14, 47, 49, 43, 48, 42, 41, 53, 57, 51, 52,
56, 59, 54, 55, 58, 50]
p1 = [-1, 0, 3, 6, 6, 6, 8, 3, 7, 5, 1, 31, 30, 30, 34, 34, 34, 32, 32, 37,
36, 44, 21, 23, 24, 22, 25, 25, 22, 22, 22, 21, 40, 45, 46, 10, 15,
15, 13, 13, 15, 11, 19, 15, 10, 47, 12, 45, 14, 43, 42, 53, 57, 57,
57, 57, 59, 59, 59, 58]
# Tests against known extraction array
# Does NOT work with metric='euclidean', because sklearn euclidean has
# worse numeric precision. 'minkowski' is slower but more accurate.
clust1 = OPTICS(min_samples=5).fit(X)
assert_array_equal(clust1.ordering_, np.array(o1))
assert_array_equal(clust1.predecessor_[clust1.ordering_], np.array(p1))
assert_allclose(clust1.reachability_[clust1.ordering_], np.array(r1))
# ELKI currently does not print the core distances (which are not used much
# in the literature), but we can at least ensure this consistency:
for i in clust1.ordering_[1:]:
assert (clust1.reachability_[i] >=
clust1.core_distances_[clust1.predecessor_[i]])
# Expected values, computed with (future) ELKI 0.7.5 using
r2 = [np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf,
np.inf, np.inf, np.inf, 0.27219111215810565, 0.13260875220533205,
0.13260875220533205, 0.09890587675958984, 0.09890587675958984,
0.13548790801634494, 0.1575483940837384, 0.17515137170530226,
0.17575920159442388, 0.27219111215810565, 0.4928068613197889,
np.inf, 0.2666183922512113, 0.18765711877083036, 0.1821471333893275,
0.1821471333893275, 0.1821471333893275, 0.18715928772277457,
0.18765711877083036, 0.18765711877083036, 0.25468325192031926,
np.inf, 0.2552805046961355, 0.2552805046961355, 0.24944622248445714,
0.24944622248445714, 0.24944622248445714, 0.2552805046961355,
0.2552805046961355, 0.3086779122185853, 0.34466409325984865,
np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf,
np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf,
np.inf, np.inf]
o2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 11, 13, 17, 19, 18, 12, 16, 14,
47, 46, 20, 22, 25, 23, 27, 29, 24, 26, 28, 21, 30, 32, 34, 33, 38,
39, 35, 37, 36, 31, 40, 41, 42, 43, 44, 45, 48, 49, 50, 51, 52, 53,
54, 55, 56, 57, 58, 59]
p2 = [-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 15, 15, 13, 13, 15,
11, 19, 15, 10, 47, -1, 20, 22, 25, 25, 25, 25, 22, 22, 23, -1, 30,
30, 34, 34, 34, 32, 32, 37, 38, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1]
clust2 = OPTICS(min_samples=5, max_eps=0.5).fit(X)
assert_array_equal(clust2.ordering_, np.array(o2))
assert_array_equal(clust2.predecessor_[clust2.ordering_], np.array(p2))
assert_allclose(clust2.reachability_[clust2.ordering_], np.array(r2))
index = np.where(clust1.core_distances_ <= 0.5)[0]
assert_allclose(clust1.core_distances_[index],
clust2.core_distances_[index])
def test_wrong_cluster_method():
clust = OPTICS(cluster_method='superfancy')
with pytest.raises(ValueError, match="cluster_method should be one of "):
clust.fit(X)
def test_extract_dbscan():
# testing an easy dbscan case. Not including clusters with different
# densities.
rng = np.random.RandomState(0)
n_points_per_cluster = 20
C1 = [-5, -2] + .2 * rng.randn(n_points_per_cluster, 2)
C2 = [4, -1] + .2 * rng.randn(n_points_per_cluster, 2)
C3 = [1, 2] + .2 * rng.randn(n_points_per_cluster, 2)
C4 = [-2, 3] + .2 * rng.randn(n_points_per_cluster, 2)
X = np.vstack((C1, C2, C3, C4))
clust = OPTICS(cluster_method='dbscan', eps=.5).fit(X)
assert_array_equal(np.sort(np.unique(clust.labels_)), [0, 1, 2, 3])
def test_precomputed_dists():
redX = X[::2]
dists = pairwise_distances(redX, metric='euclidean')
clust1 = OPTICS(min_samples=10, algorithm='brute',
metric='precomputed').fit(dists)
clust2 = OPTICS(min_samples=10, algorithm='brute',
metric='euclidean').fit(redX)
assert_allclose(clust1.reachability_, clust2.reachability_)
assert_array_equal(clust1.labels_, clust2.labels_)

View file

@ -0,0 +1,250 @@
"""Testing for Spectral Clustering methods"""
import numpy as np
from scipy import sparse
import pytest
import pickle
from sklearn.utils import check_random_state
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_warns_message
from sklearn.cluster import SpectralClustering, spectral_clustering
from sklearn.cluster._spectral import discretize
from sklearn.feature_extraction import img_to_graph
from sklearn.metrics import pairwise_distances
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics.pairwise import kernel_metrics, rbf_kernel
from sklearn.neighbors import NearestNeighbors
from sklearn.datasets import make_blobs
try:
from pyamg import smoothed_aggregation_solver # noqa
amg_loaded = True
except ImportError:
amg_loaded = False
@pytest.mark.parametrize('eigen_solver', ('arpack', 'lobpcg'))
@pytest.mark.parametrize('assign_labels', ('kmeans', 'discretize'))
def test_spectral_clustering(eigen_solver, assign_labels):
S = np.array([[1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],
[1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],
[1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0],
[0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0],
[0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0],
[0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0],
[0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]])
for mat in (S, sparse.csr_matrix(S)):
model = SpectralClustering(random_state=0, n_clusters=2,
affinity='precomputed',
eigen_solver=eigen_solver,
assign_labels=assign_labels
).fit(mat)
labels = model.labels_
if labels[0] == 0:
labels = 1 - labels
assert adjusted_rand_score(labels, [1, 1, 1, 0, 0, 0, 0]) == 1
model_copy = pickle.loads(pickle.dumps(model))
assert model_copy.n_clusters == model.n_clusters
assert model_copy.eigen_solver == model.eigen_solver
assert_array_equal(model_copy.labels_, model.labels_)
def test_spectral_unknown_mode():
# Test that SpectralClustering fails with an unknown mode set.
centers = np.array([
[0., 0., 0.],
[10., 10., 10.],
[20., 20., 20.],
])
X, true_labels = make_blobs(n_samples=100, centers=centers,
cluster_std=1., random_state=42)
D = pairwise_distances(X) # Distance matrix
S = np.max(D) - D # Similarity matrix
S = sparse.coo_matrix(S)
with pytest.raises(ValueError):
spectral_clustering(S, n_clusters=2, random_state=0,
eigen_solver="<unknown>")
def test_spectral_unknown_assign_labels():
# Test that SpectralClustering fails with an unknown assign_labels set.
centers = np.array([
[0., 0., 0.],
[10., 10., 10.],
[20., 20., 20.],
])
X, true_labels = make_blobs(n_samples=100, centers=centers,
cluster_std=1., random_state=42)
D = pairwise_distances(X) # Distance matrix
S = np.max(D) - D # Similarity matrix
S = sparse.coo_matrix(S)
with pytest.raises(ValueError):
spectral_clustering(S, n_clusters=2, random_state=0,
assign_labels="<unknown>")
def test_spectral_clustering_sparse():
X, y = make_blobs(n_samples=20, random_state=0,
centers=[[1, 1], [-1, -1]], cluster_std=0.01)
S = rbf_kernel(X, gamma=1)
S = np.maximum(S - 1e-4, 0)
S = sparse.coo_matrix(S)
labels = SpectralClustering(random_state=0, n_clusters=2,
affinity='precomputed').fit(S).labels_
assert adjusted_rand_score(y, labels) == 1
def test_precomputed_nearest_neighbors_filtering():
# Test precomputed graph filtering when containing too many neighbors
X, y = make_blobs(n_samples=200, random_state=0,
centers=[[1, 1], [-1, -1]], cluster_std=0.01)
n_neighbors = 2
results = []
for additional_neighbors in [0, 10]:
nn = NearestNeighbors(
n_neighbors=n_neighbors + additional_neighbors).fit(X)
graph = nn.kneighbors_graph(X, mode='connectivity')
labels = SpectralClustering(random_state=0, n_clusters=2,
affinity='precomputed_nearest_neighbors',
n_neighbors=n_neighbors).fit(graph).labels_
results.append(labels)
assert_array_equal(results[0], results[1])
def test_affinities():
# Note: in the following, random_state has been selected to have
# a dataset that yields a stable eigen decomposition both when built
# on OSX and Linux
X, y = make_blobs(n_samples=20, random_state=0,
centers=[[1, 1], [-1, -1]], cluster_std=0.01)
# nearest neighbors affinity
sp = SpectralClustering(n_clusters=2, affinity='nearest_neighbors',
random_state=0)
assert_warns_message(UserWarning, 'not fully connected', sp.fit, X)
assert adjusted_rand_score(y, sp.labels_) == 1
sp = SpectralClustering(n_clusters=2, gamma=2, random_state=0)
labels = sp.fit(X).labels_
assert adjusted_rand_score(y, labels) == 1
X = check_random_state(10).rand(10, 5) * 10
kernels_available = kernel_metrics()
for kern in kernels_available:
# Additive chi^2 gives a negative similarity matrix which
# doesn't make sense for spectral clustering
if kern != 'additive_chi2':
sp = SpectralClustering(n_clusters=2, affinity=kern,
random_state=0)
labels = sp.fit(X).labels_
assert (X.shape[0],) == labels.shape
sp = SpectralClustering(n_clusters=2, affinity=lambda x, y: 1,
random_state=0)
labels = sp.fit(X).labels_
assert (X.shape[0],) == labels.shape
def histogram(x, y, **kwargs):
# Histogram kernel implemented as a callable.
assert kwargs == {} # no kernel_params that we didn't ask for
return np.minimum(x, y).sum()
sp = SpectralClustering(n_clusters=2, affinity=histogram, random_state=0)
labels = sp.fit(X).labels_
assert (X.shape[0],) == labels.shape
# raise error on unknown affinity
sp = SpectralClustering(n_clusters=2, affinity='<unknown>')
with pytest.raises(ValueError):
sp.fit(X)
@pytest.mark.parametrize('n_samples', [50, 100, 150, 500])
def test_discretize(n_samples):
# Test discretize using a noisy assignment matrix
random_state = np.random.RandomState(seed=8)
for n_class in range(2, 10):
# random class labels
y_true = random_state.randint(0, n_class + 1, n_samples)
y_true = np.array(y_true, np.float)
# noise class assignment matrix
y_indicator = sparse.coo_matrix((np.ones(n_samples),
(np.arange(n_samples),
y_true)),
shape=(n_samples,
n_class + 1))
y_true_noisy = (y_indicator.toarray()
+ 0.1 * random_state.randn(n_samples,
n_class + 1))
y_pred = discretize(y_true_noisy, random_state=random_state)
assert adjusted_rand_score(y_true, y_pred) > 0.8
# TODO: Remove when pyamg replaces the sp.rand call with np.random.rand
# https://github.com/scikit-learn/scikit-learn/issues/15913
@pytest.mark.filterwarnings(
"ignore:scipy.rand is deprecated:DeprecationWarning:pyamg.*")
def test_spectral_clustering_with_arpack_amg_solvers():
# Test that spectral_clustering is the same for arpack and amg solver
# Based on toy example from plot_segmentation_toy.py
# a small two coin image
x, y = np.indices((40, 40))
center1, center2 = (14, 12), (20, 25)
radius1, radius2 = 8, 7
circle1 = (x - center1[0]) ** 2 + (y - center1[1]) ** 2 < radius1 ** 2
circle2 = (x - center2[0]) ** 2 + (y - center2[1]) ** 2 < radius2 ** 2
circles = circle1 | circle2
mask = circles.copy()
img = circles.astype(float)
graph = img_to_graph(img, mask=mask)
graph.data = np.exp(-graph.data / graph.data.std())
labels_arpack = spectral_clustering(
graph, n_clusters=2, eigen_solver='arpack', random_state=0)
assert len(np.unique(labels_arpack)) == 2
if amg_loaded:
labels_amg = spectral_clustering(
graph, n_clusters=2, eigen_solver='amg', random_state=0)
assert adjusted_rand_score(labels_arpack, labels_amg) == 1
else:
with pytest.raises(ValueError):
spectral_clustering(graph, n_clusters=2, eigen_solver='amg',
random_state=0)
def test_n_components():
# Test that passing n_components changes the result, and that
# n_components = n_clusters by default
X, y = make_blobs(n_samples=20, random_state=0,
centers=[[1, 1], [-1, -1]], cluster_std=0.01)
sp = SpectralClustering(n_clusters=2, random_state=0)
labels = sp.fit(X).labels_
# set n_components = n_clusters and test that the result is the same
labels_same_ncomp = SpectralClustering(n_clusters=2, n_components=2,
random_state=0).fit(X).labels_
# test that n_components=n_clusters by default
assert_array_equal(labels, labels_same_ncomp)
# test that n_components affects the result
# (n_clusters=8 by default; here we set n_components=2)
labels_diff_ncomp = SpectralClustering(n_components=2,
random_state=0).fit(X).labels_
assert not np.array_equal(labels, labels_diff_ncomp)