Uploaded Test files

This commit is contained in:
Batuhan Berk Başoğlu 2020-11-12 11:05:57 -05:00
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions


@@ -0,0 +1,32 @@
"""
The :mod:`sklearn.metrics.cluster` submodule contains evaluation metrics for
cluster analysis results. There are two forms of evaluation:
- supervised, which uses ground truth class values for each sample.
- unsupervised, which does not and measures the 'quality' of the model itself.
"""
from ._supervised import adjusted_mutual_info_score
from ._supervised import normalized_mutual_info_score
from ._supervised import adjusted_rand_score
from ._supervised import completeness_score
from ._supervised import contingency_matrix
from ._supervised import expected_mutual_information
from ._supervised import homogeneity_completeness_v_measure
from ._supervised import homogeneity_score
from ._supervised import mutual_info_score
from ._supervised import v_measure_score
from ._supervised import fowlkes_mallows_score
from ._supervised import entropy
from ._unsupervised import silhouette_samples
from ._unsupervised import silhouette_score
from ._unsupervised import calinski_harabasz_score
from ._unsupervised import davies_bouldin_score
from ._bicluster import consensus_score
__all__ = ["adjusted_mutual_info_score", "normalized_mutual_info_score",
"adjusted_rand_score", "completeness_score", "contingency_matrix",
"expected_mutual_information", "homogeneity_completeness_v_measure",
"homogeneity_score", "mutual_info_score", "v_measure_score",
"fowlkes_mallows_score", "entropy", "silhouette_samples",
"silhouette_score", "calinski_harabasz_score",
"davies_bouldin_score", "consensus_score"]


@@ -0,0 +1,86 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ...utils.validation import check_consistent_length, check_array
from ...utils.validation import _deprecate_positional_args
__all__ = ["consensus_score"]
def _check_rows_and_columns(a, b):
"""Unpacks the row and column arrays and checks their shape."""
check_consistent_length(*a)
check_consistent_length(*b)
checks = lambda x: check_array(x, ensure_2d=False)
a_rows, a_cols = map(checks, a)
b_rows, b_cols = map(checks, b)
return a_rows, a_cols, b_rows, b_cols
def _jaccard(a_rows, a_cols, b_rows, b_cols):
"""Jaccard coefficient on the elements of the two biclusters."""
intersection = ((a_rows * b_rows).sum() *
(a_cols * b_cols).sum())
a_size = a_rows.sum() * a_cols.sum()
b_size = b_rows.sum() * b_cols.sum()
return intersection / (a_size + b_size - intersection)
def _pairwise_similarity(a, b, similarity):
"""Computes pairwise similarity matrix.
result[i, j] is the Jaccard coefficient of a's bicluster i and b's
bicluster j.
"""
a_rows, a_cols, b_rows, b_cols = _check_rows_and_columns(a, b)
n_a = a_rows.shape[0]
n_b = b_rows.shape[0]
result = np.array(list(list(similarity(a_rows[i], a_cols[i],
b_rows[j], b_cols[j])
for j in range(n_b))
for i in range(n_a)))
return result
@_deprecate_positional_args
def consensus_score(a, b, *, similarity="jaccard"):
"""The similarity of two sets of biclusters.
Similarity between individual biclusters is computed. Then the
best matching between sets is found using the Hungarian algorithm.
The final score is the sum of similarities divided by the size of
the larger set.
Read more in the :ref:`User Guide <biclustering>`.
Parameters
----------
a : (rows, columns)
Tuple of row and column indicators for a set of biclusters.
b : (rows, columns)
Another set of biclusters like ``a``.
similarity : string or function, optional, default: "jaccard"
May be the string "jaccard" to use the Jaccard coefficient, or
any function that takes four arguments, each of which is a 1d
indicator vector: (a_rows, a_columns, b_rows, b_columns).
References
----------
* Hochreiter, Bodenhofer, et al., 2010. `FABIA: factor analysis
for bicluster acquisition
<https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2881408/>`__.
"""
if similarity == "jaccard":
similarity = _jaccard
matrix = _pairwise_similarity(a, b, similarity)
row_indices, col_indices = linear_sum_assignment(1. - matrix)
n_a = len(a[0])
n_b = len(b[0])
return matrix[row_indices, col_indices].sum() / max(n_a, n_b)
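
A minimal usage sketch (editor's addition, mirroring the bicluster tests later in this commit): identical sets of biclusters score 1.0, and the Hungarian matching makes the score invariant to the order in which the biclusters are listed:

import numpy as np
from sklearn.metrics import consensus_score

rows = np.array([[True, True, False, False],
                 [False, False, True, True]])
cols = rows  # toy example: the same indicators for rows and columns
print(consensus_score((rows, cols), (rows, cols)))              # 1.0
print(consensus_score((rows, cols), (rows[::-1], cols[::-1])))  # 1.0, order does not matter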


@@ -0,0 +1,980 @@
"""Utilities to evaluate the clustering performance of models.
Functions named as *_score return a scalar value to maximize: the higher the
better.
"""
# Authors: Olivier Grisel <olivier.grisel@ensta.org>
# Wei LI <kuantkid@gmail.com>
# Diego Molla <dmolla-aliod@gmail.com>
# Arnaud Fouchet <foucheta@gmail.com>
# Thierry Guillemot <thierry.guillemot.work@gmail.com>
# Gregory Stupp <stuppie@gmail.com>
# Joel Nothman <joel.nothman@gmail.com>
# Arya McCarthy <arya@jhu.edu>
# License: BSD 3 clause
from math import log
import numpy as np
from scipy import sparse as sp
from scipy.special import comb
from ._expected_mutual_info_fast import expected_mutual_information
from ...utils.validation import check_array, check_consistent_length
from ...utils.validation import _deprecate_positional_args
from ...utils.fixes import _astype_copy_false
def _comb2(n):
# the exact version is faster for k == 2: use it by default globally in
# this module instead of the float approximate variant
return comb(n, 2, exact=1)
def check_clusterings(labels_true, labels_pred):
"""Check that the labels arrays are 1D and of same dimension.
Parameters
----------
labels_true : array-like of shape (n_samples,)
The true labels.
labels_pred : array-like of shape (n_samples,)
The predicted labels.
"""
labels_true = check_array(
labels_true, ensure_2d=False, ensure_min_samples=0, dtype=None,
)
labels_pred = check_array(
labels_pred, ensure_2d=False, ensure_min_samples=0, dtype=None,
)
# input checks
if labels_true.ndim != 1:
raise ValueError(
"labels_true must be 1D: shape is %r" % (labels_true.shape,))
if labels_pred.ndim != 1:
raise ValueError(
"labels_pred must be 1D: shape is %r" % (labels_pred.shape,))
check_consistent_length(labels_true, labels_pred)
return labels_true, labels_pred
def _generalized_average(U, V, average_method):
"""Return a particular mean of two numbers."""
if average_method == "min":
return min(U, V)
elif average_method == "geometric":
return np.sqrt(U * V)
elif average_method == "arithmetic":
return np.mean([U, V])
elif average_method == "max":
return max(U, V)
else:
raise ValueError("'average_method' must be 'min', 'geometric', "
"'arithmetic', or 'max'")
@_deprecate_positional_args
def contingency_matrix(labels_true, labels_pred, *, eps=None, sparse=False):
"""Build a contingency matrix describing the relationship between labels.
Parameters
----------
labels_true : int array, shape = [n_samples]
Ground truth class labels to be used as a reference
labels_pred : array-like of shape (n_samples,)
Cluster labels to evaluate
eps : None or float, optional.
If a float, that value is added to all values in the contingency
matrix. This helps to stop NaN propagation.
If ``None``, nothing is adjusted.
sparse : boolean, optional.
If True, return a sparse CSR contingency matrix. If ``eps is not None``
and ``sparse is True``, a ValueError is raised.
.. versionadded:: 0.18
Returns
-------
contingency : {array-like, sparse}, shape=[n_classes_true, n_classes_pred]
Matrix :math:`C` such that :math:`C_{i, j}` is the number of samples in
true class :math:`i` and in predicted class :math:`j`. If
``eps is None``, the dtype of this array will be integer. If ``eps`` is
given, the dtype will be float.
Will be a ``scipy.sparse.csr_matrix`` if ``sparse=True``.
"""
if eps is not None and sparse:
raise ValueError("Cannot set 'eps' when sparse=True")
classes, class_idx = np.unique(labels_true, return_inverse=True)
clusters, cluster_idx = np.unique(labels_pred, return_inverse=True)
n_classes = classes.shape[0]
n_clusters = clusters.shape[0]
# Using coo_matrix to accelerate simple histogram calculation,
# i.e. bins are consecutive integers
# Currently, coo_matrix is faster than histogram2d for simple cases
contingency = sp.coo_matrix((np.ones(class_idx.shape[0]),
(class_idx, cluster_idx)),
shape=(n_classes, n_clusters),
dtype=np.int)
if sparse:
contingency = contingency.tocsr()
contingency.sum_duplicates()
else:
contingency = contingency.toarray()
if eps is not None:
# don't use += as contingency is integer
contingency = contingency + eps
return contingency
# clustering measures
def adjusted_rand_score(labels_true, labels_pred):
"""Rand index adjusted for chance.
The Rand Index computes a similarity measure between two clusterings
by considering all pairs of samples and counting pairs that are
assigned in the same or different clusters in the predicted and
true clusterings.
The raw RI score is then "adjusted for chance" into the ARI score
using the following scheme::
ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)
The adjusted Rand index is thus ensured to have a value close to
0.0 for random labeling independently of the number of clusters and
samples and exactly 1.0 when the clusterings are identical (up to
a permutation).
ARI is a symmetric measure::
adjusted_rand_score(a, b) == adjusted_rand_score(b, a)
Read more in the :ref:`User Guide <adjusted_rand_score>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
Ground truth class labels to be used as a reference
labels_pred : array-like of shape (n_samples,)
Cluster labels to evaluate
Returns
-------
ari : float
Similarity score between -1.0 and 1.0. Random labelings have an ARI
close to 0.0. 1.0 stands for perfect match.
Examples
--------
Perfectly matching labelings have a score of 1 even when the label values are permuted::
>>> from sklearn.metrics.cluster import adjusted_rand_score
>>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 1])
1.0
>>> adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0])
1.0
Labelings that assign all class members to the same clusters
are complete but not always pure, hence penalized::
>>> adjusted_rand_score([0, 0, 1, 2], [0, 0, 1, 1])
0.57...
ARI is symmetric, so labelings that have pure clusters with members
coming from the same classes but unnecessary splits are penalized::
>>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 2])
0.57...
If class members are completely split across different clusters, the
assignment is totally incomplete, hence the ARI is very low::
>>> adjusted_rand_score([0, 0, 0, 0], [0, 1, 2, 3])
0.0
References
----------
.. [Hubert1985] L. Hubert and P. Arabie, Comparing Partitions,
Journal of Classification 1985
https://link.springer.com/article/10.1007%2FBF01908075
.. [wk] https://en.wikipedia.org/wiki/Rand_index#Adjusted_Rand_index
See also
--------
adjusted_mutual_info_score: Adjusted Mutual Information
"""
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
n_samples = labels_true.shape[0]
n_classes = np.unique(labels_true).shape[0]
n_clusters = np.unique(labels_pred).shape[0]
# Special limit cases: no clustering since the data is not split;
# or trivial clustering where each document is assigned a unique cluster.
# These are perfect matches hence return 1.0.
if (n_classes == n_clusters == 1 or
n_classes == n_clusters == 0 or
n_classes == n_clusters == n_samples):
return 1.0
# Compute the ARI using the contingency data
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
sum_comb_c = sum(_comb2(n_c) for n_c in np.ravel(contingency.sum(axis=1)))
sum_comb_k = sum(_comb2(n_k) for n_k in np.ravel(contingency.sum(axis=0)))
sum_comb = sum(_comb2(n_ij) for n_ij in contingency.data)
prod_comb = (sum_comb_c * sum_comb_k) / _comb2(n_samples)
mean_comb = (sum_comb_k + sum_comb_c) / 2.
return (sum_comb - prod_comb) / (mean_comb - prod_comb)
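# Editor's note (illustrative comment, not upstream code): a worked instance of
# the pair-counting above for labels_true=[0, 0, 1, 1], labels_pred=[0, 0, 1, 2].
# The contingency matrix is [[2, 0, 0], [0, 1, 1]], so
#   sum_comb   = C(2, 2) + C(1, 2) + C(1, 2)  = 1    (non-zero cells 2, 1, 1)
#   sum_comb_c = C(2, 2) + C(2, 2)            = 2    (row sums 2, 2)
#   sum_comb_k = C(2, 2) + C(1, 2) + C(1, 2)  = 1    (column sums 2, 1, 1)
#   prod_comb  = 2 * 1 / C(4, 2)              = 1/3
#   mean_comb  = (2 + 1) / 2                  = 1.5
#   ARI        = (1 - 1/3) / (1.5 - 1/3)      = 4/7 ~ 0.571, matching the doctest above.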
@_deprecate_positional_args
def homogeneity_completeness_v_measure(labels_true, labels_pred, *, beta=1.0):
"""Compute the homogeneity and completeness and V-Measure scores at once.
Those metrics are based on normalized conditional entropy measures of
the clustering labeling to evaluate given the knowledge of a Ground
Truth class labels of the same samples.
A clustering result satisfies homogeneity if all of its clusters
contain only data points which are members of a single class.
A clustering result satisfies completeness if all the data points
that are members of a given class are elements of the same cluster.
Both scores have positive values between 0.0 and 1.0, larger values
being desirable.
Those 3 metrics are independent of the absolute values of the labels:
a permutation of the class or cluster label values won't change the
score values in any way.
V-Measure is furthermore symmetric: swapping ``labels_true`` and
``label_pred`` will give the same score. This does not hold for
homogeneity and completeness. V-Measure is identical to
:func:`normalized_mutual_info_score` with the arithmetic averaging
method.
Read more in the :ref:`User Guide <homogeneity_completeness>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
ground truth class labels to be used as a reference
labels_pred : array-like of shape (n_samples,)
cluster labels to evaluate
beta : float
Ratio of weight attributed to ``homogeneity`` vs ``completeness``.
If ``beta`` is greater than 1, ``completeness`` is weighted more
strongly in the calculation. If ``beta`` is less than 1,
``homogeneity`` is weighted more strongly.
Returns
-------
homogeneity : float
score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling
completeness : float
score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
v_measure : float
harmonic mean of the first two
See also
--------
homogeneity_score
completeness_score
v_measure_score
"""
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
if len(labels_true) == 0:
return 1.0, 1.0, 1.0
entropy_C = entropy(labels_true)
entropy_K = entropy(labels_pred)
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
MI = mutual_info_score(None, None, contingency=contingency)
homogeneity = MI / (entropy_C) if entropy_C else 1.0
completeness = MI / (entropy_K) if entropy_K else 1.0
if homogeneity + completeness == 0.0:
v_measure_score = 0.0
else:
v_measure_score = ((1 + beta) * homogeneity * completeness
/ (beta * homogeneity + completeness))
return homogeneity, completeness, v_measure_score
def homogeneity_score(labels_true, labels_pred):
"""Homogeneity metric of a cluster labeling given a ground truth.
A clustering result satisfies homogeneity if all of its clusters
contain only data points which are members of a single class.
This metric is independent of the absolute values of the labels:
a permutation of the class or cluster label values won't change the
score value in any way.
This metric is not symmetric: switching ``label_true`` with ``label_pred``
will return the :func:`completeness_score` which will be different in
general.
Read more in the :ref:`User Guide <homogeneity_completeness>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
ground truth class labels to be used as a reference
labels_pred : array-like of shape (n_samples,)
cluster labels to evaluate
Returns
-------
homogeneity : float
score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling
References
----------
.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A
conditional entropy-based external cluster evaluation measure
<https://aclweb.org/anthology/D/D07/D07-1043.pdf>`_
See also
--------
completeness_score
v_measure_score
Examples
--------
Perfect labelings are homogeneous::
>>> from sklearn.metrics.cluster import homogeneity_score
>>> homogeneity_score([0, 0, 1, 1], [1, 1, 0, 0])
1.0
Non-perfect labelings that further split classes into more clusters can be
perfectly homogeneous::
>>> print("%.6f" % homogeneity_score([0, 0, 1, 1], [0, 0, 1, 2]))
1.000000
>>> print("%.6f" % homogeneity_score([0, 0, 1, 1], [0, 1, 2, 3]))
1.000000
Clusters that include samples from different classes do not make for a
homogeneous labeling::
>>> print("%.6f" % homogeneity_score([0, 0, 1, 1], [0, 1, 0, 1]))
0.0...
>>> print("%.6f" % homogeneity_score([0, 0, 1, 1], [0, 0, 0, 0]))
0.0...
"""
return homogeneity_completeness_v_measure(labels_true, labels_pred)[0]
def completeness_score(labels_true, labels_pred):
"""Completeness metric of a cluster labeling given a ground truth.
A clustering result satisfies completeness if all the data points
that are members of a given class are elements of the same cluster.
This metric is independent of the absolute values of the labels:
a permutation of the class or cluster label values won't change the
score value in any way.
This metric is not symmetric: switching ``label_true`` with ``label_pred``
will return the :func:`homogeneity_score` which will be different in
general.
Read more in the :ref:`User Guide <homogeneity_completeness>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
ground truth class labels to be used as a reference
labels_pred : array-like of shape (n_samples,)
cluster labels to evaluate
Returns
-------
completeness : float
score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
References
----------
.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A
conditional entropy-based external cluster evaluation measure
<https://aclweb.org/anthology/D/D07/D07-1043.pdf>`_
See also
--------
homogeneity_score
v_measure_score
Examples
--------
Perfect labelings are complete::
>>> from sklearn.metrics.cluster import completeness_score
>>> completeness_score([0, 0, 1, 1], [1, 1, 0, 0])
1.0
Non-perfect labelings that assign all class members to the same clusters
are still complete::
>>> print(completeness_score([0, 0, 1, 1], [0, 0, 0, 0]))
1.0
>>> print(completeness_score([0, 1, 2, 3], [0, 0, 1, 1]))
0.999...
If class members are split across different clusters, the
assignment cannot be complete::
>>> print(completeness_score([0, 0, 1, 1], [0, 1, 0, 1]))
0.0
>>> print(completeness_score([0, 0, 0, 0], [0, 1, 2, 3]))
0.0
"""
return homogeneity_completeness_v_measure(labels_true, labels_pred)[1]
@_deprecate_positional_args
def v_measure_score(labels_true, labels_pred, *, beta=1.0):
"""V-measure cluster labeling given a ground truth.
This score is identical to :func:`normalized_mutual_info_score` with
the ``'arithmetic'`` option for averaging.
The V-measure is the harmonic mean between homogeneity and completeness::
v = (1 + beta) * homogeneity * completeness
/ (beta * homogeneity + completeness)
This metric is independent of the absolute values of the labels:
a permutation of the class or cluster label values won't change the
score value in any way.
This metric is furthermore symmetric: switching ``label_true`` with
``label_pred`` will return the same score value. This can be useful to
measure the agreement of two independent label assignments strategies
on the same dataset when the real ground truth is not known.
Read more in the :ref:`User Guide <homogeneity_completeness>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
ground truth class labels to be used as a reference
labels_pred : array-like of shape (n_samples,)
cluster labels to evaluate
beta : float
Ratio of weight attributed to ``homogeneity`` vs ``completeness``.
If ``beta`` is greater than 1, ``completeness`` is weighted more
strongly in the calculation. If ``beta`` is less than 1,
``homogeneity`` is weighted more strongly.
Returns
-------
v_measure : float
score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
References
----------
.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A
conditional entropy-based external cluster evaluation measure
<https://aclweb.org/anthology/D/D07/D07-1043.pdf>`_
See also
--------
homogeneity_score
completeness_score
normalized_mutual_info_score
Examples
--------
Perfect labelings are both homogeneous and complete, hence have score 1.0::
>>> from sklearn.metrics.cluster import v_measure_score
>>> v_measure_score([0, 0, 1, 1], [0, 0, 1, 1])
1.0
>>> v_measure_score([0, 0, 1, 1], [1, 1, 0, 0])
1.0
Labelings that assign all class members to the same clusters
are complete but not homogeneous, hence penalized::
>>> print("%.6f" % v_measure_score([0, 0, 1, 2], [0, 0, 1, 1]))
0.8...
>>> print("%.6f" % v_measure_score([0, 1, 2, 3], [0, 0, 1, 1]))
0.66...
Labelings that have pure clusters with members coming from the same
classes are homogeneous, but unnecessary splits harm completeness
and thus penalize the V-measure as well::
>>> print("%.6f" % v_measure_score([0, 0, 1, 1], [0, 0, 1, 2]))
0.8...
>>> print("%.6f" % v_measure_score([0, 0, 1, 1], [0, 1, 2, 3]))
0.66...
If class members are completely split across different clusters,
the assignment is totally incomplete, hence the V-Measure is null::
>>> print("%.6f" % v_measure_score([0, 0, 0, 0], [0, 1, 2, 3]))
0.0...
Clusters that include samples from totally different classes totally
destroy the homogeneity of the labeling, hence the V-measure is null::
>>> print("%.6f" % v_measure_score([0, 0, 1, 1], [0, 0, 0, 0]))
0.0...
"""
return homogeneity_completeness_v_measure(labels_true, labels_pred,
beta=beta)[2]
@_deprecate_positional_args
def mutual_info_score(labels_true, labels_pred, *, contingency=None):
"""Mutual Information between two clusterings.
The Mutual Information is a measure of the similarity between two labelings
of the same data. Where :math:`|U_i|` is the number of samples
in cluster :math:`U_i` and :math:`|V_j|` is the number of
samples in cluster :math:`V_j`, the Mutual Information
between clusterings :math:`U` and :math:`V` is given as:
.. math::
MI(U,V)=\\sum_{i=1}^{|U|} \\sum_{j=1}^{|V|} \\frac{|U_i\\cap V_j|}{N}
\\log\\frac{N|U_i \\cap V_j|}{|U_i||V_j|}
This metric is independent of the absolute values of the labels:
a permutation of the class or cluster label values won't change the
score value in any way.
This metric is furthermore symmetric: switching ``label_true`` with
``label_pred`` will return the same score value. This can be useful to
measure the agreement of two independent label assignments strategies
on the same dataset when the real ground truth is not known.
Read more in the :ref:`User Guide <mutual_info_score>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
A clustering of the data into disjoint subsets.
labels_pred : int array-like of shape (n_samples,)
A clustering of the data into disjoint subsets.
contingency : {None, array, sparse matrix}, \
shape = [n_classes_true, n_classes_pred]
A contingency matrix given by the :func:`contingency_matrix` function.
If value is ``None``, it will be computed, otherwise the given value is
used, with ``labels_true`` and ``labels_pred`` ignored.
Returns
-------
mi : float
Mutual information, a non-negative value
Notes
-----
The logarithm used is the natural logarithm (base-e).
See also
--------
adjusted_mutual_info_score: Adjusted against chance Mutual Information
normalized_mutual_info_score: Normalized Mutual Information
"""
if contingency is None:
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
else:
contingency = check_array(contingency,
accept_sparse=['csr', 'csc', 'coo'],
dtype=[int, np.int32, np.int64])
if isinstance(contingency, np.ndarray):
# For an array
nzx, nzy = np.nonzero(contingency)
nz_val = contingency[nzx, nzy]
elif sp.issparse(contingency):
# For a sparse matrix
nzx, nzy, nz_val = sp.find(contingency)
else:
raise ValueError("Unsupported type for 'contingency': %s" %
type(contingency))
contingency_sum = contingency.sum()
pi = np.ravel(contingency.sum(axis=1))
pj = np.ravel(contingency.sum(axis=0))
log_contingency_nm = np.log(nz_val)
contingency_nm = nz_val / contingency_sum
# Don't need to calculate the full outer product, just for non-zeroes
outer = (pi.take(nzx).astype(np.int64, copy=False)
* pj.take(nzy).astype(np.int64, copy=False))
log_outer = -np.log(outer) + log(pi.sum()) + log(pj.sum())
mi = (contingency_nm * (log_contingency_nm - log(contingency_sum)) +
contingency_nm * log_outer)
return np.clip(mi.sum(), 0.0, None)
@_deprecate_positional_args
def adjusted_mutual_info_score(labels_true, labels_pred, *,
average_method='arithmetic'):
"""Adjusted Mutual Information between two clusterings.
Adjusted Mutual Information (AMI) is an adjustment of the Mutual
Information (MI) score to account for chance. It accounts for the fact that
the MI is generally higher for two clusterings with a larger number of
clusters, regardless of whether there is actually more information shared.
For two clusterings :math:`U` and :math:`V`, the AMI is given as::
AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]
This metric is independent of the absolute values of the labels:
a permutation of the class or cluster label values won't change the
score value in any way.
This metric is furthermore symmetric: switching ``label_true`` with
``label_pred`` will return the same score value. This can be useful to
measure the agreement of two independent label assignments strategies
on the same dataset when the real ground truth is not known.
Be mindful that this function is an order of magnitude slower than other
metrics, such as the Adjusted Rand Index.
Read more in the :ref:`User Guide <mutual_info_score>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
A clustering of the data into disjoint subsets.
labels_pred : int array-like of shape (n_samples,)
A clustering of the data into disjoint subsets.
average_method : string, optional (default: 'arithmetic')
How to compute the normalizer in the denominator. Possible options
are 'min', 'geometric', 'arithmetic', and 'max'.
.. versionadded:: 0.20
.. versionchanged:: 0.22
The default value of ``average_method`` changed from 'max' to
'arithmetic'.
Returns
-------
ami : float (upper bounded by 1.0)
The AMI returns a value of 1 when the two partitions are identical
(i.e. perfectly matched). Random partitions (independent labellings) have
an expected AMI of around 0 on average and hence can be negative.
See also
--------
adjusted_rand_score: Adjusted Rand Index
mutual_info_score: Mutual Information (not adjusted for chance)
Examples
--------
Perfect labelings are both homogeneous and complete, hence have
score 1.0::
>>> from sklearn.metrics.cluster import adjusted_mutual_info_score
>>> adjusted_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])
... # doctest: +SKIP
1.0
>>> adjusted_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])
... # doctest: +SKIP
1.0
If class members are completely split across different clusters,
the assignment is totally incomplete, hence the AMI is null::
>>> adjusted_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])
... # doctest: +SKIP
0.0
References
----------
.. [1] `Vinh, Epps, and Bailey, (2010). Information Theoretic Measures for
Clusterings Comparison: Variants, Properties, Normalization and
Correction for Chance, JMLR
<http://jmlr.csail.mit.edu/papers/volume11/vinh10a/vinh10a.pdf>`_
.. [2] `Wikipedia entry for the Adjusted Mutual Information
<https://en.wikipedia.org/wiki/Adjusted_Mutual_Information>`_
"""
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
n_samples = labels_true.shape[0]
classes = np.unique(labels_true)
clusters = np.unique(labels_pred)
# Special limit cases: no clustering since the data is not split.
# This is a perfect match hence return 1.0.
if (classes.shape[0] == clusters.shape[0] == 1 or
classes.shape[0] == clusters.shape[0] == 0):
return 1.0
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
contingency = contingency.astype(np.float64,
**_astype_copy_false(contingency))
# Calculate the MI for the two clusterings
mi = mutual_info_score(labels_true, labels_pred,
contingency=contingency)
# Calculate the expected value for the mutual information
emi = expected_mutual_information(contingency, n_samples)
# Calculate entropy for each labeling
h_true, h_pred = entropy(labels_true), entropy(labels_pred)
normalizer = _generalized_average(h_true, h_pred, average_method)
denominator = normalizer - emi
# Avoid 0.0 / 0.0 when expectation equals maximum, i.e a perfect match.
# normalizer should always be >= emi, but because of floating-point
# representation, sometimes emi is slightly larger. Correct this
# by preserving the sign.
if denominator < 0:
denominator = min(denominator, -np.finfo('float64').eps)
else:
denominator = max(denominator, np.finfo('float64').eps)
ami = (mi - emi) / denominator
return ami
@_deprecate_positional_args
def normalized_mutual_info_score(labels_true, labels_pred, *,
average_method='arithmetic'):
"""Normalized Mutual Information between two clusterings.
Normalized Mutual Information (NMI) is a normalization of the Mutual
Information (MI) score to scale the results between 0 (no mutual
information) and 1 (perfect correlation). In this function, mutual
information is normalized by some generalized mean of ``H(labels_true)``
and ``H(labels_pred)``, defined by the ``average_method``.
This measure is not adjusted for chance. Therefore
:func:`adjusted_mutual_info_score` might be preferred.
This metric is independent of the absolute values of the labels:
a permutation of the class or cluster label values won't change the
score value in any way.
This metric is furthermore symmetric: switching ``label_true`` with
``label_pred`` will return the same score value. This can be useful to
measure the agreement of two independent label assignments strategies
on the same dataset when the real ground truth is not known.
Read more in the :ref:`User Guide <mutual_info_score>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
A clustering of the data into disjoint subsets.
labels_pred : int array-like of shape (n_samples,)
A clustering of the data into disjoint subsets.
average_method : string, optional (default: 'arithmetic')
How to compute the normalizer in the denominator. Possible options
are 'min', 'geometric', 'arithmetic', and 'max'.
.. versionadded:: 0.20
.. versionchanged:: 0.22
The default value of ``average_method`` changed from 'geometric' to
'arithmetic'.
Returns
-------
nmi : float
score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
See also
--------
v_measure_score: V-Measure (NMI with arithmetic mean option.)
adjusted_rand_score: Adjusted Rand Index
adjusted_mutual_info_score: Adjusted Mutual Information (adjusted
against chance)
Examples
--------
Perfect labelings are both homogeneous and complete, hence have
score 1.0::
>>> from sklearn.metrics.cluster import normalized_mutual_info_score
>>> normalized_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])
... # doctest: +SKIP
1.0
>>> normalized_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])
... # doctest: +SKIP
1.0
If class members are completely split across different clusters,
the assignment is totally incomplete, hence the NMI is null::
>>> normalized_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])
... # doctest: +SKIP
0.0
"""
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
classes = np.unique(labels_true)
clusters = np.unique(labels_pred)
# Special limit cases: no clustering since the data is not split.
# This is a perfect match hence return 1.0.
if (classes.shape[0] == clusters.shape[0] == 1 or
classes.shape[0] == clusters.shape[0] == 0):
return 1.0
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
contingency = contingency.astype(np.float64,
**_astype_copy_false(contingency))
# Calculate the MI for the two clusterings
mi = mutual_info_score(labels_true, labels_pred,
contingency=contingency)
# Calculate the expected value for the mutual information
# Calculate entropy for each labeling
h_true, h_pred = entropy(labels_true), entropy(labels_pred)
normalizer = _generalized_average(h_true, h_pred, average_method)
# Avoid 0.0 / 0.0 when either entropy is zero.
normalizer = max(normalizer, np.finfo('float64').eps)
nmi = mi / normalizer
return nmi
@_deprecate_positional_args
def fowlkes_mallows_score(labels_true, labels_pred, *, sparse=False):
"""Measure the similarity of two clusterings of a set of points.
.. versionadded:: 0.18
The Fowlkes-Mallows index (FMI) is defined as the geometric mean of
the precision and recall::
FMI = TP / sqrt((TP + FP) * (TP + FN))
Where ``TP`` is the number of **True Positives** (i.e. the number of pairs
of points that belong to the same clusters in both ``labels_true`` and
``labels_pred``), ``FP`` is the number of **False Positives** (i.e. the
number of pairs of points that belong to the same clusters in
``labels_true`` but not in ``labels_pred``) and ``FN`` is the number of
**False Negatives** (i.e. the number of pairs of points that belong to the
same clusters in ``labels_pred`` but not in ``labels_true``).
The score ranges from 0 to 1. A high value indicates a good similarity
between two clusterings.
Read more in the :ref:`User Guide <fowlkes_mallows_scores>`.
Parameters
----------
labels_true : int array, shape = (``n_samples``,)
A clustering of the data into disjoint subsets.
labels_pred : array, shape = (``n_samples``, )
A clustering of the data into disjoint subsets.
sparse : bool
Compute contingency matrix internally with sparse matrix.
Returns
-------
score : float
The resulting Fowlkes-Mallows score.
Examples
--------
Perfect labelings are both homogeneous and complete, hence have
score 1.0::
>>> from sklearn.metrics.cluster import fowlkes_mallows_score
>>> fowlkes_mallows_score([0, 0, 1, 1], [0, 0, 1, 1])
1.0
>>> fowlkes_mallows_score([0, 0, 1, 1], [1, 1, 0, 0])
1.0
If class members are completely split across different clusters,
the assignment is totally random, hence the FMI is null::
>>> fowlkes_mallows_score([0, 0, 0, 0], [0, 1, 2, 3])
0.0
References
----------
.. [1] `E. B. Fowlkes and C. L. Mallows, 1983. "A method for comparing two
hierarchical clusterings". Journal of the American Statistical
Association
<http://wildfire.stat.ucla.edu/pdflibrary/fowlkes.pdf>`_
.. [2] `Wikipedia entry for the Fowlkes-Mallows Index
<https://en.wikipedia.org/wiki/Fowlkes-Mallows_index>`_
"""
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
n_samples, = labels_true.shape
c = contingency_matrix(labels_true, labels_pred,
sparse=True)
c = c.astype(np.int64, **_astype_copy_false(c))
tk = np.dot(c.data, c.data) - n_samples
pk = np.sum(np.asarray(c.sum(axis=0)).ravel() ** 2) - n_samples
qk = np.sum(np.asarray(c.sum(axis=1)).ravel() ** 2) - n_samples
return np.sqrt(tk / pk) * np.sqrt(tk / qk) if tk != 0. else 0.
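# Editor's note (illustrative comment, not upstream code): with
# labels_true=[0, 0, 1, 1] and labels_pred=[0, 0, 1, 2] the sparse contingency
# matrix is [[2, 0, 0], [0, 1, 1]], so
#   tk = 2**2 + 1**2 + 1**2 - 4 = 2   (twice the number of true-positive pairs)
#   pk = 2**2 + 1**2 + 1**2 - 4 = 2   (from column sums 2, 1, 1)
#   qk = 2**2 + 2**2 - 4        = 4   (from row sums 2, 2)
#   FMI = sqrt(2/2) * sqrt(2/4) = 1/sqrt(2) ~ 0.707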
def entropy(labels):
"""Calculates the entropy for a labeling.
Parameters
----------
labels : int array, shape = [n_samples]
The labels
Notes
-----
The logarithm used is the natural logarithm (base-e).
"""
if len(labels) == 0:
return 1.0
label_idx = np.unique(labels, return_inverse=True)[1]
pi = np.bincount(label_idx).astype(np.float64)
pi = pi[pi > 0]
pi_sum = np.sum(pi)
# log(a / b) should be calculated as log(a) - log(b) to avoid
# possible loss of precision
return -np.sum((pi / pi_sum) * (np.log(pi) - log(pi_sum)))
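
A short usage sketch (editor's addition, not part of the file above) exercising a few of the supervised metrics defined in it:

from sklearn.metrics.cluster import (contingency_matrix, mutual_info_score,
                                     homogeneity_completeness_v_measure)

labels_true = [0, 0, 1, 1]
labels_pred = [0, 0, 1, 2]
C = contingency_matrix(labels_true, labels_pred)   # [[2, 0, 0], [0, 1, 1]]
# mutual_info_score can reuse a precomputed contingency matrix
print(mutual_info_score(None, None, contingency=C))
print(homogeneity_completeness_v_measure(labels_true, labels_pred))
# homogeneity is 1.0 (every cluster is pure); completeness < 1.0 because class 1 is split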


@@ -0,0 +1,363 @@
"""Unsupervised evaluation metrics."""
# Authors: Robert Layton <robertlayton@gmail.com>
# Arnaud Fouchet <foucheta@gmail.com>
# Thierry Guillemot <thierry.guillemot.work@gmail.com>
# License: BSD 3 clause
import functools
import numpy as np
from ...utils import check_random_state
from ...utils import check_X_y
from ...utils import _safe_indexing
from ..pairwise import pairwise_distances_chunked
from ..pairwise import pairwise_distances
from ...preprocessing import LabelEncoder
from ...utils.validation import _deprecate_positional_args
def check_number_of_labels(n_labels, n_samples):
"""Check that number of labels are valid.
Parameters
----------
n_labels : int
Number of labels
n_samples : int
Number of samples
"""
if not 1 < n_labels < n_samples:
raise ValueError("Number of labels is %d. Valid values are 2 "
"to n_samples - 1 (inclusive)" % n_labels)
@_deprecate_positional_args
def silhouette_score(X, labels, *, metric='euclidean', sample_size=None,
random_state=None, **kwds):
"""Compute the mean Silhouette Coefficient of all samples.
The Silhouette Coefficient is calculated using the mean intra-cluster
distance (``a``) and the mean nearest-cluster distance (``b``) for each
sample. The Silhouette Coefficient for a sample is ``(b - a) / max(a,
b)``. To clarify, ``b`` is the distance between a sample and the nearest
cluster that the sample is not a part of.
Note that Silhouette Coefficient is only defined if number of labels
is 2 <= n_labels <= n_samples - 1.
This function returns the mean Silhouette Coefficient over all samples.
To obtain the values for each sample, use :func:`silhouette_samples`.
The best value is 1 and the worst value is -1. Values near 0 indicate
overlapping clusters. Negative values generally indicate that a sample has
been assigned to the wrong cluster, as a different cluster is more similar.
Read more in the :ref:`User Guide <silhouette_coefficient>`.
Parameters
----------
X : array [n_samples_a, n_samples_a] if metric == "precomputed", or, \
[n_samples_a, n_features] otherwise
Array of pairwise distances between samples, or a feature array.
labels : array, shape = [n_samples]
Predicted labels for each sample.
metric : string, or callable
The metric to use when calculating distance between instances in a
feature array. If metric is a string, it must be one of the options
allowed by :func:`metrics.pairwise.pairwise_distances
<sklearn.metrics.pairwise.pairwise_distances>`. If X is the distance
array itself, use ``metric="precomputed"``.
sample_size : int or None
The size of the sample to use when computing the Silhouette Coefficient
on a random subset of the data.
If ``sample_size is None``, no sampling is used.
random_state : int, RandomState instance or None, optional (default=None)
Determines random number generation for selecting a subset of samples.
Used when ``sample_size is not None``.
Pass an int for reproducible results across multiple function calls.
See :term:`Glossary <random_state>`.
**kwds : optional keyword parameters
Any further parameters are passed directly to the distance function.
If using a scipy.spatial.distance metric, the parameters are still
metric dependent. See the scipy docs for usage examples.
Returns
-------
silhouette : float
Mean Silhouette Coefficient for all samples.
References
----------
.. [1] `Peter J. Rousseeuw (1987). "Silhouettes: a Graphical Aid to the
Interpretation and Validation of Cluster Analysis". Computational
and Applied Mathematics 20: 53-65.
<https://www.sciencedirect.com/science/article/pii/0377042787901257>`_
.. [2] `Wikipedia entry on the Silhouette Coefficient
<https://en.wikipedia.org/wiki/Silhouette_(clustering)>`_
"""
if sample_size is not None:
X, labels = check_X_y(X, labels, accept_sparse=['csc', 'csr'])
random_state = check_random_state(random_state)
indices = random_state.permutation(X.shape[0])[:sample_size]
if metric == "precomputed":
X, labels = X[indices].T[indices].T, labels[indices]
else:
X, labels = X[indices], labels[indices]
return np.mean(silhouette_samples(X, labels, metric=metric, **kwds))
def _silhouette_reduce(D_chunk, start, labels, label_freqs):
"""Accumulate silhouette statistics for vertical chunk of X
Parameters
----------
D_chunk : shape (n_chunk_samples, n_samples)
precomputed distances for a chunk
start : int
first index in chunk
labels : array, shape (n_samples,)
corresponding cluster labels, encoded as {0, ..., n_clusters-1}
label_freqs : array
distribution of cluster labels in ``labels``
"""
# accumulate distances from each sample to each cluster
clust_dists = np.zeros((len(D_chunk), len(label_freqs)),
dtype=D_chunk.dtype)
for i in range(len(D_chunk)):
clust_dists[i] += np.bincount(labels, weights=D_chunk[i],
minlength=len(label_freqs))
# intra_index selects intra-cluster distances within clust_dists
intra_index = (np.arange(len(D_chunk)), labels[start:start + len(D_chunk)])
# intra_clust_dists are averaged over cluster size outside this function
intra_clust_dists = clust_dists[intra_index]
# of the remaining distances we normalise and extract the minimum
clust_dists[intra_index] = np.inf
clust_dists /= label_freqs
inter_clust_dists = clust_dists.min(axis=1)
return intra_clust_dists, inter_clust_dists
@_deprecate_positional_args
def silhouette_samples(X, labels, *, metric='euclidean', **kwds):
"""Compute the Silhouette Coefficient for each sample.
The Silhouette Coefficient is a measure of how well samples are clustered
with samples that are similar to themselves. Clustering models with a high
Silhouette Coefficient are said to be dense, where samples in the same
cluster are similar to each other, and well separated, where samples in
different clusters are not very similar to each other.
The Silhouette Coefficient is calculated using the mean intra-cluster
distance (``a``) and the mean nearest-cluster distance (``b``) for each
sample. The Silhouette Coefficient for a sample is ``(b - a) / max(a,
b)``.
Note that Silhouette Coefficient is only defined if number of labels
is 2 <= n_labels <= n_samples - 1.
This function returns the Silhouette Coefficient for each sample.
The best value is 1 and the worst value is -1. Values near 0 indicate
overlapping clusters.
Read more in the :ref:`User Guide <silhouette_coefficient>`.
Parameters
----------
X : array [n_samples_a, n_samples_a] if metric == "precomputed", or, \
[n_samples_a, n_features] otherwise
Array of pairwise distances between samples, or a feature array.
labels : array, shape = [n_samples]
label values for each sample
metric : string, or callable
The metric to use when calculating distance between instances in a
feature array. If metric is a string, it must be one of the options
allowed by :func:`sklearn.metrics.pairwise.pairwise_distances`. If X is
the distance array itself, use "precomputed" as the metric. Precomputed
distance matrices must have 0 along the diagonal.
`**kwds` : optional keyword parameters
Any further parameters are passed directly to the distance function.
If using a ``scipy.spatial.distance`` metric, the parameters are still
metric dependent. See the scipy docs for usage examples.
Returns
-------
silhouette : array, shape = [n_samples]
Silhouette Coefficient for each sample.
References
----------
.. [1] `Peter J. Rousseeuw (1987). "Silhouettes: a Graphical Aid to the
Interpretation and Validation of Cluster Analysis". Computational
and Applied Mathematics 20: 53-65.
<https://www.sciencedirect.com/science/article/pii/0377042787901257>`_
.. [2] `Wikipedia entry on the Silhouette Coefficient
<https://en.wikipedia.org/wiki/Silhouette_(clustering)>`_
"""
X, labels = check_X_y(X, labels, accept_sparse=['csc', 'csr'])
# Check for non-zero diagonal entries in precomputed distance matrix
if metric == 'precomputed':
atol = np.finfo(X.dtype).eps * 100
if np.any(np.abs(np.diagonal(X)) > atol):
raise ValueError(
'The precomputed distance matrix contains non-zero '
'elements on the diagonal. Use np.fill_diagonal(X, 0).'
)
le = LabelEncoder()
labels = le.fit_transform(labels)
n_samples = len(labels)
label_freqs = np.bincount(labels)
check_number_of_labels(len(le.classes_), n_samples)
kwds['metric'] = metric
reduce_func = functools.partial(_silhouette_reduce,
labels=labels, label_freqs=label_freqs)
results = zip(*pairwise_distances_chunked(X, reduce_func=reduce_func,
**kwds))
intra_clust_dists, inter_clust_dists = results
intra_clust_dists = np.concatenate(intra_clust_dists)
inter_clust_dists = np.concatenate(inter_clust_dists)
denom = (label_freqs - 1).take(labels, mode='clip')
with np.errstate(divide="ignore", invalid="ignore"):
intra_clust_dists /= denom
sil_samples = inter_clust_dists - intra_clust_dists
with np.errstate(divide="ignore", invalid="ignore"):
sil_samples /= np.maximum(intra_clust_dists, inter_clust_dists)
# nan values are for clusters of size 1, and should be 0
return np.nan_to_num(sil_samples)
def calinski_harabasz_score(X, labels):
"""Compute the Calinski and Harabasz score.
It is also known as the Variance Ratio Criterion.
The score is defined as the ratio of the between-cluster dispersion to
the within-cluster dispersion.
Read more in the :ref:`User Guide <calinski_harabasz_index>`.
Parameters
----------
X : array-like, shape (``n_samples``, ``n_features``)
List of ``n_features``-dimensional data points. Each row corresponds
to a single data point.
labels : array-like, shape (``n_samples``,)
Predicted labels for each sample.
Returns
-------
score : float
The resulting Calinski-Harabasz score.
References
----------
.. [1] `T. Calinski and J. Harabasz, 1974. "A dendrite method for cluster
analysis". Communications in Statistics
<https://www.tandfonline.com/doi/abs/10.1080/03610927408827101>`_
"""
X, labels = check_X_y(X, labels)
le = LabelEncoder()
labels = le.fit_transform(labels)
n_samples, _ = X.shape
n_labels = len(le.classes_)
check_number_of_labels(n_labels, n_samples)
extra_disp, intra_disp = 0., 0.
mean = np.mean(X, axis=0)
for k in range(n_labels):
cluster_k = X[labels == k]
mean_k = np.mean(cluster_k, axis=0)
extra_disp += len(cluster_k) * np.sum((mean_k - mean) ** 2)
intra_disp += np.sum((cluster_k - mean_k) ** 2)
return (1. if intra_disp == 0. else
extra_disp * (n_samples - n_labels) /
(intra_disp * (n_labels - 1.)))
def davies_bouldin_score(X, labels):
"""Computes the Davies-Bouldin score.
The score is defined as the average similarity measure of each cluster with
its most similar cluster, where similarity is the ratio of within-cluster
distances to between-cluster distances. Thus, clusters which are farther
apart and less dispersed will result in a better score.
The minimum score is zero, with lower values indicating better clustering.
Read more in the :ref:`User Guide <davies-bouldin_index>`.
.. versionadded:: 0.20
Parameters
----------
X : array-like, shape (``n_samples``, ``n_features``)
List of ``n_features``-dimensional data points. Each row corresponds
to a single data point.
labels : array-like, shape (``n_samples``,)
Predicted labels for each sample.
Returns
-------
score: float
The resulting Davies-Bouldin score.
References
----------
.. [1] Davies, David L.; Bouldin, Donald W. (1979).
`"A Cluster Separation Measure"
<https://ieeexplore.ieee.org/document/4766909>`__.
IEEE Transactions on Pattern Analysis and Machine Intelligence.
PAMI-1 (2): 224-227
"""
X, labels = check_X_y(X, labels)
le = LabelEncoder()
labels = le.fit_transform(labels)
n_samples, _ = X.shape
n_labels = len(le.classes_)
check_number_of_labels(n_labels, n_samples)
intra_dists = np.zeros(n_labels)
centroids = np.zeros((n_labels, len(X[0])), dtype=np.float)
for k in range(n_labels):
cluster_k = _safe_indexing(X, labels == k)
centroid = cluster_k.mean(axis=0)
centroids[k] = centroid
intra_dists[k] = np.average(pairwise_distances(
cluster_k, [centroid]))
centroid_distances = pairwise_distances(centroids)
if np.allclose(intra_dists, 0) or np.allclose(centroid_distances, 0):
return 0.0
centroid_distances[centroid_distances == 0] = np.inf
combined_intra_dists = intra_dists[:, None] + intra_dists
scores = np.max(combined_intra_dists / centroid_distances, axis=1)
return np.mean(scores)
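
A brief usage sketch (editor's addition) for the three unsupervised metrics above, on two well-separated toy blobs:

import numpy as np
from sklearn.metrics import (silhouette_score, calinski_harabasz_score,
                             davies_bouldin_score)

X = np.array([[0.0, 0.0], [0.2, 0.1], [0.1, 0.3],
              [5.0, 5.0], [5.2, 5.1], [5.1, 4.9]])
labels = np.array([0, 0, 0, 1, 1, 1])

print(silhouette_score(X, labels))         # close to 1: tight, well-separated clusters
print(calinski_harabasz_score(X, labels))  # large: between-cluster dispersion dominates
print(davies_bouldin_score(X, labels))     # close to 0: lower is better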


@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _bicluster # type: ignore
from ...externals._pep562 import Pep562
from ...utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.metrics.cluster.bicluster'
correct_import_path = 'sklearn.metrics.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_bicluster, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)
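
A hedged sketch (editor's addition) of what this shim does in practice: importing the deprecated path emits a deprecation warning (outside pytest) and then forwards attribute access to the private module through the module-level __getattr__:

import warnings

with warnings.catch_warnings():
    warnings.simplefilter("ignore")   # silence the deprecation notice for this sketch
    from sklearn.metrics.cluster import bicluster   # deprecated alias of ._bicluster

print(bicluster.consensus_score.__module__)   # sklearn.metrics.cluster._bicluster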


@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _expected_mutual_info_fast # type: ignore
from ...externals._pep562 import Pep562
from ...utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.metrics.cluster.expected_mutual_info_fast'
correct_import_path = 'sklearn.metrics.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_expected_mutual_info_fast, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)


@@ -0,0 +1,24 @@
import os
import numpy
from numpy.distutils.misc_util import Configuration
def configuration(parent_package="", top_path=None):
config = Configuration("cluster", parent_package, top_path)
libraries = []
if os.name == 'posix':
libraries.append('m')
config.add_extension("_expected_mutual_info_fast",
sources=["_expected_mutual_info_fast.pyx"],
include_dirs=[numpy.get_include()],
libraries=libraries)
config.add_subpackage("tests")
return config
if __name__ == "__main__":
from numpy.distutils.core import setup
setup(**configuration().todict())


@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _supervised # type: ignore
from ...externals._pep562 import Pep562
from ...utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.metrics.cluster.supervised'
correct_import_path = 'sklearn.metrics.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_supervised, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)


@@ -0,0 +1,50 @@
"""Testing for bicluster metrics module"""
import numpy as np
from sklearn.utils._testing import assert_almost_equal
from sklearn.metrics.cluster._bicluster import _jaccard
from sklearn.metrics import consensus_score
def test_jaccard():
a1 = np.array([True, True, False, False])
a2 = np.array([True, True, True, True])
a3 = np.array([False, True, True, False])
a4 = np.array([False, False, True, True])
assert _jaccard(a1, a1, a1, a1) == 1
assert _jaccard(a1, a1, a2, a2) == 0.25
assert _jaccard(a1, a1, a3, a3) == 1.0 / 7
assert _jaccard(a1, a1, a4, a4) == 0
def test_consensus_score():
a = [[True, True, False, False],
[False, False, True, True]]
b = a[::-1]
assert consensus_score((a, a), (a, a)) == 1
assert consensus_score((a, a), (b, b)) == 1
assert consensus_score((a, b), (a, b)) == 1
assert consensus_score((a, b), (b, a)) == 1
assert consensus_score((a, a), (b, a)) == 0
assert consensus_score((a, a), (a, b)) == 0
assert consensus_score((b, b), (a, b)) == 0
assert consensus_score((b, b), (b, a)) == 0
def test_consensus_score_issue2445():
"""Different number of biclusters in A and B."""
a_rows = np.array([[True, True, False, False],
[False, False, True, True],
[False, False, False, True]])
a_cols = np.array([[True, True, False, False],
[False, False, True, True],
[False, False, False, True]])
idx = [0, 2]
s = consensus_score((a_rows, a_cols), (a_rows[idx], a_cols[idx]))
# B contains 2 of the 3 biclusters in A, so score should be 2/3
assert_almost_equal(s, 2.0/3.0)


@@ -0,0 +1,211 @@
from functools import partial
import pytest
import numpy as np
from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import completeness_score
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.metrics.cluster import homogeneity_score
from sklearn.metrics.cluster import mutual_info_score
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.metrics.cluster import v_measure_score
from sklearn.metrics.cluster import silhouette_score
from sklearn.metrics.cluster import calinski_harabasz_score
from sklearn.metrics.cluster import davies_bouldin_score
from sklearn.utils._testing import assert_allclose
# Dictionaries of metrics
# ------------------------
# The goal of having those dictionaries is to have an easy way to call a
# particular metric and associate a name to each function:
# - SUPERVISED_METRICS: all supervised cluster metrics - (when given a
# ground truth value)
# - UNSUPERVISED_METRICS: all unsupervised cluster metrics
#
# Those dictionaries will be used to test systematically some invariance
# properties, e.g. invariance toward several input layouts.
#
SUPERVISED_METRICS = {
"adjusted_mutual_info_score": adjusted_mutual_info_score,
"adjusted_rand_score": adjusted_rand_score,
"completeness_score": completeness_score,
"homogeneity_score": homogeneity_score,
"mutual_info_score": mutual_info_score,
"normalized_mutual_info_score": normalized_mutual_info_score,
"v_measure_score": v_measure_score,
"fowlkes_mallows_score": fowlkes_mallows_score
}
UNSUPERVISED_METRICS = {
"silhouette_score": silhouette_score,
"silhouette_manhattan": partial(silhouette_score, metric='manhattan'),
"calinski_harabasz_score": calinski_harabasz_score,
"davies_bouldin_score": davies_bouldin_score
}
# Lists of metrics with common properties
# ---------------------------------------
# Lists of metrics with common properties are used to test systematically some
# functionalities and invariance, e.g. SYMMETRIC_METRICS lists all metrics
# that are symmetric with respect to their input argument y_true and y_pred.
#
# --------------------------------------------------------------------
# Symmetric with respect to their input arguments y_true and y_pred.
# Symmetry only applies to the supervised clustering metrics.
SYMMETRIC_METRICS = [
"adjusted_rand_score", "v_measure_score",
"mutual_info_score", "adjusted_mutual_info_score",
"normalized_mutual_info_score", "fowlkes_mallows_score"
]
NON_SYMMETRIC_METRICS = ["homogeneity_score", "completeness_score"]
# Metrics whose upper bound is 1
NORMALIZED_METRICS = [
"adjusted_rand_score", "homogeneity_score", "completeness_score",
"v_measure_score", "adjusted_mutual_info_score", "fowlkes_mallows_score",
"normalized_mutual_info_score"
]
rng = np.random.RandomState(0)
y1 = rng.randint(3, size=30)
y2 = rng.randint(3, size=30)
def test_symmetric_non_symmetric_union():
assert (sorted(SYMMETRIC_METRICS + NON_SYMMETRIC_METRICS) ==
sorted(SUPERVISED_METRICS))
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize(
'metric_name, y1, y2',
[(name, y1, y2) for name in SYMMETRIC_METRICS]
)
def test_symmetry(metric_name, y1, y2):
metric = SUPERVISED_METRICS[metric_name]
assert metric(y1, y2) == pytest.approx(metric(y2, y1))
@pytest.mark.parametrize(
'metric_name, y1, y2',
[(name, y1, y2) for name in NON_SYMMETRIC_METRICS]
)
def test_non_symmetry(metric_name, y1, y2):
metric = SUPERVISED_METRICS[metric_name]
assert metric(y1, y2) != pytest.approx(metric(y2, y1))
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize("metric_name", NORMALIZED_METRICS)
def test_normalized_output(metric_name):
upper_bound_1 = [0, 0, 0, 1, 1, 1]
upper_bound_2 = [0, 0, 0, 1, 1, 1]
metric = SUPERVISED_METRICS[metric_name]
assert metric([0, 0, 0, 1, 1], [0, 0, 0, 1, 2]) > 0.0
assert metric([0, 0, 1, 1, 2], [0, 0, 1, 1, 1]) > 0.0
assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
assert metric(upper_bound_1, upper_bound_2) == pytest.approx(1.0)
lower_bound_1 = [0, 0, 0, 0, 0, 0]
lower_bound_2 = [0, 1, 2, 3, 4, 5]
score = np.array([metric(lower_bound_1, lower_bound_2),
metric(lower_bound_2, lower_bound_1)])
assert not (score < 0).any()
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize(
"metric_name", dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS)
)
def test_permute_labels(metric_name):
# All clustering metrics should not change their score under permutations of
# the labels, that is, when 0 and 1 are exchanged.
y_label = np.array([0, 0, 0, 1, 1, 0, 1])
y_pred = np.array([1, 0, 1, 0, 1, 1, 0])
if metric_name in SUPERVISED_METRICS:
metric = SUPERVISED_METRICS[metric_name]
score_1 = metric(y_pred, y_label)
assert_allclose(score_1, metric(1 - y_pred, y_label))
assert_allclose(score_1, metric(1 - y_pred, 1 - y_label))
assert_allclose(score_1, metric(y_pred, 1 - y_label))
else:
metric = UNSUPERVISED_METRICS[metric_name]
X = np.random.randint(10, size=(7, 10))
score_1 = metric(X, y_pred)
assert_allclose(score_1, metric(X, 1 - y_pred))
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize(
"metric_name", dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS)
)
# For all clustering metrics, input parameters can be given as arrays or
# lists, with positive, negative or string labels.
def test_format_invariance(metric_name):
y_true = [0, 0, 0, 0, 1, 1, 1, 1]
y_pred = [0, 1, 2, 3, 4, 5, 6, 7]
def generate_formats(y):
y = np.array(y)
yield y, 'array of ints'
yield y.tolist(), 'list of ints'
yield [str(x) + "-a" for x in y.tolist()], 'list of strs'
yield (np.array([str(x) + "-a" for x in y.tolist()], dtype=object),
'array of strs')
yield y - 1, 'including negative ints'
yield y + 1, 'strictly positive ints'
if metric_name in SUPERVISED_METRICS:
metric = SUPERVISED_METRICS[metric_name]
score_1 = metric(y_true, y_pred)
y_true_gen = generate_formats(y_true)
y_pred_gen = generate_formats(y_pred)
for (y_true_fmt, fmt_name), (y_pred_fmt, _) in zip(y_true_gen,
y_pred_gen):
assert score_1 == metric(y_true_fmt, y_pred_fmt)
else:
metric = UNSUPERVISED_METRICS[metric_name]
X = np.random.randint(10, size=(8, 10))
score_1 = metric(X, y_true)
assert score_1 == metric(X.astype(float), y_true)
y_true_gen = generate_formats(y_true)
for (y_true_fmt, fmt_name) in y_true_gen:
assert score_1 == metric(X, y_true_fmt)
@pytest.mark.parametrize("metric", SUPERVISED_METRICS.values())
def test_single_sample(metric):
    # Only the supervised metrics support single-sample inputs.
for i, j in [(0, 0), (0, 1), (1, 0), (1, 1)]:
metric([i], [j])
@pytest.mark.parametrize(
"metric_name, metric_func",
dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS).items()
)
def test_inf_nan_input(metric_name, metric_func):
if metric_name in SUPERVISED_METRICS:
invalids = [([0, 1], [np.inf, np.inf]),
([0, 1], [np.nan, np.nan]),
([0, 1], [np.nan, np.inf])]
else:
X = np.random.randint(10, size=(2, 10))
invalids = [(X, [np.inf, np.inf]),
(X, [np.nan, np.nan]),
(X, [np.nan, np.inf])]
    for args in invalids:
        with pytest.raises(ValueError, match='contains NaN, infinity'):
            metric_func(*args)

View file

@ -0,0 +1,358 @@
import numpy as np
import pytest
from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import completeness_score
from sklearn.metrics.cluster import contingency_matrix
from sklearn.metrics.cluster import entropy
from sklearn.metrics.cluster import expected_mutual_information
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.metrics.cluster import homogeneity_completeness_v_measure
from sklearn.metrics.cluster import homogeneity_score
from sklearn.metrics.cluster import mutual_info_score
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.metrics.cluster import v_measure_score
from sklearn.metrics.cluster._supervised import _generalized_average
from sklearn.utils import assert_all_finite
from sklearn.utils._testing import (
assert_almost_equal, ignore_warnings)
from numpy.testing import assert_array_almost_equal
score_funcs = [
adjusted_rand_score,
homogeneity_score,
completeness_score,
v_measure_score,
adjusted_mutual_info_score,
normalized_mutual_info_score,
]
@ignore_warnings(category=FutureWarning)
def test_error_messages_on_wrong_input():
for score_func in score_funcs:
expected = (r'Found input variables with inconsistent numbers '
r'of samples: \[2, 3\]')
with pytest.raises(ValueError, match=expected):
score_func([0, 1], [1, 1, 1])
expected = r"labels_true must be 1D: shape is \(2"
with pytest.raises(ValueError, match=expected):
score_func([[0, 1], [1, 0]], [1, 1, 1])
expected = r"labels_pred must be 1D: shape is \(2"
with pytest.raises(ValueError, match=expected):
score_func([0, 1, 0], [[1, 1], [0, 0]])
def test_generalized_average():
a, b = 1, 2
methods = ["min", "geometric", "arithmetic", "max"]
means = [_generalized_average(a, b, method) for method in methods]
assert means[0] <= means[1] <= means[2] <= means[3]
c, d = 12, 12
means = [_generalized_average(c, d, method) for method in methods]
assert means[0] == means[1] == means[2] == means[3]
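# Illustrative note (not part of the original tests): for a = 1, b = 2 the
# four generalized averages are min = 1.0, geometric = sqrt(2) ~ 1.414,
# arithmetic = 1.5 and max = 2.0, which is exactly the ordering asserted above.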
@ignore_warnings(category=FutureWarning)
def test_perfect_matches():
for score_func in score_funcs:
assert score_func([], []) == pytest.approx(1.0)
assert score_func([0], [1]) == pytest.approx(1.0)
assert score_func([0, 0, 0], [0, 0, 0]) == pytest.approx(1.0)
assert score_func([0, 1, 0], [42, 7, 42]) == pytest.approx(1.0)
assert score_func([0., 1., 0.], [42., 7., 42.]) == pytest.approx(1.0)
assert score_func([0., 1., 2.], [42., 7., 2.]) == pytest.approx(1.0)
assert score_func([0, 1, 2], [42, 7, 2]) == pytest.approx(1.0)
score_funcs_with_changing_means = [
normalized_mutual_info_score,
adjusted_mutual_info_score,
]
means = {"min", "geometric", "arithmetic", "max"}
for score_func in score_funcs_with_changing_means:
for mean in means:
assert score_func([], [], mean) == pytest.approx(1.0)
assert score_func([0], [1], mean) == pytest.approx(1.0)
assert score_func([0, 0, 0], [0, 0, 0], mean) == pytest.approx(1.0)
assert score_func(
[0, 1, 0], [42, 7, 42], mean) == pytest.approx(1.0)
assert score_func(
[0., 1., 0.], [42., 7., 42.], mean) == pytest.approx(1.0)
assert score_func(
[0., 1., 2.], [42., 7., 2.], mean) == pytest.approx(1.0)
assert score_func(
[0, 1, 2], [42, 7, 2], mean) == pytest.approx(1.0)
def test_homogeneous_but_not_complete_labeling():
# homogeneous but not complete clustering
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 1, 1, 1],
[0, 0, 0, 1, 2, 2])
assert_almost_equal(h, 1.00, 2)
assert_almost_equal(c, 0.69, 2)
assert_almost_equal(v, 0.81, 2)
def test_complete_but_not_homogeneous_labeling():
# complete but not homogeneous clustering
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 1, 1, 2, 2],
[0, 0, 1, 1, 1, 1])
assert_almost_equal(h, 0.58, 2)
assert_almost_equal(c, 1.00, 2)
assert_almost_equal(v, 0.73, 2)
def test_not_complete_and_not_homogeneous_labeling():
# neither complete nor homogeneous but not so bad either
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 1, 1, 1],
[0, 1, 0, 1, 2, 2])
assert_almost_equal(h, 0.67, 2)
assert_almost_equal(c, 0.42, 2)
assert_almost_equal(v, 0.52, 2)
def test_beta_parameter():
    # Test the beta parameter passed to homogeneity_completeness_v_measure
    # and v_measure_score.
beta_test = 0.2
h_test = 0.67
c_test = 0.42
v_test = ((1 + beta_test) * h_test * c_test
/ (beta_test * h_test + c_test))
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 1, 1, 1],
[0, 1, 0, 1, 2, 2],
beta=beta_test)
assert_almost_equal(h, h_test, 2)
assert_almost_equal(c, c_test, 2)
assert_almost_equal(v, v_test, 2)
v = v_measure_score(
[0, 0, 0, 1, 1, 1],
[0, 1, 0, 1, 2, 2],
beta=beta_test)
assert_almost_equal(v, v_test, 2)
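# Illustrative note (not part of the original tests): with h = 0.67, c = 0.42
# and beta = 0.2 the weighted V-measure works out to
#   (1 + 0.2) * 0.67 * 0.42 / (0.2 * 0.67 + 0.42) ~ 0.338 / 0.554 ~ 0.61,
# so beta < 1 weights homogeneity more heavily than completeness.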
def test_non_consecutive_labels():
# regression tests for labels with gaps
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 2, 2, 2],
[0, 1, 0, 1, 2, 2])
assert_almost_equal(h, 0.67, 2)
assert_almost_equal(c, 0.42, 2)
assert_almost_equal(v, 0.52, 2)
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 1, 1, 1],
[0, 4, 0, 4, 2, 2])
assert_almost_equal(h, 0.67, 2)
assert_almost_equal(c, 0.42, 2)
assert_almost_equal(v, 0.52, 2)
ari_1 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])
ari_2 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])
assert_almost_equal(ari_1, 0.24, 2)
assert_almost_equal(ari_2, 0.24, 2)
@ignore_warnings(category=FutureWarning)
def uniform_labelings_scores(score_func, n_samples, k_range, n_runs=10,
seed=42):
# Compute score for random uniform cluster labelings
random_labels = np.random.RandomState(seed).randint
scores = np.zeros((len(k_range), n_runs))
for i, k in enumerate(k_range):
for j in range(n_runs):
labels_a = random_labels(low=0, high=k, size=n_samples)
labels_b = random_labels(low=0, high=k, size=n_samples)
scores[i, j] = score_func(labels_a, labels_b)
return scores
@ignore_warnings(category=FutureWarning)
def test_adjustment_for_chance():
# Check that adjusted scores are almost zero on random labels
n_clusters_range = [2, 10, 50, 90]
n_samples = 100
n_runs = 10
scores = uniform_labelings_scores(
adjusted_rand_score, n_samples, n_clusters_range, n_runs)
max_abs_scores = np.abs(scores).max(axis=1)
assert_array_almost_equal(max_abs_scores, [0.02, 0.03, 0.03, 0.02], 2)
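# A minimal sketch (not part of the original tests) of why the adjustment
# matters: unadjusted scores such as v_measure_score drift upwards with the
# number of random clusters rather than staying near zero, e.g.
#   raw = uniform_labelings_scores(v_measure_score, 100, [2, 10, 50, 90])
#   raw.mean(axis=1)  # grows with the number of clusters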
def test_adjusted_mutual_info_score():
# Compute the Adjusted Mutual Information and test against known values
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
# Mutual information
mi = mutual_info_score(labels_a, labels_b)
assert_almost_equal(mi, 0.41022, 5)
# with provided sparse contingency
C = contingency_matrix(labels_a, labels_b, sparse=True)
mi = mutual_info_score(labels_a, labels_b, contingency=C)
assert_almost_equal(mi, 0.41022, 5)
# with provided dense contingency
C = contingency_matrix(labels_a, labels_b)
mi = mutual_info_score(labels_a, labels_b, contingency=C)
assert_almost_equal(mi, 0.41022, 5)
# Expected mutual information
n_samples = C.sum()
emi = expected_mutual_information(C, n_samples)
assert_almost_equal(emi, 0.15042, 5)
# Adjusted mutual information
ami = adjusted_mutual_info_score(labels_a, labels_b)
assert_almost_equal(ami, 0.27821, 5)
ami = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3])
assert ami == pytest.approx(1.0)
# Test with a very large array
a110 = np.array([list(labels_a) * 110]).flatten()
b110 = np.array([list(labels_b) * 110]).flatten()
ami = adjusted_mutual_info_score(a110, b110)
assert_almost_equal(ami, 0.38, 2)
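# Illustrative note (not part of the original tests): AMI adjusts MI for
# chance via AMI = (MI - E[MI]) / (mean(H(a), H(b)) - E[MI]); with the values
# above, MI ~ 0.41022, E[MI] ~ 0.15042 and the arithmetic mean of the label
# entropies ~ 1.084, giving (0.41022 - 0.15042) / (1.084 - 0.15042) ~ 0.278.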
def test_expected_mutual_info_overflow():
# Test for regression where contingency cell exceeds 2**16
# leading to overflow in np.outer, resulting in EMI > 1
assert expected_mutual_information(np.array([[70000]]), 70000) <= 1
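# Illustrative note (not part of the original tests): a marginal of 70000
# (> 2**16) makes the outer product reach 70000 * 70000 ~ 4.9e9, which is
# beyond the 32-bit integer range (~4.3e9 unsigned) and used to wrap around,
# hence the EMI <= 1 bound checked above.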
def test_int_overflow_mutual_info_fowlkes_mallows_score():
    # Test integer overflow in mutual_info_score and fowlkes_mallows_score
x = np.array([1] * (52632 + 2529) + [2] * (14660 + 793) + [3] * (3271 +
204) + [4] * (814 + 39) + [5] * (316 + 20))
y = np.array([0] * 52632 + [1] * 2529 + [0] * 14660 + [1] * 793 +
[0] * 3271 + [1] * 204 + [0] * 814 + [1] * 39 + [0] * 316 +
[1] * 20)
assert_all_finite(mutual_info_score(x, y))
assert_all_finite(fowlkes_mallows_score(x, y))
def test_entropy():
ent = entropy([0, 0, 42.])
assert_almost_equal(ent, 0.6365141, 5)
assert_almost_equal(entropy([]), 1)
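# Illustrative note (not part of the original tests): for [0, 0, 42.] the
# class proportions are 2/3 and 1/3, so
#   H = -(2/3 * log(2/3) + 1/3 * log(1/3)) ~ 0.2703 + 0.3662 ~ 0.6365 nats,
# and entropy([]) is defined to be 1 by convention, as asserted above.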
def test_contingency_matrix():
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
C = contingency_matrix(labels_a, labels_b)
C2 = np.histogram2d(labels_a, labels_b,
bins=(np.arange(1, 5),
np.arange(1, 5)))[0]
assert_array_almost_equal(C, C2)
C = contingency_matrix(labels_a, labels_b, eps=.1)
assert_array_almost_equal(C, C2 + .1)
def test_contingency_matrix_sparse():
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
C = contingency_matrix(labels_a, labels_b)
C_sparse = contingency_matrix(labels_a, labels_b, sparse=True).toarray()
assert_array_almost_equal(C, C_sparse)
with pytest.raises(ValueError, match="Cannot set 'eps' when sparse=True"):
contingency_matrix(labels_a, labels_b, eps=1e-10, sparse=True)
@ignore_warnings(category=FutureWarning)
def test_exactly_zero_info_score():
# Check numerical stability when information is exactly zero
    for i in np.logspace(1, 4, 4).astype(int):
        labels_a, labels_b = (np.ones(i, dtype=int),
                              np.arange(i, dtype=int))
assert normalized_mutual_info_score(
labels_a, labels_b) == pytest.approx(0.0)
assert v_measure_score(
labels_a, labels_b) == pytest.approx(0.0)
assert adjusted_mutual_info_score(
labels_a, labels_b) == pytest.approx(0.0)
assert normalized_mutual_info_score(
labels_a, labels_b) == pytest.approx(0.0)
for method in ["min", "geometric", "arithmetic", "max"]:
assert adjusted_mutual_info_score(
labels_a, labels_b, method) == pytest.approx(0.0)
assert normalized_mutual_info_score(
labels_a, labels_b, method) == pytest.approx(0.0)
def test_v_measure_and_mutual_information(seed=36):
# Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(int):
random_state = np.random.RandomState(seed)
labels_a, labels_b = (random_state.randint(0, 10, i),
random_state.randint(0, 10, i))
assert_almost_equal(v_measure_score(labels_a, labels_b),
2.0 * mutual_info_score(labels_a, labels_b) /
(entropy(labels_a) + entropy(labels_b)), 0)
avg = 'arithmetic'
assert_almost_equal(v_measure_score(labels_a, labels_b),
normalized_mutual_info_score(labels_a, labels_b,
average_method=avg)
)
def test_fowlkes_mallows_score():
# General case
score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1],
[0, 0, 1, 1, 2, 2])
assert_almost_equal(score, 4. / np.sqrt(12. * 6.))
# Perfect match but where the label names changed
perfect_score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1],
[1, 1, 1, 0, 0, 0])
assert_almost_equal(perfect_score, 1.)
# Worst case
worst_score = fowlkes_mallows_score([0, 0, 0, 0, 0, 0],
[0, 1, 2, 3, 4, 5])
assert_almost_equal(worst_score, 0.)
def test_fowlkes_mallows_score_properties():
# handcrafted example
labels_a = np.array([0, 0, 0, 1, 1, 2])
labels_b = np.array([1, 1, 2, 2, 0, 0])
expected = 1. / np.sqrt((1. + 3.) * (1. + 2.))
# FMI = TP / sqrt((TP + FP) * (TP + FN))
score_original = fowlkes_mallows_score(labels_a, labels_b)
assert_almost_equal(score_original, expected)
# symmetric property
score_symmetric = fowlkes_mallows_score(labels_b, labels_a)
assert_almost_equal(score_symmetric, expected)
# permutation property
score_permuted = fowlkes_mallows_score((labels_a + 1) % 3, labels_b)
assert_almost_equal(score_permuted, expected)
    # symmetric and permutation (both together)
score_both = fowlkes_mallows_score(labels_b, (labels_a + 2) % 3)
assert_almost_equal(score_both, expected)
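# Illustrative note (not part of the original tests): counting unordered
# pairs that share a cluster in the handcrafted example gives TP = 1 (only
# the first two samples are co-clustered in both labelings), FP = 3 and
# FN = 2, so FMI = 1 / sqrt((1 + 3) * (1 + 2)) = 1 / sqrt(12), the `expected`
# value above.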
@pytest.mark.parametrize('labels_true, labels_pred', [
(['a'] * 6, [1, 1, 0, 0, 1, 1]),
([1] * 6, [1, 1, 0, 0, 1, 1]),
([1, 1, 0, 0, 1, 1], ['a'] * 6),
([1, 1, 0, 0, 1, 1], [1] * 6),
])
def test_mutual_info_score_positive_constant_label(labels_true, labels_pred):
# non-regression test for #16355
assert mutual_info_score(labels_true, labels_pred) >= 0

View file

@ -0,0 +1,252 @@
import numpy as np
import scipy.sparse as sp
import pytest
from scipy.sparse import csr_matrix
from sklearn import datasets
from sklearn.utils._testing import assert_array_equal
from sklearn.metrics.cluster import silhouette_score
from sklearn.metrics.cluster import silhouette_samples
from sklearn.metrics import pairwise_distances
from sklearn.metrics.cluster import calinski_harabasz_score
from sklearn.metrics.cluster import davies_bouldin_score
def test_silhouette():
# Tests the Silhouette Coefficient.
dataset = datasets.load_iris()
X_dense = dataset.data
X_csr = csr_matrix(X_dense)
X_dok = sp.dok_matrix(X_dense)
X_lil = sp.lil_matrix(X_dense)
y = dataset.target
for X in [X_dense, X_csr, X_dok, X_lil]:
D = pairwise_distances(X, metric='euclidean')
# Given that the actual labels are used, we can assume that S would be
# positive.
score_precomputed = silhouette_score(D, y, metric='precomputed')
assert score_precomputed > 0
# Test without calculating D
score_euclidean = silhouette_score(X, y, metric='euclidean')
        assert score_precomputed == pytest.approx(score_euclidean)
if X is X_dense:
score_dense_without_sampling = score_precomputed
else:
            assert score_euclidean == pytest.approx(
                score_dense_without_sampling)
# Test with sampling
score_precomputed = silhouette_score(D, y, metric='precomputed',
sample_size=int(X.shape[0] / 2),
random_state=0)
score_euclidean = silhouette_score(X, y, metric='euclidean',
sample_size=int(X.shape[0] / 2),
random_state=0)
assert score_precomputed > 0
assert score_euclidean > 0
        assert score_euclidean == pytest.approx(score_precomputed)
if X is X_dense:
score_dense_with_sampling = score_precomputed
else:
            assert score_euclidean == pytest.approx(score_dense_with_sampling)
def test_cluster_size_1():
# Assert Silhouette Coefficient == 0 when there is 1 sample in a cluster
# (cluster 0). We also test the case where there are identical samples
# as the only members of a cluster (cluster 2). To our knowledge, this case
# is not discussed in reference material, and we choose for it a sample
# score of 1.
X = [[0.], [1.], [1.], [2.], [3.], [3.]]
labels = np.array([0, 1, 1, 1, 2, 2])
# Cluster 0: 1 sample -> score of 0 by Rousseeuw's convention
# Cluster 1: intra-cluster = [.5, .5, 1]
# inter-cluster = [1, 1, 1]
# silhouette = [.5, .5, 0]
# Cluster 2: intra-cluster = [0, 0]
# inter-cluster = [arbitrary, arbitrary]
# silhouette = [1., 1.]
silhouette = silhouette_score(X, labels)
assert not np.isnan(silhouette)
ss = silhouette_samples(X, labels)
assert_array_equal(ss, [0, .5, .5, 0, 1, 1])
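# Illustrative note (not part of the original tests): for the second sample
# (x = 1., cluster 1) the definition s = (b - a) / max(a, b) gives
# a = (0 + 1) / 2 = 0.5 (mean intra-cluster distance) and b = min(1, 2) = 1
# (nearest other-cluster mean distance), so s = 0.5 as in the array above.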
def test_silhouette_paper_example():
# Explicitly check per-sample results against Rousseeuw (1987)
# Data from Table 1
lower = [5.58,
7.00, 6.50,
7.08, 7.00, 3.83,
4.83, 5.08, 8.17, 5.83,
2.17, 5.75, 6.67, 6.92, 4.92,
6.42, 5.00, 5.58, 6.00, 4.67, 6.42,
3.42, 5.50, 6.42, 6.42, 5.00, 3.92, 6.17,
2.50, 4.92, 6.25, 7.33, 4.50, 2.25, 6.33, 2.75,
6.08, 6.67, 4.25, 2.67, 6.00, 6.17, 6.17, 6.92, 6.17,
5.25, 6.83, 4.50, 3.75, 5.75, 5.42, 6.08, 5.83, 6.67, 3.67,
4.75, 3.00, 6.08, 6.67, 5.00, 5.58, 4.83, 6.17, 5.67, 6.50, 6.92]
D = np.zeros((12, 12))
D[np.tril_indices(12, -1)] = lower
D += D.T
names = ['BEL', 'BRA', 'CHI', 'CUB', 'EGY', 'FRA', 'IND', 'ISR', 'USA',
'USS', 'YUG', 'ZAI']
# Data from Figure 2
labels1 = [1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1]
expected1 = {'USA': .43, 'BEL': .39, 'FRA': .35, 'ISR': .30, 'BRA': .22,
'EGY': .20, 'ZAI': .19, 'CUB': .40, 'USS': .34, 'CHI': .33,
'YUG': .26, 'IND': -.04}
score1 = .28
# Data from Figure 3
labels2 = [1, 2, 3, 3, 1, 1, 2, 1, 1, 3, 3, 2]
expected2 = {'USA': .47, 'FRA': .44, 'BEL': .42, 'ISR': .37, 'EGY': .02,
'ZAI': .28, 'BRA': .25, 'IND': .17, 'CUB': .48, 'USS': .44,
'YUG': .31, 'CHI': .31}
score2 = .33
for labels, expected, score in [(labels1, expected1, score1),
(labels2, expected2, score2)]:
expected = [expected[name] for name in names]
# we check to 2dp because that's what's in the paper
        assert expected == pytest.approx(
            silhouette_samples(D, np.array(labels), metric='precomputed'),
            abs=1e-2)
        assert score == pytest.approx(
            silhouette_score(D, np.array(labels), metric='precomputed'),
            abs=1e-2)
def test_correct_labelsize():
# Assert 1 < n_labels < n_samples
dataset = datasets.load_iris()
X = dataset.data
# n_labels = n_samples
y = np.arange(X.shape[0])
err_msg = (r'Number of labels is %d\. Valid values are 2 '
r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)))
with pytest.raises(ValueError, match=err_msg):
silhouette_score(X, y)
# n_labels = 1
y = np.zeros(X.shape[0])
err_msg = (r'Number of labels is %d\. Valid values are 2 '
r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)))
with pytest.raises(ValueError, match=err_msg):
silhouette_score(X, y)
def test_non_encoded_labels():
dataset = datasets.load_iris()
X = dataset.data
labels = dataset.target
assert (
silhouette_score(X, labels * 2 + 10) == silhouette_score(X, labels))
assert_array_equal(
silhouette_samples(X, labels * 2 + 10), silhouette_samples(X, labels))
def test_non_numpy_labels():
dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
assert (
silhouette_score(list(X), list(y)) == silhouette_score(X, y))
@pytest.mark.parametrize('dtype', (np.float32, np.float64))
def test_silhouette_nonzero_diag(dtype):
# Make sure silhouette_samples requires diagonal to be zero.
# Non-regression test for #12178
# Construct a zero-diagonal matrix
dists = pairwise_distances(
np.array([[0.2, 0.1, 0.12, 1.34, 1.11, 1.6]], dtype=dtype).T)
labels = [0, 0, 0, 1, 1, 1]
# small values on the diagonal are OK
dists[2][2] = np.finfo(dists.dtype).eps * 10
silhouette_samples(dists, labels, metric='precomputed')
# values bigger than eps * 100 are not
dists[2][2] = np.finfo(dists.dtype).eps * 1000
with pytest.raises(ValueError, match='contains non-zero'):
silhouette_samples(dists, labels, metric='precomputed')
def assert_raises_on_only_one_label(func):
"""Assert message when there is only one label"""
rng = np.random.RandomState(seed=0)
with pytest.raises(ValueError, match="Number of labels is"):
func(rng.rand(10, 2), np.zeros(10))
def assert_raises_on_all_points_same_cluster(func):
"""Assert message when all point are in different clusters"""
rng = np.random.RandomState(seed=0)
with pytest.raises(ValueError, match="Number of labels is"):
func(rng.rand(10, 2), np.arange(10))
def test_calinski_harabasz_score():
assert_raises_on_only_one_label(calinski_harabasz_score)
assert_raises_on_all_points_same_cluster(calinski_harabasz_score)
    # Assert the value is 1. when all samples are identical
assert 1. == calinski_harabasz_score(np.ones((10, 2)),
[0] * 5 + [1] * 5)
    # Assert the value is 0. when all cluster means are equal
assert 0. == calinski_harabasz_score([[-1, -1], [1, 1]] * 10,
[0] * 10 + [1] * 10)
    # General case (with non-numpy arrays)
X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
[[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
    assert calinski_harabasz_score(X, labels) == pytest.approx(
        45 * (40 - 4) / (5 * (4 - 1)))
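# Illustrative note (not part of the original tests): the cluster centroids in
# the general case are (.5, .5), (3.5, 3.5), (.5, 3.5) and (3.5, .5), so the
# within-cluster sum of squares is 40 * 0.5 = 20 and the between-cluster sum
# of squares is 4 * 10 * 4.5 = 180; the score
# (180 / (4 - 1)) / (20 / (40 - 4)) = 108 equals 45 * (40 - 4) / (5 * (4 - 1)).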
def test_davies_bouldin_score():
assert_raises_on_only_one_label(davies_bouldin_score)
assert_raises_on_all_points_same_cluster(davies_bouldin_score)
    # Assert the value is 0. when all samples are identical
assert davies_bouldin_score(np.ones((10, 2)),
[0] * 5 + [1] * 5) == pytest.approx(0.0)
    # Assert the value is 0. when all cluster means are equal
assert davies_bouldin_score([[-1, -1], [1, 1]] * 10,
[0] * 10 + [1] * 10) == pytest.approx(0.0)
    # General case (with non-numpy arrays)
X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
[[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
    assert davies_bouldin_score(X, labels) == pytest.approx(
        2 * np.sqrt(0.5) / 3)
    # Ensure divide by zero warning is not raised in the general case
with pytest.warns(None) as record:
davies_bouldin_score(X, labels)
div_zero_warnings = [
warning for warning in record
if "divide by zero encountered" in warning.message.args[0]
]
assert len(div_zero_warnings) == 0
    # General case - clusters with a single sample
X = ([[0, 0], [2, 2], [3, 3], [5, 5]])
labels = [0, 0, 1, 2]
    assert davies_bouldin_score(X, labels) == pytest.approx((5. / 4) / 3)
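# Illustrative note (not part of the original tests): in the last example the
# cluster scatters are s0 = sqrt(2) (points (0, 0) and (2, 2) around centroid
# (1, 1)) and s1 = s2 = 0 (singleton clusters); the pairwise ratios
# (s_i + s_j) / d(c_i, c_j) are 0.5, 0.25 and 0, and averaging the per-cluster
# maxima gives (0.5 + 0.5 + 0.25) / 3 = (5. / 4) / 3.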

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _unsupervised # type: ignore
from ...externals._pep562 import Pep562
from ...utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.metrics.cluster.unsupervised'
correct_import_path = 'sklearn.metrics.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_unsupervised, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)
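# Illustrative usage (an assumption about the shim's behaviour, not part of
# the generated file): importing through the deprecated path, e.g.
#   from sklearn.metrics.cluster.unsupervised import silhouette_score
# warns that the path is deprecated when not run under pytest and then
# resolves the attribute from sklearn.metrics.cluster._unsupervised via the
# module-level __getattr__ above.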