Uploaded Test files

This commit is contained in:
Batuhan Berk Başoğlu 2020-11-12 11:05:57 -05:00
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions


@@ -0,0 +1,50 @@
"""Testing for bicluster metrics module"""
import numpy as np
from sklearn.utils._testing import assert_almost_equal
from sklearn.metrics.cluster._bicluster import _jaccard
from sklearn.metrics import consensus_score
def test_jaccard():
a1 = np.array([True, True, False, False])
a2 = np.array([True, True, True, True])
a3 = np.array([False, True, True, False])
a4 = np.array([False, False, True, True])
assert _jaccard(a1, a1, a1, a1) == 1
assert _jaccard(a1, a1, a2, a2) == 0.25
assert _jaccard(a1, a1, a3, a3) == 1.0 / 7
assert _jaccard(a1, a1, a4, a4) == 0
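# Illustrative note (a sketch of the assumed definition, not scikit-learn's
# code): the Jaccard similarity of two biclusters counts the matrix cells in
# the intersection of their row/column supports over the cells in their
# union. For (a1, a1) vs (a2, a2) above: |A| = 2 * 2 = 4, |B| = 4 * 4 = 16,
# |A & B| = 4, giving 4 / (4 + 16 - 4) = 0.25.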
def test_consensus_score():
a = [[True, True, False, False],
[False, False, True, True]]
b = a[::-1]
assert consensus_score((a, a), (a, a)) == 1
assert consensus_score((a, a), (b, b)) == 1
assert consensus_score((a, b), (a, b)) == 1
assert consensus_score((a, b), (b, a)) == 1
assert consensus_score((a, a), (b, a)) == 0
assert consensus_score((a, a), (a, b)) == 0
assert consensus_score((b, b), (a, b)) == 0
assert consensus_score((b, b), (b, a)) == 0
def test_consensus_score_issue2445():
    """Different number of biclusters in A and B."""
a_rows = np.array([[True, True, False, False],
[False, False, True, True],
[False, False, False, True]])
a_cols = np.array([[True, True, False, False],
[False, False, True, True],
[False, False, False, True]])
idx = [0, 2]
s = consensus_score((a_rows, a_cols), (a_rows[idx], a_cols[idx]))
# B contains 2 of the 3 biclusters in A, so score should be 2/3
assert_almost_equal(s, 2.0/3.0)
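
# A minimal sketch of the idea behind consensus_score, assuming it matches
# biclusters between the two sets so that the summed pairwise Jaccard
# similarity is maximal (a linear assignment problem) and divides by the
# larger number of biclusters. `_consensus_reference` is a hypothetical
# helper written for illustration, not scikit-learn's implementation.
def _consensus_reference(a, b):
    from scipy.optimize import linear_sum_assignment
    a_rows, a_cols = (np.asarray(x) for x in a)
    b_rows, b_cols = (np.asarray(x) for x in b)
    # Pairwise Jaccard similarities between every bicluster of a and of b
    sim = np.array([[_jaccard(ar, ac, br, bc)
                     for br, bc in zip(b_rows, b_cols)]
                    for ar, ac in zip(a_rows, a_cols)])
    rows, cols = linear_sum_assignment(-sim)  # maximize the total similarity
    return sim[rows, cols].sum() / max(len(a_rows), len(b_rows))


def test_consensus_score_reference_sketch():
    a = [[True, True, False, False],
         [False, False, True, True]]
    b = a[::-1]
    assert_almost_equal(_consensus_reference((a, a), (b, b)),
                        consensus_score((a, a), (b, b)))
    assert_almost_equal(_consensus_reference((a, b), (b, a)),
                        consensus_score((a, b), (b, a)))
    assert_almost_equal(_consensus_reference((a, a), (a, b)),
                        consensus_score((a, a), (a, b)))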


@@ -0,0 +1,211 @@
from functools import partial
import pytest
import numpy as np
from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import completeness_score
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.metrics.cluster import homogeneity_score
from sklearn.metrics.cluster import mutual_info_score
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.metrics.cluster import v_measure_score
from sklearn.metrics.cluster import silhouette_score
from sklearn.metrics.cluster import calinski_harabasz_score
from sklearn.metrics.cluster import davies_bouldin_score
from sklearn.utils._testing import assert_allclose
# Dictionaries of metrics
# ------------------------
# The goal of these dictionaries is to have an easy way to call a particular
# metric and to associate a name with each function:
#   - SUPERVISED_METRICS: all supervised cluster metrics (i.e. those that
#     are computed against a ground-truth labeling)
#   - UNSUPERVISED_METRICS: all unsupervised cluster metrics
#
# These dictionaries are used to systematically test some invariance
# properties, e.g. invariance with respect to several input layouts.
#
SUPERVISED_METRICS = {
"adjusted_mutual_info_score": adjusted_mutual_info_score,
"adjusted_rand_score": adjusted_rand_score,
"completeness_score": completeness_score,
"homogeneity_score": homogeneity_score,
"mutual_info_score": mutual_info_score,
"normalized_mutual_info_score": normalized_mutual_info_score,
"v_measure_score": v_measure_score,
"fowlkes_mallows_score": fowlkes_mallows_score
}
UNSUPERVISED_METRICS = {
"silhouette_score": silhouette_score,
"silhouette_manhattan": partial(silhouette_score, metric='manhattan'),
"calinski_harabasz_score": calinski_harabasz_score,
"davies_bouldin_score": davies_bouldin_score
}
# Lists of metrics with common properties
# ---------------------------------------
# Lists of metrics with common properties are used to systematically test
# certain behaviors and invariances, e.g. SYMMETRIC_METRICS lists all metrics
# that are symmetric with respect to their input arguments y_true and y_pred.
#
# --------------------------------------------------------------------
# Metrics that are symmetric with respect to their input arguments y_true
# and y_pred. Only supervised metrics can be symmetric, since the
# unsupervised ones take data and labels rather than two labelings.
SYMMETRIC_METRICS = [
"adjusted_rand_score", "v_measure_score",
"mutual_info_score", "adjusted_mutual_info_score",
"normalized_mutual_info_score", "fowlkes_mallows_score"
]
NON_SYMMETRIC_METRICS = ["homogeneity_score", "completeness_score"]
# Metrics whose upper bound is 1
NORMALIZED_METRICS = [
"adjusted_rand_score", "homogeneity_score", "completeness_score",
"v_measure_score", "adjusted_mutual_info_score", "fowlkes_mallows_score",
"normalized_mutual_info_score"
]
rng = np.random.RandomState(0)
y1 = rng.randint(3, size=30)
y2 = rng.randint(3, size=30)
def test_symmetric_non_symmetric_union():
assert (sorted(SYMMETRIC_METRICS + NON_SYMMETRIC_METRICS) ==
sorted(SUPERVISED_METRICS))
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize(
'metric_name, y1, y2',
[(name, y1, y2) for name in SYMMETRIC_METRICS]
)
def test_symmetry(metric_name, y1, y2):
metric = SUPERVISED_METRICS[metric_name]
assert metric(y1, y2) == pytest.approx(metric(y2, y1))
@pytest.mark.parametrize(
'metric_name, y1, y2',
[(name, y1, y2) for name in NON_SYMMETRIC_METRICS]
)
def test_non_symmetry(metric_name, y1, y2):
metric = SUPERVISED_METRICS[metric_name]
assert metric(y1, y2) != pytest.approx(metric(y2, y1))
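

def test_homogeneity_completeness_duality_sketch():
    # Illustrative sketch: the two non-symmetric metrics are duals of each
    # other (homogeneity_score(a, b) == completeness_score(b, a)), which is
    # why swapping their arguments changes the score above.
    assert homogeneity_score(y1, y2) == pytest.approx(
        completeness_score(y2, y1))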
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize("metric_name", NORMALIZED_METRICS)
def test_normalized_output(metric_name):
upper_bound_1 = [0, 0, 0, 1, 1, 1]
upper_bound_2 = [0, 0, 0, 1, 1, 1]
metric = SUPERVISED_METRICS[metric_name]
assert metric([0, 0, 0, 1, 1], [0, 0, 0, 1, 2]) > 0.0
assert metric([0, 0, 1, 1, 2], [0, 0, 1, 1, 1]) > 0.0
assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
assert metric(upper_bound_1, upper_bound_2) == pytest.approx(1.0)
lower_bound_1 = [0, 0, 0, 0, 0, 0]
lower_bound_2 = [0, 1, 2, 3, 4, 5]
score = np.array([metric(lower_bound_1, lower_bound_2),
metric(lower_bound_2, lower_bound_1)])
assert not (score < 0).any()
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize(
"metric_name", dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS)
)
def test_permute_labels(metric_name):
    # All clustering metrics should not change their score under a
    # permutation of the labels, i.e. when 0 and 1 are exchanged.
y_label = np.array([0, 0, 0, 1, 1, 0, 1])
y_pred = np.array([1, 0, 1, 0, 1, 1, 0])
if metric_name in SUPERVISED_METRICS:
metric = SUPERVISED_METRICS[metric_name]
score_1 = metric(y_pred, y_label)
assert_allclose(score_1, metric(1 - y_pred, y_label))
assert_allclose(score_1, metric(1 - y_pred, 1 - y_label))
assert_allclose(score_1, metric(y_pred, 1 - y_label))
else:
metric = UNSUPERVISED_METRICS[metric_name]
X = np.random.randint(10, size=(7, 10))
score_1 = metric(X, y_pred)
assert_allclose(score_1, metric(X, 1 - y_pred))
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize(
"metric_name", dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS)
)
# For all clustering metrics, the input parameters can be arrays or lists
# containing negative, strictly positive, or string labels.
def test_format_invariance(metric_name):
y_true = [0, 0, 0, 0, 1, 1, 1, 1]
y_pred = [0, 1, 2, 3, 4, 5, 6, 7]
def generate_formats(y):
y = np.array(y)
yield y, 'array of ints'
yield y.tolist(), 'list of ints'
yield [str(x) + "-a" for x in y.tolist()], 'list of strs'
yield (np.array([str(x) + "-a" for x in y.tolist()], dtype=object),
'array of strs')
yield y - 1, 'including negative ints'
yield y + 1, 'strictly positive ints'
if metric_name in SUPERVISED_METRICS:
metric = SUPERVISED_METRICS[metric_name]
score_1 = metric(y_true, y_pred)
y_true_gen = generate_formats(y_true)
y_pred_gen = generate_formats(y_pred)
for (y_true_fmt, fmt_name), (y_pred_fmt, _) in zip(y_true_gen,
y_pred_gen):
assert score_1 == metric(y_true_fmt, y_pred_fmt)
else:
metric = UNSUPERVISED_METRICS[metric_name]
X = np.random.randint(10, size=(8, 10))
score_1 = metric(X, y_true)
assert score_1 == metric(X.astype(float), y_true)
y_true_gen = generate_formats(y_true)
for (y_true_fmt, fmt_name) in y_true_gen:
assert score_1 == metric(X, y_true_fmt)
@pytest.mark.parametrize("metric", SUPERVISED_METRICS.values())
def test_single_sample(metric):
    # Only the supervised metrics support a single sample
for i, j in [(0, 0), (0, 1), (1, 0), (1, 1)]:
metric([i], [j])
@pytest.mark.parametrize(
"metric_name, metric_func",
dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS).items()
)
def test_inf_nan_input(metric_name, metric_func):
if metric_name in SUPERVISED_METRICS:
invalids = [([0, 1], [np.inf, np.inf]),
([0, 1], [np.nan, np.nan]),
([0, 1], [np.nan, np.inf])]
else:
X = np.random.randint(10, size=(2, 10))
invalids = [(X, [np.inf, np.inf]),
(X, [np.nan, np.nan]),
(X, [np.nan, np.inf])]
    # Check each invalid input separately, so every case must raise.
    for args in invalids:
        with pytest.raises(ValueError, match='contains NaN, infinity'):
            metric_func(*args)


@@ -0,0 +1,358 @@
import numpy as np
import pytest
from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import completeness_score
from sklearn.metrics.cluster import contingency_matrix
from sklearn.metrics.cluster import entropy
from sklearn.metrics.cluster import expected_mutual_information
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.metrics.cluster import homogeneity_completeness_v_measure
from sklearn.metrics.cluster import homogeneity_score
from sklearn.metrics.cluster import mutual_info_score
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.metrics.cluster import v_measure_score
from sklearn.metrics.cluster._supervised import _generalized_average
from sklearn.utils import assert_all_finite
from sklearn.utils._testing import (
assert_almost_equal, ignore_warnings)
from numpy.testing import assert_array_almost_equal
score_funcs = [
adjusted_rand_score,
homogeneity_score,
completeness_score,
v_measure_score,
adjusted_mutual_info_score,
normalized_mutual_info_score,
]
@ignore_warnings(category=FutureWarning)
def test_error_messages_on_wrong_input():
for score_func in score_funcs:
expected = (r'Found input variables with inconsistent numbers '
r'of samples: \[2, 3\]')
with pytest.raises(ValueError, match=expected):
score_func([0, 1], [1, 1, 1])
expected = r"labels_true must be 1D: shape is \(2"
with pytest.raises(ValueError, match=expected):
score_func([[0, 1], [1, 0]], [1, 1, 1])
expected = r"labels_pred must be 1D: shape is \(2"
with pytest.raises(ValueError, match=expected):
score_func([0, 1, 0], [[1, 1], [0, 0]])
def test_generalized_average():
a, b = 1, 2
methods = ["min", "geometric", "arithmetic", "max"]
means = [_generalized_average(a, b, method) for method in methods]
assert means[0] <= means[1] <= means[2] <= means[3]
c, d = 12, 12
means = [_generalized_average(c, d, method) for method in methods]
assert means[0] == means[1] == means[2] == means[3]
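# For reference, assuming the standard definitions of these means, the four
# values for a = 1 and b = 2 are min = 1, geometric = sqrt(2) ~ 1.41,
# arithmetic = 1.5 and max = 2, consistent with the ordering asserted above.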
@ignore_warnings(category=FutureWarning)
def test_perfect_matches():
for score_func in score_funcs:
assert score_func([], []) == pytest.approx(1.0)
assert score_func([0], [1]) == pytest.approx(1.0)
assert score_func([0, 0, 0], [0, 0, 0]) == pytest.approx(1.0)
assert score_func([0, 1, 0], [42, 7, 42]) == pytest.approx(1.0)
assert score_func([0., 1., 0.], [42., 7., 42.]) == pytest.approx(1.0)
assert score_func([0., 1., 2.], [42., 7., 2.]) == pytest.approx(1.0)
assert score_func([0, 1, 2], [42, 7, 2]) == pytest.approx(1.0)
score_funcs_with_changing_means = [
normalized_mutual_info_score,
adjusted_mutual_info_score,
]
means = {"min", "geometric", "arithmetic", "max"}
for score_func in score_funcs_with_changing_means:
for mean in means:
assert score_func([], [], mean) == pytest.approx(1.0)
assert score_func([0], [1], mean) == pytest.approx(1.0)
assert score_func([0, 0, 0], [0, 0, 0], mean) == pytest.approx(1.0)
assert score_func(
[0, 1, 0], [42, 7, 42], mean) == pytest.approx(1.0)
assert score_func(
[0., 1., 0.], [42., 7., 42.], mean) == pytest.approx(1.0)
assert score_func(
[0., 1., 2.], [42., 7., 2.], mean) == pytest.approx(1.0)
assert score_func(
[0, 1, 2], [42, 7, 2], mean) == pytest.approx(1.0)
def test_homogeneous_but_not_complete_labeling():
# homogeneous but not complete clustering
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 1, 1, 1],
[0, 0, 0, 1, 2, 2])
assert_almost_equal(h, 1.00, 2)
assert_almost_equal(c, 0.69, 2)
assert_almost_equal(v, 0.81, 2)
def test_complete_but_not_homogeneous_labeling():
# complete but not homogeneous clustering
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 1, 1, 2, 2],
[0, 0, 1, 1, 1, 1])
assert_almost_equal(h, 0.58, 2)
assert_almost_equal(c, 1.00, 2)
assert_almost_equal(v, 0.73, 2)
def test_not_complete_and_not_homogeneous_labeling():
# neither complete nor homogeneous but not so bad either
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 1, 1, 1],
[0, 1, 0, 1, 2, 2])
assert_almost_equal(h, 0.67, 2)
assert_almost_equal(c, 0.42, 2)
assert_almost_equal(v, 0.52, 2)
def test_beta_parameter():
    # Test the beta parameter that can be passed to
    # homogeneity_completeness_v_measure and v_measure_score
beta_test = 0.2
h_test = 0.67
c_test = 0.42
v_test = ((1 + beta_test) * h_test * c_test
/ (beta_test * h_test + c_test))
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 1, 1, 1],
[0, 1, 0, 1, 2, 2],
beta=beta_test)
assert_almost_equal(h, h_test, 2)
assert_almost_equal(c, c_test, 2)
assert_almost_equal(v, v_test, 2)
v = v_measure_score(
[0, 0, 0, 1, 1, 1],
[0, 1, 0, 1, 2, 2],
beta=beta_test)
assert_almost_equal(v, v_test, 2)
def test_non_consecutive_labels():
# regression tests for labels with gaps
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 2, 2, 2],
[0, 1, 0, 1, 2, 2])
assert_almost_equal(h, 0.67, 2)
assert_almost_equal(c, 0.42, 2)
assert_almost_equal(v, 0.52, 2)
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 1, 1, 1],
[0, 4, 0, 4, 2, 2])
assert_almost_equal(h, 0.67, 2)
assert_almost_equal(c, 0.42, 2)
assert_almost_equal(v, 0.52, 2)
ari_1 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])
ari_2 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])
assert_almost_equal(ari_1, 0.24, 2)
assert_almost_equal(ari_2, 0.24, 2)
@ignore_warnings(category=FutureWarning)
def uniform_labelings_scores(score_func, n_samples, k_range, n_runs=10,
seed=42):
# Compute score for random uniform cluster labelings
random_labels = np.random.RandomState(seed).randint
scores = np.zeros((len(k_range), n_runs))
for i, k in enumerate(k_range):
for j in range(n_runs):
labels_a = random_labels(low=0, high=k, size=n_samples)
labels_b = random_labels(low=0, high=k, size=n_samples)
scores[i, j] = score_func(labels_a, labels_b)
return scores
@ignore_warnings(category=FutureWarning)
def test_adjustment_for_chance():
# Check that adjusted scores are almost zero on random labels
n_clusters_range = [2, 10, 50, 90]
n_samples = 100
n_runs = 10
scores = uniform_labelings_scores(
adjusted_rand_score, n_samples, n_clusters_range, n_runs)
max_abs_scores = np.abs(scores).max(axis=1)
assert_array_almost_equal(max_abs_scores, [0.02, 0.03, 0.03, 0.02], 2)
def test_adjusted_mutual_info_score():
# Compute the Adjusted Mutual Information and test against known values
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
# Mutual information
mi = mutual_info_score(labels_a, labels_b)
assert_almost_equal(mi, 0.41022, 5)
# with provided sparse contingency
C = contingency_matrix(labels_a, labels_b, sparse=True)
mi = mutual_info_score(labels_a, labels_b, contingency=C)
assert_almost_equal(mi, 0.41022, 5)
# with provided dense contingency
C = contingency_matrix(labels_a, labels_b)
mi = mutual_info_score(labels_a, labels_b, contingency=C)
assert_almost_equal(mi, 0.41022, 5)
# Expected mutual information
n_samples = C.sum()
emi = expected_mutual_information(C, n_samples)
assert_almost_equal(emi, 0.15042, 5)
# Adjusted mutual information
ami = adjusted_mutual_info_score(labels_a, labels_b)
assert_almost_equal(ami, 0.27821, 5)
ami = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3])
assert ami == pytest.approx(1.0)
# Test with a very large array
a110 = np.array([list(labels_a) * 110]).flatten()
b110 = np.array([list(labels_b) * 110]).flatten()
ami = adjusted_mutual_info_score(a110, b110)
assert_almost_equal(ami, 0.38, 2)
def test_expected_mutual_info_overflow():
# Test for regression where contingency cell exceeds 2**16
# leading to overflow in np.outer, resulting in EMI > 1
assert expected_mutual_information(np.array([[70000]]), 70000) <= 1
def test_int_overflow_mutual_info_fowlkes_mallows_score():
    # Test overflow in mutual_info_score and fowlkes_mallows_score
x = np.array([1] * (52632 + 2529) + [2] * (14660 + 793) + [3] * (3271 +
204) + [4] * (814 + 39) + [5] * (316 + 20))
y = np.array([0] * 52632 + [1] * 2529 + [0] * 14660 + [1] * 793 +
[0] * 3271 + [1] * 204 + [0] * 814 + [1] * 39 + [0] * 316 +
[1] * 20)
assert_all_finite(mutual_info_score(x, y))
assert_all_finite(fowlkes_mallows_score(x, y))
def test_entropy():
ent = entropy([0, 0, 42.])
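    # Expected value: the label counts {2, 1} give p = [2/3, 1/3], so
    # H = -(2/3) * ln(2/3) - (1/3) * ln(1/3) ~ 0.6365 (natural logarithm).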
assert_almost_equal(ent, 0.6365141, 5)
assert_almost_equal(entropy([]), 1)
def test_contingency_matrix():
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
C = contingency_matrix(labels_a, labels_b)
C2 = np.histogram2d(labels_a, labels_b,
bins=(np.arange(1, 5),
np.arange(1, 5)))[0]
assert_array_almost_equal(C, C2)
C = contingency_matrix(labels_a, labels_b, eps=.1)
assert_array_almost_equal(C, C2 + .1)
def test_contingency_matrix_sparse():
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
C = contingency_matrix(labels_a, labels_b)
C_sparse = contingency_matrix(labels_a, labels_b, sparse=True).toarray()
assert_array_almost_equal(C, C_sparse)
with pytest.raises(ValueError, match="Cannot set 'eps' when sparse=True"):
contingency_matrix(labels_a, labels_b, eps=1e-10, sparse=True)
@ignore_warnings(category=FutureWarning)
def test_exactly_zero_info_score():
# Check numerical stability when information is exactly zero
    for i in np.logspace(1, 4, 4).astype(int):
        labels_a, labels_b = (np.ones(i, dtype=int),
                              np.arange(i, dtype=int))
assert normalized_mutual_info_score(
labels_a, labels_b) == pytest.approx(0.0)
assert v_measure_score(
labels_a, labels_b) == pytest.approx(0.0)
assert adjusted_mutual_info_score(
labels_a, labels_b) == pytest.approx(0.0)
assert normalized_mutual_info_score(
labels_a, labels_b) == pytest.approx(0.0)
for method in ["min", "geometric", "arithmetic", "max"]:
assert adjusted_mutual_info_score(
labels_a, labels_b, method) == pytest.approx(0.0)
assert normalized_mutual_info_score(
labels_a, labels_b, method) == pytest.approx(0.0)
def test_v_measure_and_mutual_information(seed=36):
# Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(int):
random_state = np.random.RandomState(seed)
labels_a, labels_b = (random_state.randint(0, 10, i),
random_state.randint(0, 10, i))
assert_almost_equal(v_measure_score(labels_a, labels_b),
2.0 * mutual_info_score(labels_a, labels_b) /
(entropy(labels_a) + entropy(labels_b)), 0)
avg = 'arithmetic'
assert_almost_equal(v_measure_score(labels_a, labels_b),
normalized_mutual_info_score(labels_a, labels_b,
average_method=avg)
)
def test_fowlkes_mallows_score():
# General case
score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1],
[0, 0, 1, 1, 2, 2])
assert_almost_equal(score, 4. / np.sqrt(12. * 6.))
# Perfect match but where the label names changed
perfect_score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1],
[1, 1, 1, 0, 0, 0])
assert_almost_equal(perfect_score, 1.)
# Worst case
worst_score = fowlkes_mallows_score([0, 0, 0, 0, 0, 0],
[0, 1, 2, 3, 4, 5])
assert_almost_equal(worst_score, 0.)
def test_fowlkes_mallows_score_properties():
# handcrafted example
labels_a = np.array([0, 0, 0, 1, 1, 2])
labels_b = np.array([1, 1, 2, 2, 0, 0])
expected = 1. / np.sqrt((1. + 3.) * (1. + 2.))
# FMI = TP / sqrt((TP + FP) * (TP + FN))
score_original = fowlkes_mallows_score(labels_a, labels_b)
assert_almost_equal(score_original, expected)
# symmetric property
score_symmetric = fowlkes_mallows_score(labels_b, labels_a)
assert_almost_equal(score_symmetric, expected)
# permutation property
score_permuted = fowlkes_mallows_score((labels_a + 1) % 3, labels_b)
assert_almost_equal(score_permuted, expected)
    # symmetric and permutation (both together)
score_both = fowlkes_mallows_score(labels_b, (labels_a + 2) % 3)
assert_almost_equal(score_both, expected)
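

def test_fowlkes_mallows_pair_counting_sketch():
    # A minimal sketch, assuming FMI = TP / sqrt((TP + FP) * (TP + FN)) with
    # pair counts derived from the contingency matrix; it recomputes the
    # handcrafted example above and is only an illustration, not
    # scikit-learn's implementation.
    labels_a = np.array([0, 0, 0, 1, 1, 2])
    labels_b = np.array([1, 1, 2, 2, 0, 0])
    c = contingency_matrix(labels_a, labels_b)
    row_sums = c.sum(axis=1)
    col_sums = c.sum(axis=0)
    tp = (c * (c - 1) // 2).sum()                   # pairs together in both
    tp_fp = (row_sums * (row_sums - 1) // 2).sum()  # pairs together in a
    tp_fn = (col_sums * (col_sums - 1) // 2).sum()  # pairs together in b
    expected = tp / np.sqrt(tp_fp * tp_fn)
    assert_almost_equal(fowlkes_mallows_score(labels_a, labels_b), expected)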
@pytest.mark.parametrize('labels_true, labels_pred', [
(['a'] * 6, [1, 1, 0, 0, 1, 1]),
([1] * 6, [1, 1, 0, 0, 1, 1]),
([1, 1, 0, 0, 1, 1], ['a'] * 6),
([1, 1, 0, 0, 1, 1], [1] * 6),
])
def test_mutual_info_score_positive_constant_label(labels_true, labels_pred):
# non-regression test for #16355
assert mutual_info_score(labels_true, labels_pred) >= 0


@@ -0,0 +1,252 @@
import numpy as np
import scipy.sparse as sp
import pytest
from scipy.sparse import csr_matrix
from sklearn import datasets
from sklearn.utils._testing import assert_array_equal
from sklearn.metrics.cluster import silhouette_score
from sklearn.metrics.cluster import silhouette_samples
from sklearn.metrics import pairwise_distances
from sklearn.metrics.cluster import calinski_harabasz_score
from sklearn.metrics.cluster import davies_bouldin_score
def test_silhouette():
# Tests the Silhouette Coefficient.
dataset = datasets.load_iris()
X_dense = dataset.data
X_csr = csr_matrix(X_dense)
X_dok = sp.dok_matrix(X_dense)
X_lil = sp.lil_matrix(X_dense)
y = dataset.target
for X in [X_dense, X_csr, X_dok, X_lil]:
D = pairwise_distances(X, metric='euclidean')
# Given that the actual labels are used, we can assume that S would be
# positive.
score_precomputed = silhouette_score(D, y, metric='precomputed')
assert score_precomputed > 0
# Test without calculating D
score_euclidean = silhouette_score(X, y, metric='euclidean')
        assert score_euclidean == pytest.approx(score_precomputed)
if X is X_dense:
score_dense_without_sampling = score_precomputed
else:
            assert score_euclidean == pytest.approx(
                score_dense_without_sampling)
# Test with sampling
score_precomputed = silhouette_score(D, y, metric='precomputed',
sample_size=int(X.shape[0] / 2),
random_state=0)
score_euclidean = silhouette_score(X, y, metric='euclidean',
sample_size=int(X.shape[0] / 2),
random_state=0)
assert score_precomputed > 0
assert score_euclidean > 0
        assert score_euclidean == pytest.approx(score_precomputed)
if X is X_dense:
score_dense_with_sampling = score_precomputed
else:
            assert score_euclidean == pytest.approx(
                score_dense_with_sampling)
def test_cluster_size_1():
# Assert Silhouette Coefficient == 0 when there is 1 sample in a cluster
# (cluster 0). We also test the case where there are identical samples
# as the only members of a cluster (cluster 2). To our knowledge, this case
# is not discussed in reference material, and we choose for it a sample
# score of 1.
X = [[0.], [1.], [1.], [2.], [3.], [3.]]
labels = np.array([0, 1, 1, 1, 2, 2])
# Cluster 0: 1 sample -> score of 0 by Rousseeuw's convention
# Cluster 1: intra-cluster = [.5, .5, 1]
# inter-cluster = [1, 1, 1]
# silhouette = [.5, .5, 0]
# Cluster 2: intra-cluster = [0, 0]
# inter-cluster = [arbitrary, arbitrary]
# silhouette = [1., 1.]
silhouette = silhouette_score(X, labels)
assert not np.isnan(silhouette)
ss = silhouette_samples(X, labels)
assert_array_equal(ss, [0, .5, .5, 0, 1, 1])
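

def test_cluster_size_1_manual_sketch():
    # A minimal sketch (illustration only): recompute the per-sample values
    # asserted above by hand with s = (b - a) / max(a, b), using 0 for the
    # singleton cluster, to make the hand computation in the comments
    # explicit.
    X = [[0.], [1.], [1.], [2.], [3.], [3.]]
    labels = np.array([0, 1, 1, 1, 2, 2])
    D = pairwise_distances(X)
    manual = []
    for i in range(len(labels)):
        same = labels == labels[i]
        same[i] = False
        if not same.any():
            manual.append(0.)  # singleton cluster -> 0 by convention
            continue
        a = D[i, same].mean()
        b = min(D[i, labels == k].mean()
                for k in np.unique(labels) if k != labels[i])
        manual.append((b - a) / max(a, b))
    assert manual == pytest.approx([0, .5, .5, 0, 1, 1])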
def test_silhouette_paper_example():
# Explicitly check per-sample results against Rousseeuw (1987)
# Data from Table 1
lower = [5.58,
7.00, 6.50,
7.08, 7.00, 3.83,
4.83, 5.08, 8.17, 5.83,
2.17, 5.75, 6.67, 6.92, 4.92,
6.42, 5.00, 5.58, 6.00, 4.67, 6.42,
3.42, 5.50, 6.42, 6.42, 5.00, 3.92, 6.17,
2.50, 4.92, 6.25, 7.33, 4.50, 2.25, 6.33, 2.75,
6.08, 6.67, 4.25, 2.67, 6.00, 6.17, 6.17, 6.92, 6.17,
5.25, 6.83, 4.50, 3.75, 5.75, 5.42, 6.08, 5.83, 6.67, 3.67,
4.75, 3.00, 6.08, 6.67, 5.00, 5.58, 4.83, 6.17, 5.67, 6.50, 6.92]
D = np.zeros((12, 12))
D[np.tril_indices(12, -1)] = lower
D += D.T
names = ['BEL', 'BRA', 'CHI', 'CUB', 'EGY', 'FRA', 'IND', 'ISR', 'USA',
'USS', 'YUG', 'ZAI']
# Data from Figure 2
labels1 = [1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1]
expected1 = {'USA': .43, 'BEL': .39, 'FRA': .35, 'ISR': .30, 'BRA': .22,
'EGY': .20, 'ZAI': .19, 'CUB': .40, 'USS': .34, 'CHI': .33,
'YUG': .26, 'IND': -.04}
score1 = .28
# Data from Figure 3
labels2 = [1, 2, 3, 3, 1, 1, 2, 1, 1, 3, 3, 2]
expected2 = {'USA': .47, 'FRA': .44, 'BEL': .42, 'ISR': .37, 'EGY': .02,
'ZAI': .28, 'BRA': .25, 'IND': .17, 'CUB': .48, 'USS': .44,
'YUG': .31, 'CHI': .31}
score2 = .33
for labels, expected, score in [(labels1, expected1, score1),
(labels2, expected2, score2)]:
expected = [expected[name] for name in names]
# we check to 2dp because that's what's in the paper
        assert np.allclose(silhouette_samples(D, np.array(labels),
                                              metric='precomputed'),
                           expected, atol=1e-2)
        assert silhouette_score(D, np.array(labels),
                                metric='precomputed') == pytest.approx(
                                    score, abs=1e-2)
def test_correct_labelsize():
# Assert 1 < n_labels < n_samples
dataset = datasets.load_iris()
X = dataset.data
# n_labels = n_samples
y = np.arange(X.shape[0])
err_msg = (r'Number of labels is %d\. Valid values are 2 '
r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)))
with pytest.raises(ValueError, match=err_msg):
silhouette_score(X, y)
# n_labels = 1
y = np.zeros(X.shape[0])
err_msg = (r'Number of labels is %d\. Valid values are 2 '
r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)))
with pytest.raises(ValueError, match=err_msg):
silhouette_score(X, y)
def test_non_encoded_labels():
dataset = datasets.load_iris()
X = dataset.data
labels = dataset.target
assert (
silhouette_score(X, labels * 2 + 10) == silhouette_score(X, labels))
assert_array_equal(
silhouette_samples(X, labels * 2 + 10), silhouette_samples(X, labels))
def test_non_numpy_labels():
dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
assert (
silhouette_score(list(X), list(y)) == silhouette_score(X, y))
@pytest.mark.parametrize('dtype', (np.float32, np.float64))
def test_silhouette_nonzero_diag(dtype):
# Make sure silhouette_samples requires diagonal to be zero.
# Non-regression test for #12178
# Construct a zero-diagonal matrix
dists = pairwise_distances(
np.array([[0.2, 0.1, 0.12, 1.34, 1.11, 1.6]], dtype=dtype).T)
labels = [0, 0, 0, 1, 1, 1]
# small values on the diagonal are OK
dists[2][2] = np.finfo(dists.dtype).eps * 10
silhouette_samples(dists, labels, metric='precomputed')
# values bigger than eps * 100 are not
dists[2][2] = np.finfo(dists.dtype).eps * 1000
with pytest.raises(ValueError, match='contains non-zero'):
silhouette_samples(dists, labels, metric='precomputed')
def assert_raises_on_only_one_label(func):
"""Assert message when there is only one label"""
rng = np.random.RandomState(seed=0)
with pytest.raises(ValueError, match="Number of labels is"):
func(rng.rand(10, 2), np.zeros(10))
def assert_raises_on_all_points_same_cluster(func):
"""Assert message when all point are in different clusters"""
rng = np.random.RandomState(seed=0)
with pytest.raises(ValueError, match="Number of labels is"):
func(rng.rand(10, 2), np.arange(10))
def test_calinski_harabasz_score():
assert_raises_on_only_one_label(calinski_harabasz_score)
assert_raises_on_all_points_same_cluster(calinski_harabasz_score)
    # Assert the value is 1. when all samples are equal
    assert 1. == calinski_harabasz_score(np.ones((10, 2)),
                                         [0] * 5 + [1] * 5)
    # Assert the value is 0. when all the cluster means are equal
    assert 0. == calinski_harabasz_score([[-1, -1], [1, 1]] * 10,
                                         [0] * 10 + [1] * 10)
    # General case (with non-numpy arrays): B = 180, W = 20, n = 40, k = 4,
    # so CH = (B / (k - 1)) / (W / (n - k)) = 60 / (20 / 36) = 108
    X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
         [[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
    labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
    assert calinski_harabasz_score(X, labels) == pytest.approx(
        45 * (40 - 4) / (5 * (4 - 1)))
def test_davies_bouldin_score():
assert_raises_on_only_one_label(davies_bouldin_score)
assert_raises_on_all_points_same_cluster(davies_bouldin_score)
    # Assert the value is 0. when all samples are equal
    assert davies_bouldin_score(np.ones((10, 2)),
                                [0] * 5 + [1] * 5) == pytest.approx(0.0)
    # Assert the value is 0. when all the cluster means are equal
    assert davies_bouldin_score([[-1, -1], [1, 1]] * 10,
                                [0] * 10 + [1] * 10) == pytest.approx(0.0)
    # General case (with non-numpy arrays): each cluster has dispersion
    # sqrt(0.5) and its nearest other centroid at distance 3
    X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
         [[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
    labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
    assert davies_bouldin_score(X, labels) == pytest.approx(
        2 * np.sqrt(0.5) / 3)
# Ensure divide by zero warning is not raised in general case
with pytest.warns(None) as record:
davies_bouldin_score(X, labels)
div_zero_warnings = [
warning for warning in record
if "divide by zero encountered" in warning.message.args[0]
]
assert len(div_zero_warnings) == 0
    # General case - some clusters have a single sample: cluster 0 has
    # dispersion sqrt(2), the singleton clusters have dispersion 0, and the
    # per-cluster worst ratios are 1/2, 1/2 and 1/4, so DB = (5. / 4) / 3
    X = ([[0, 0], [2, 2], [3, 3], [5, 5]])
    labels = [0, 0, 1, 2]
    assert davies_bouldin_score(X, labels) == pytest.approx((5. / 4) / 3)