Uploaded Test files
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,50 @@
"""Testing for bicluster metrics module"""

import numpy as np

from sklearn.utils._testing import assert_almost_equal

from sklearn.metrics.cluster._bicluster import _jaccard
from sklearn.metrics import consensus_score


def test_jaccard():
    a1 = np.array([True, True, False, False])
    a2 = np.array([True, True, True, True])
    a3 = np.array([False, True, True, False])
    a4 = np.array([False, False, True, True])

    assert _jaccard(a1, a1, a1, a1) == 1
    assert _jaccard(a1, a1, a2, a2) == 0.25
    assert _jaccard(a1, a1, a3, a3) == 1.0 / 7
    assert _jaccard(a1, a1, a4, a4) == 0


def test_consensus_score():
    a = [[True, True, False, False],
         [False, False, True, True]]
    b = a[::-1]

    assert consensus_score((a, a), (a, a)) == 1
    assert consensus_score((a, a), (b, b)) == 1
    assert consensus_score((a, b), (a, b)) == 1
    assert consensus_score((a, b), (b, a)) == 1

    assert consensus_score((a, a), (b, a)) == 0
    assert consensus_score((a, a), (a, b)) == 0
    assert consensus_score((b, b), (a, b)) == 0
    assert consensus_score((b, b), (b, a)) == 0


def test_consensus_score_issue2445():
    ''' Different number of biclusters in A and B'''
    a_rows = np.array([[True, True, False, False],
                       [False, False, True, True],
                       [False, False, False, True]])
    a_cols = np.array([[True, True, False, False],
                       [False, False, True, True],
                       [False, False, False, True]])
    idx = [0, 2]
    s = consensus_score((a_rows, a_cols), (a_rows[idx], a_cols[idx]))
    # B contains 2 of the 3 biclusters in A, so score should be 2/3
    assert_almost_equal(s, 2.0/3.0)
@@ -0,0 +1,211 @@
from functools import partial

import pytest
import numpy as np

from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import completeness_score
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.metrics.cluster import homogeneity_score
from sklearn.metrics.cluster import mutual_info_score
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.metrics.cluster import v_measure_score
from sklearn.metrics.cluster import silhouette_score
from sklearn.metrics.cluster import calinski_harabasz_score
from sklearn.metrics.cluster import davies_bouldin_score

from sklearn.utils._testing import assert_allclose


# Dictionaries of metrics
# ------------------------
# The goal of having those dictionaries is to have an easy way to call a
# particular metric and associate a name to each function:
#   - SUPERVISED_METRICS: all supervised cluster metrics - (when given a
# ground truth value)
#   - UNSUPERVISED_METRICS: all unsupervised cluster metrics
#
# Those dictionaries will be used to test systematically some invariance
# properties, e.g. invariance toward several input layout.
#

SUPERVISED_METRICS = {
    "adjusted_mutual_info_score": adjusted_mutual_info_score,
    "adjusted_rand_score": adjusted_rand_score,
    "completeness_score": completeness_score,
    "homogeneity_score": homogeneity_score,
    "mutual_info_score": mutual_info_score,
    "normalized_mutual_info_score": normalized_mutual_info_score,
    "v_measure_score": v_measure_score,
    "fowlkes_mallows_score": fowlkes_mallows_score
}

UNSUPERVISED_METRICS = {
    "silhouette_score": silhouette_score,
    "silhouette_manhattan": partial(silhouette_score, metric='manhattan'),
    "calinski_harabasz_score": calinski_harabasz_score,
    "davies_bouldin_score": davies_bouldin_score
}

# Lists of metrics with common properties
# ---------------------------------------
# Lists of metrics with common properties are used to test systematically some
# functionalities and invariance, e.g. SYMMETRIC_METRICS lists all metrics
# that are symmetric with respect to their input argument y_true and y_pred.
#
# --------------------------------------------------------------------
# Symmetric with respect to their input arguments y_true and y_pred.
# Symmetric metrics only apply to supervised clusters.
SYMMETRIC_METRICS = [
    "adjusted_rand_score", "v_measure_score",
    "mutual_info_score", "adjusted_mutual_info_score",
    "normalized_mutual_info_score", "fowlkes_mallows_score"
]

NON_SYMMETRIC_METRICS = ["homogeneity_score", "completeness_score"]

# Metrics whose upper bound is 1
NORMALIZED_METRICS = [
    "adjusted_rand_score", "homogeneity_score", "completeness_score",
    "v_measure_score", "adjusted_mutual_info_score", "fowlkes_mallows_score",
    "normalized_mutual_info_score"
]


rng = np.random.RandomState(0)
y1 = rng.randint(3, size=30)
y2 = rng.randint(3, size=30)


def test_symmetric_non_symmetric_union():
    assert (sorted(SYMMETRIC_METRICS + NON_SYMMETRIC_METRICS) ==
            sorted(SUPERVISED_METRICS))


# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize(
    'metric_name, y1, y2',
    [(name, y1, y2) for name in SYMMETRIC_METRICS]
)
def test_symmetry(metric_name, y1, y2):
    metric = SUPERVISED_METRICS[metric_name]
    assert metric(y1, y2) == pytest.approx(metric(y2, y1))


@pytest.mark.parametrize(
    'metric_name, y1, y2',
    [(name, y1, y2) for name in NON_SYMMETRIC_METRICS]
)
def test_non_symmetry(metric_name, y1, y2):
    metric = SUPERVISED_METRICS[metric_name]
    assert metric(y1, y2) != pytest.approx(metric(y2, y1))


# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize("metric_name", NORMALIZED_METRICS)
def test_normalized_output(metric_name):
    upper_bound_1 = [0, 0, 0, 1, 1, 1]
    upper_bound_2 = [0, 0, 0, 1, 1, 1]
    metric = SUPERVISED_METRICS[metric_name]
    assert metric([0, 0, 0, 1, 1], [0, 0, 0, 1, 2]) > 0.0
    assert metric([0, 0, 1, 1, 2], [0, 0, 1, 1, 1]) > 0.0
    assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
    assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
    assert metric(upper_bound_1, upper_bound_2) == pytest.approx(1.0)

    lower_bound_1 = [0, 0, 0, 0, 0, 0]
    lower_bound_2 = [0, 1, 2, 3, 4, 5]
    score = np.array([metric(lower_bound_1, lower_bound_2),
                      metric(lower_bound_2, lower_bound_1)])
    assert not (score < 0).any()


# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize(
    "metric_name", dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS)
)
def test_permute_labels(metric_name):
    # All clustering metrics do not change score due to permutations of labels
    # that is when 0 and 1 exchanged.
    y_label = np.array([0, 0, 0, 1, 1, 0, 1])
    y_pred = np.array([1, 0, 1, 0, 1, 1, 0])
    if metric_name in SUPERVISED_METRICS:
        metric = SUPERVISED_METRICS[metric_name]
        score_1 = metric(y_pred, y_label)
        assert_allclose(score_1, metric(1 - y_pred, y_label))
        assert_allclose(score_1, metric(1 - y_pred, 1 - y_label))
        assert_allclose(score_1, metric(y_pred, 1 - y_label))
    else:
        metric = UNSUPERVISED_METRICS[metric_name]
        X = np.random.randint(10, size=(7, 10))
        score_1 = metric(X, y_pred)
        assert_allclose(score_1, metric(X, 1 - y_pred))


# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize(
    "metric_name", dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS)
)
# For all clustering metrics Input parameters can be both
# in the form of arrays lists, positive, negative or string
def test_format_invariance(metric_name):
    y_true = [0, 0, 0, 0, 1, 1, 1, 1]
    y_pred = [0, 1, 2, 3, 4, 5, 6, 7]

    def generate_formats(y):
        y = np.array(y)
        yield y, 'array of ints'
        yield y.tolist(), 'list of ints'
        yield [str(x) + "-a" for x in y.tolist()], 'list of strs'
        yield (np.array([str(x) + "-a" for x in y.tolist()], dtype=object),
               'array of strs')
        yield y - 1, 'including negative ints'
        yield y + 1, 'strictly positive ints'

    if metric_name in SUPERVISED_METRICS:
        metric = SUPERVISED_METRICS[metric_name]
        score_1 = metric(y_true, y_pred)
        y_true_gen = generate_formats(y_true)
        y_pred_gen = generate_formats(y_pred)
        for (y_true_fmt, fmt_name), (y_pred_fmt, _) in zip(y_true_gen,
                                                           y_pred_gen):
            assert score_1 == metric(y_true_fmt, y_pred_fmt)
    else:
        metric = UNSUPERVISED_METRICS[metric_name]
        X = np.random.randint(10, size=(8, 10))
        score_1 = metric(X, y_true)
        assert score_1 == metric(X.astype(float), y_true)
        y_true_gen = generate_formats(y_true)
        for (y_true_fmt, fmt_name) in y_true_gen:
            assert score_1 == metric(X, y_true_fmt)


@pytest.mark.parametrize("metric", SUPERVISED_METRICS.values())
def test_single_sample(metric):
    # only the supervised metrics support single sample
    for i, j in [(0, 0), (0, 1), (1, 0), (1, 1)]:
        metric([i], [j])


@pytest.mark.parametrize(
    "metric_name, metric_func",
    dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS).items()
)
def test_inf_nan_input(metric_name, metric_func):
    if metric_name in SUPERVISED_METRICS:
        invalids = [([0, 1], [np.inf, np.inf]),
                    ([0, 1], [np.nan, np.nan]),
                    ([0, 1], [np.nan, np.inf])]
    else:
        X = np.random.randint(10, size=(2, 10))
        invalids = [(X, [np.inf, np.inf]),
                    (X, [np.nan, np.nan]),
                    (X, [np.nan, np.inf])]
    with pytest.raises(ValueError, match='contains NaN, infinity'):
        for args in invalids:
            metric_func(*args)
@@ -0,0 +1,358 @@
import numpy as np
import pytest

from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import completeness_score
from sklearn.metrics.cluster import contingency_matrix
from sklearn.metrics.cluster import entropy
from sklearn.metrics.cluster import expected_mutual_information
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.metrics.cluster import homogeneity_completeness_v_measure
from sklearn.metrics.cluster import homogeneity_score
from sklearn.metrics.cluster import mutual_info_score
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.metrics.cluster import v_measure_score
from sklearn.metrics.cluster._supervised import _generalized_average

from sklearn.utils import assert_all_finite
from sklearn.utils._testing import (
    assert_almost_equal, ignore_warnings)
from numpy.testing import assert_array_almost_equal


score_funcs = [
    adjusted_rand_score,
    homogeneity_score,
    completeness_score,
    v_measure_score,
    adjusted_mutual_info_score,
    normalized_mutual_info_score,
]


@ignore_warnings(category=FutureWarning)
def test_error_messages_on_wrong_input():
    for score_func in score_funcs:
        expected = (r'Found input variables with inconsistent numbers '
                    r'of samples: \[2, 3\]')
        with pytest.raises(ValueError, match=expected):
            score_func([0, 1], [1, 1, 1])

        expected = r"labels_true must be 1D: shape is \(2"
        with pytest.raises(ValueError, match=expected):
            score_func([[0, 1], [1, 0]], [1, 1, 1])

        expected = r"labels_pred must be 1D: shape is \(2"
        with pytest.raises(ValueError, match=expected):
            score_func([0, 1, 0], [[1, 1], [0, 0]])


def test_generalized_average():
    a, b = 1, 2
    methods = ["min", "geometric", "arithmetic", "max"]
    means = [_generalized_average(a, b, method) for method in methods]
    assert means[0] <= means[1] <= means[2] <= means[3]
    c, d = 12, 12
    means = [_generalized_average(c, d, method) for method in methods]
    assert means[0] == means[1] == means[2] == means[3]


@ignore_warnings(category=FutureWarning)
def test_perfect_matches():
    for score_func in score_funcs:
        assert score_func([], []) == pytest.approx(1.0)
        assert score_func([0], [1]) == pytest.approx(1.0)
        assert score_func([0, 0, 0], [0, 0, 0]) == pytest.approx(1.0)
        assert score_func([0, 1, 0], [42, 7, 42]) == pytest.approx(1.0)
        assert score_func([0., 1., 0.], [42., 7., 42.]) == pytest.approx(1.0)
        assert score_func([0., 1., 2.], [42., 7., 2.]) == pytest.approx(1.0)
        assert score_func([0, 1, 2], [42, 7, 2]) == pytest.approx(1.0)
    score_funcs_with_changing_means = [
        normalized_mutual_info_score,
        adjusted_mutual_info_score,
    ]
    means = {"min", "geometric", "arithmetic", "max"}
    for score_func in score_funcs_with_changing_means:
        for mean in means:
            assert score_func([], [], mean) == pytest.approx(1.0)
            assert score_func([0], [1], mean) == pytest.approx(1.0)
            assert score_func([0, 0, 0], [0, 0, 0], mean) == pytest.approx(1.0)
            assert score_func(
                [0, 1, 0], [42, 7, 42], mean) == pytest.approx(1.0)
            assert score_func(
                [0., 1., 0.], [42., 7., 42.], mean) == pytest.approx(1.0)
            assert score_func(
                [0., 1., 2.], [42., 7., 2.], mean) == pytest.approx(1.0)
            assert score_func(
                [0, 1, 2], [42, 7, 2], mean) == pytest.approx(1.0)


def test_homogeneous_but_not_complete_labeling():
    # homogeneous but not complete clustering
    h, c, v = homogeneity_completeness_v_measure(
        [0, 0, 0, 1, 1, 1],
        [0, 0, 0, 1, 2, 2])
    assert_almost_equal(h, 1.00, 2)
    assert_almost_equal(c, 0.69, 2)
    assert_almost_equal(v, 0.81, 2)


def test_complete_but_not_homogeneous_labeling():
    # complete but not homogeneous clustering
    h, c, v = homogeneity_completeness_v_measure(
        [0, 0, 1, 1, 2, 2],
        [0, 0, 1, 1, 1, 1])
    assert_almost_equal(h, 0.58, 2)
    assert_almost_equal(c, 1.00, 2)
    assert_almost_equal(v, 0.73, 2)


def test_not_complete_and_not_homogeneous_labeling():
    # neither complete nor homogeneous but not so bad either
    h, c, v = homogeneity_completeness_v_measure(
        [0, 0, 0, 1, 1, 1],
        [0, 1, 0, 1, 2, 2])
    assert_almost_equal(h, 0.67, 2)
    assert_almost_equal(c, 0.42, 2)
    assert_almost_equal(v, 0.52, 2)


def test_beta_parameter():
    # test for when beta passed to
    # homogeneity_completeness_v_measure
    # and v_measure_score
    beta_test = 0.2
    h_test = 0.67
    c_test = 0.42
    v_test = ((1 + beta_test) * h_test * c_test
              / (beta_test * h_test + c_test))

    h, c, v = homogeneity_completeness_v_measure(
        [0, 0, 0, 1, 1, 1],
        [0, 1, 0, 1, 2, 2],
        beta=beta_test)
    assert_almost_equal(h, h_test, 2)
    assert_almost_equal(c, c_test, 2)
    assert_almost_equal(v, v_test, 2)

    v = v_measure_score(
        [0, 0, 0, 1, 1, 1],
        [0, 1, 0, 1, 2, 2],
        beta=beta_test)
    assert_almost_equal(v, v_test, 2)


def test_non_consecutive_labels():
    # regression tests for labels with gaps
    h, c, v = homogeneity_completeness_v_measure(
        [0, 0, 0, 2, 2, 2],
        [0, 1, 0, 1, 2, 2])
    assert_almost_equal(h, 0.67, 2)
    assert_almost_equal(c, 0.42, 2)
    assert_almost_equal(v, 0.52, 2)

    h, c, v = homogeneity_completeness_v_measure(
        [0, 0, 0, 1, 1, 1],
        [0, 4, 0, 4, 2, 2])
    assert_almost_equal(h, 0.67, 2)
    assert_almost_equal(c, 0.42, 2)
    assert_almost_equal(v, 0.52, 2)

    ari_1 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])
    ari_2 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])
    assert_almost_equal(ari_1, 0.24, 2)
    assert_almost_equal(ari_2, 0.24, 2)


@ignore_warnings(category=FutureWarning)
def uniform_labelings_scores(score_func, n_samples, k_range, n_runs=10,
                             seed=42):
    # Compute score for random uniform cluster labelings
    random_labels = np.random.RandomState(seed).randint
    scores = np.zeros((len(k_range), n_runs))
    for i, k in enumerate(k_range):
        for j in range(n_runs):
            labels_a = random_labels(low=0, high=k, size=n_samples)
            labels_b = random_labels(low=0, high=k, size=n_samples)
            scores[i, j] = score_func(labels_a, labels_b)
    return scores


@ignore_warnings(category=FutureWarning)
def test_adjustment_for_chance():
    # Check that adjusted scores are almost zero on random labels
    n_clusters_range = [2, 10, 50, 90]
    n_samples = 100
    n_runs = 10

    scores = uniform_labelings_scores(
        adjusted_rand_score, n_samples, n_clusters_range, n_runs)

    max_abs_scores = np.abs(scores).max(axis=1)
    assert_array_almost_equal(max_abs_scores, [0.02, 0.03, 0.03, 0.02], 2)


def test_adjusted_mutual_info_score():
    # Compute the Adjusted Mutual Information and test against known values
    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
    # Mutual information
    mi = mutual_info_score(labels_a, labels_b)
    assert_almost_equal(mi, 0.41022, 5)
    # with provided sparse contingency
    C = contingency_matrix(labels_a, labels_b, sparse=True)
    mi = mutual_info_score(labels_a, labels_b, contingency=C)
    assert_almost_equal(mi, 0.41022, 5)
    # with provided dense contingency
    C = contingency_matrix(labels_a, labels_b)
    mi = mutual_info_score(labels_a, labels_b, contingency=C)
    assert_almost_equal(mi, 0.41022, 5)
    # Expected mutual information
    n_samples = C.sum()
    emi = expected_mutual_information(C, n_samples)
    assert_almost_equal(emi, 0.15042, 5)
    # Adjusted mutual information
    ami = adjusted_mutual_info_score(labels_a, labels_b)
    assert_almost_equal(ami, 0.27821, 5)
    ami = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3])
    assert ami == pytest.approx(1.0)
    # Test with a very large array
    a110 = np.array([list(labels_a) * 110]).flatten()
    b110 = np.array([list(labels_b) * 110]).flatten()
    ami = adjusted_mutual_info_score(a110, b110)
    assert_almost_equal(ami, 0.38, 2)


def test_expected_mutual_info_overflow():
    # Test for regression where contingency cell exceeds 2**16
    # leading to overflow in np.outer, resulting in EMI > 1
    assert expected_mutual_information(np.array([[70000]]), 70000) <= 1


def test_int_overflow_mutual_info_fowlkes_mallows_score():
    # Test overflow in mutual_info_classif and fowlkes_mallows_score
    x = np.array([1] * (52632 + 2529) + [2] * (14660 + 793) + [3] * (3271 +
                 204) + [4] * (814 + 39) + [5] * (316 + 20))
    y = np.array([0] * 52632 + [1] * 2529 + [0] * 14660 + [1] * 793 +
                 [0] * 3271 + [1] * 204 + [0] * 814 + [1] * 39 + [0] * 316 +
                 [1] * 20)

    assert_all_finite(mutual_info_score(x, y))
    assert_all_finite(fowlkes_mallows_score(x, y))


def test_entropy():
    ent = entropy([0, 0, 42.])
    assert_almost_equal(ent, 0.6365141, 5)
    assert_almost_equal(entropy([]), 1)


def test_contingency_matrix():
    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
    C = contingency_matrix(labels_a, labels_b)
    C2 = np.histogram2d(labels_a, labels_b,
                        bins=(np.arange(1, 5),
                              np.arange(1, 5)))[0]
    assert_array_almost_equal(C, C2)
    C = contingency_matrix(labels_a, labels_b, eps=.1)
    assert_array_almost_equal(C, C2 + .1)


def test_contingency_matrix_sparse():
    labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
    labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
    C = contingency_matrix(labels_a, labels_b)
    C_sparse = contingency_matrix(labels_a, labels_b, sparse=True).toarray()
    assert_array_almost_equal(C, C_sparse)
    with pytest.raises(ValueError, match="Cannot set 'eps' when sparse=True"):
        contingency_matrix(labels_a, labels_b, eps=1e-10, sparse=True)


@ignore_warnings(category=FutureWarning)
def test_exactly_zero_info_score():
    # Check numerical stability when information is exactly zero
    for i in np.logspace(1, 4, 4).astype(np.int):
        labels_a, labels_b = (np.ones(i, dtype=np.int),
                              np.arange(i, dtype=np.int))
        assert normalized_mutual_info_score(
            labels_a, labels_b) == pytest.approx(0.0)
        assert v_measure_score(
            labels_a, labels_b) == pytest.approx(0.0)
        assert adjusted_mutual_info_score(
            labels_a, labels_b) == pytest.approx(0.0)
        assert normalized_mutual_info_score(
            labels_a, labels_b) == pytest.approx(0.0)
        for method in ["min", "geometric", "arithmetic", "max"]:
            assert adjusted_mutual_info_score(
                labels_a, labels_b, method) == pytest.approx(0.0)
            assert normalized_mutual_info_score(
                labels_a, labels_b, method) == pytest.approx(0.0)


def test_v_measure_and_mutual_information(seed=36):
    # Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(np.int):
        random_state = np.random.RandomState(seed)
        labels_a, labels_b = (random_state.randint(0, 10, i),
                              random_state.randint(0, 10, i))
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            2.0 * mutual_info_score(labels_a, labels_b) /
                            (entropy(labels_a) + entropy(labels_b)), 0)
        avg = 'arithmetic'
        assert_almost_equal(v_measure_score(labels_a, labels_b),
                            normalized_mutual_info_score(labels_a, labels_b,
                                                         average_method=avg)
                            )


def test_fowlkes_mallows_score():
    # General case
    score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1],
                                  [0, 0, 1, 1, 2, 2])
    assert_almost_equal(score, 4. / np.sqrt(12. * 6.))

    # Perfect match but where the label names changed
    perfect_score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1],
                                          [1, 1, 1, 0, 0, 0])
    assert_almost_equal(perfect_score, 1.)

    # Worst case
    worst_score = fowlkes_mallows_score([0, 0, 0, 0, 0, 0],
                                        [0, 1, 2, 3, 4, 5])
    assert_almost_equal(worst_score, 0.)


def test_fowlkes_mallows_score_properties():
    # handcrafted example
    labels_a = np.array([0, 0, 0, 1, 1, 2])
    labels_b = np.array([1, 1, 2, 2, 0, 0])
    expected = 1. / np.sqrt((1. + 3.) * (1. + 2.))
    # FMI = TP / sqrt((TP + FP) * (TP + FN))

    score_original = fowlkes_mallows_score(labels_a, labels_b)
    assert_almost_equal(score_original, expected)

    # symmetric property
    score_symmetric = fowlkes_mallows_score(labels_b, labels_a)
    assert_almost_equal(score_symmetric, expected)

    # permutation property
    score_permuted = fowlkes_mallows_score((labels_a + 1) % 3, labels_b)
    assert_almost_equal(score_permuted, expected)

    # symmetric and permutation(both together)
    score_both = fowlkes_mallows_score(labels_b, (labels_a + 2) % 3)
    assert_almost_equal(score_both, expected)


@pytest.mark.parametrize('labels_true, labels_pred', [
    (['a'] * 6, [1, 1, 0, 0, 1, 1]),
    ([1] * 6, [1, 1, 0, 0, 1, 1]),
    ([1, 1, 0, 0, 1, 1], ['a'] * 6),
    ([1, 1, 0, 0, 1, 1], [1] * 6),
])
def test_mutual_info_score_positive_constant_label(labels_true, labels_pred):
    # non-regression test for #16355
    assert mutual_info_score(labels_true, labels_pred) >= 0
@@ -0,0 +1,252 @@
import numpy as np
import scipy.sparse as sp
import pytest
from scipy.sparse import csr_matrix

from sklearn import datasets
from sklearn.utils._testing import assert_array_equal
from sklearn.metrics.cluster import silhouette_score
from sklearn.metrics.cluster import silhouette_samples
from sklearn.metrics import pairwise_distances
from sklearn.metrics.cluster import calinski_harabasz_score
from sklearn.metrics.cluster import davies_bouldin_score


def test_silhouette():
    # Tests the Silhouette Coefficient.
    dataset = datasets.load_iris()
    X_dense = dataset.data
    X_csr = csr_matrix(X_dense)
    X_dok = sp.dok_matrix(X_dense)
    X_lil = sp.lil_matrix(X_dense)
    y = dataset.target

    for X in [X_dense, X_csr, X_dok, X_lil]:
        D = pairwise_distances(X, metric='euclidean')
        # Given that the actual labels are used, we can assume that S would be
        # positive.
        score_precomputed = silhouette_score(D, y, metric='precomputed')
        assert score_precomputed > 0
        # Test without calculating D
        score_euclidean = silhouette_score(X, y, metric='euclidean')
        pytest.approx(score_precomputed, score_euclidean)

        if X is X_dense:
            score_dense_without_sampling = score_precomputed
        else:
            pytest.approx(score_euclidean,
                          score_dense_without_sampling)

        # Test with sampling
        score_precomputed = silhouette_score(D, y, metric='precomputed',
                                             sample_size=int(X.shape[0] / 2),
                                             random_state=0)
        score_euclidean = silhouette_score(X, y, metric='euclidean',
                                           sample_size=int(X.shape[0] / 2),
                                           random_state=0)
        assert score_precomputed > 0
        assert score_euclidean > 0
        pytest.approx(score_euclidean, score_precomputed)

        if X is X_dense:
            score_dense_with_sampling = score_precomputed
        else:
            pytest.approx(score_euclidean, score_dense_with_sampling)


def test_cluster_size_1():
    # Assert Silhouette Coefficient == 0 when there is 1 sample in a cluster
    # (cluster 0). We also test the case where there are identical samples
    # as the only members of a cluster (cluster 2). To our knowledge, this case
    # is not discussed in reference material, and we choose for it a sample
    # score of 1.
    X = [[0.], [1.], [1.], [2.], [3.], [3.]]
    labels = np.array([0, 1, 1, 1, 2, 2])

    # Cluster 0: 1 sample -> score of 0 by Rousseeuw's convention
    # Cluster 1: intra-cluster = [.5, .5, 1]
    #            inter-cluster = [1, 1, 1]
    #            silhouette    = [.5, .5, 0]
    # Cluster 2: intra-cluster = [0, 0]
    #            inter-cluster = [arbitrary, arbitrary]
    #            silhouette    = [1., 1.]

    silhouette = silhouette_score(X, labels)
    assert not np.isnan(silhouette)
    ss = silhouette_samples(X, labels)
    assert_array_equal(ss, [0, .5, .5, 0, 1, 1])


def test_silhouette_paper_example():
    # Explicitly check per-sample results against Rousseeuw (1987)
    # Data from Table 1
    lower = [5.58,
             7.00, 6.50,
             7.08, 7.00, 3.83,
             4.83, 5.08, 8.17, 5.83,
             2.17, 5.75, 6.67, 6.92, 4.92,
             6.42, 5.00, 5.58, 6.00, 4.67, 6.42,
             3.42, 5.50, 6.42, 6.42, 5.00, 3.92, 6.17,
             2.50, 4.92, 6.25, 7.33, 4.50, 2.25, 6.33, 2.75,
             6.08, 6.67, 4.25, 2.67, 6.00, 6.17, 6.17, 6.92, 6.17,
             5.25, 6.83, 4.50, 3.75, 5.75, 5.42, 6.08, 5.83, 6.67, 3.67,
             4.75, 3.00, 6.08, 6.67, 5.00, 5.58, 4.83, 6.17, 5.67, 6.50, 6.92]
    D = np.zeros((12, 12))
    D[np.tril_indices(12, -1)] = lower
    D += D.T

    names = ['BEL', 'BRA', 'CHI', 'CUB', 'EGY', 'FRA', 'IND', 'ISR', 'USA',
             'USS', 'YUG', 'ZAI']

    # Data from Figure 2
    labels1 = [1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1]
    expected1 = {'USA': .43, 'BEL': .39, 'FRA': .35, 'ISR': .30, 'BRA': .22,
                 'EGY': .20, 'ZAI': .19, 'CUB': .40, 'USS': .34, 'CHI': .33,
                 'YUG': .26, 'IND': -.04}
    score1 = .28

    # Data from Figure 3
    labels2 = [1, 2, 3, 3, 1, 1, 2, 1, 1, 3, 3, 2]
    expected2 = {'USA': .47, 'FRA': .44, 'BEL': .42, 'ISR': .37, 'EGY': .02,
                 'ZAI': .28, 'BRA': .25, 'IND': .17, 'CUB': .48, 'USS': .44,
                 'YUG': .31, 'CHI': .31}
    score2 = .33

    for labels, expected, score in [(labels1, expected1, score1),
                                    (labels2, expected2, score2)]:
        expected = [expected[name] for name in names]
        # we check to 2dp because that's what's in the paper
        pytest.approx(expected,
                      silhouette_samples(D, np.array(labels),
                                         metric='precomputed'),
                      abs=1e-2)
        pytest.approx(score,
                      silhouette_score(D, np.array(labels),
                                       metric='precomputed'),
                      abs=1e-2)


def test_correct_labelsize():
    # Assert 1 < n_labels < n_samples
    dataset = datasets.load_iris()
    X = dataset.data

    # n_labels = n_samples
    y = np.arange(X.shape[0])
    err_msg = (r'Number of labels is %d\. Valid values are 2 '
               r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)))
    with pytest.raises(ValueError, match=err_msg):
        silhouette_score(X, y)

    # n_labels = 1
    y = np.zeros(X.shape[0])
    err_msg = (r'Number of labels is %d\. Valid values are 2 '
               r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)))
    with pytest.raises(ValueError, match=err_msg):
        silhouette_score(X, y)


def test_non_encoded_labels():
    dataset = datasets.load_iris()
    X = dataset.data
    labels = dataset.target
    assert (
        silhouette_score(X, labels * 2 + 10) == silhouette_score(X, labels))
    assert_array_equal(
        silhouette_samples(X, labels * 2 + 10), silhouette_samples(X, labels))


def test_non_numpy_labels():
    dataset = datasets.load_iris()
    X = dataset.data
    y = dataset.target
    assert (
        silhouette_score(list(X), list(y)) == silhouette_score(X, y))


@pytest.mark.parametrize('dtype', (np.float32, np.float64))
def test_silhouette_nonzero_diag(dtype):
    # Make sure silhouette_samples requires diagonal to be zero.
    # Non-regression test for #12178

    # Construct a zero-diagonal matrix
    dists = pairwise_distances(
        np.array([[0.2, 0.1, 0.12, 1.34, 1.11, 1.6]], dtype=dtype).T)
    labels = [0, 0, 0, 1, 1, 1]

    # small values on the diagonal are OK
    dists[2][2] = np.finfo(dists.dtype).eps * 10
    silhouette_samples(dists, labels, metric='precomputed')

    # values bigger than eps * 100 are not
    dists[2][2] = np.finfo(dists.dtype).eps * 1000
    with pytest.raises(ValueError, match='contains non-zero'):
        silhouette_samples(dists, labels, metric='precomputed')


def assert_raises_on_only_one_label(func):
    """Assert message when there is only one label"""
    rng = np.random.RandomState(seed=0)
    with pytest.raises(ValueError, match="Number of labels is"):
        func(rng.rand(10, 2), np.zeros(10))


def assert_raises_on_all_points_same_cluster(func):
    """Assert message when all point are in different clusters"""
    rng = np.random.RandomState(seed=0)
    with pytest.raises(ValueError, match="Number of labels is"):
        func(rng.rand(10, 2), np.arange(10))


def test_calinski_harabasz_score():
    assert_raises_on_only_one_label(calinski_harabasz_score)

    assert_raises_on_all_points_same_cluster(calinski_harabasz_score)

    # Assert the value is 1. when all samples are equals
    assert 1. == calinski_harabasz_score(np.ones((10, 2)),
                                         [0] * 5 + [1] * 5)

    # Assert the value is 0. when all the mean cluster are equal
    assert 0. == calinski_harabasz_score([[-1, -1], [1, 1]] * 10,
                                         [0] * 10 + [1] * 10)

    # General case (with non numpy arrays)
    X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
         [[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
    labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
    pytest.approx(calinski_harabasz_score(X, labels),
                  45 * (40 - 4) / (5 * (4 - 1)))


def test_davies_bouldin_score():
    assert_raises_on_only_one_label(davies_bouldin_score)
    assert_raises_on_all_points_same_cluster(davies_bouldin_score)

    # Assert the value is 0. when all samples are equals
    assert davies_bouldin_score(np.ones((10, 2)),
                                [0] * 5 + [1] * 5) == pytest.approx(0.0)

    # Assert the value is 0. when all the mean cluster are equal
    assert davies_bouldin_score([[-1, -1], [1, 1]] * 10,
                                [0] * 10 + [1] * 10) == pytest.approx(0.0)

    # General case (with non numpy arrays)
    X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
         [[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
    labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
    pytest.approx(davies_bouldin_score(X, labels), 2 * np.sqrt(0.5) / 3)

    # Ensure divide by zero warning is not raised in general case
    with pytest.warns(None) as record:
        davies_bouldin_score(X, labels)
    div_zero_warnings = [
        warning for warning in record
        if "divide by zero encountered" in warning.message.args[0]
    ]
    assert len(div_zero_warnings) == 0

    # General case - cluster have one sample
    X = ([[0, 0], [2, 2], [3, 3], [5, 5]])
    labels = [0, 0, 1, 2]
    pytest.approx(davies_bouldin_score(X, labels), (5. / 4) / 3)