Uploaded Test files

This commit is contained in:
Batuhan Berk Başoğlu 2020-11-12 11:05:57 -05:00
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions


@@ -0,0 +1,32 @@
"""
The :mod:`sklearn.metrics.cluster` submodule contains evaluation metrics for
cluster analysis results. There are two forms of evaluation:
- supervised, which uses ground truth class values for each sample.
- unsupervised, which does not and measures the 'quality' of the model itself.
"""
from ._supervised import adjusted_mutual_info_score
from ._supervised import normalized_mutual_info_score
from ._supervised import adjusted_rand_score
from ._supervised import completeness_score
from ._supervised import contingency_matrix
from ._supervised import expected_mutual_information
from ._supervised import homogeneity_completeness_v_measure
from ._supervised import homogeneity_score
from ._supervised import mutual_info_score
from ._supervised import v_measure_score
from ._supervised import fowlkes_mallows_score
from ._supervised import entropy
from ._unsupervised import silhouette_samples
from ._unsupervised import silhouette_score
from ._unsupervised import calinski_harabasz_score
from ._unsupervised import davies_bouldin_score
from ._bicluster import consensus_score
__all__ = ["adjusted_mutual_info_score", "normalized_mutual_info_score",
"adjusted_rand_score", "completeness_score", "contingency_matrix",
"expected_mutual_information", "homogeneity_completeness_v_measure",
"homogeneity_score", "mutual_info_score", "v_measure_score",
"fowlkes_mallows_score", "entropy", "silhouette_samples",
"silhouette_score", "calinski_harabasz_score",
"davies_bouldin_score", "consensus_score"]


@@ -0,0 +1,86 @@
import numpy as np
from scipy.optimize import linear_sum_assignment
from ...utils.validation import check_consistent_length, check_array
from ...utils.validation import _deprecate_positional_args
__all__ = ["consensus_score"]
def _check_rows_and_columns(a, b):
"""Unpacks the row and column arrays and checks their shape."""
check_consistent_length(*a)
check_consistent_length(*b)
checks = lambda x: check_array(x, ensure_2d=False)
a_rows, a_cols = map(checks, a)
b_rows, b_cols = map(checks, b)
return a_rows, a_cols, b_rows, b_cols
def _jaccard(a_rows, a_cols, b_rows, b_cols):
"""Jaccard coefficient on the elements of the two biclusters."""
intersection = ((a_rows * b_rows).sum() *
(a_cols * b_cols).sum())
a_size = a_rows.sum() * a_cols.sum()
b_size = b_rows.sum() * b_cols.sum()
return intersection / (a_size + b_size - intersection)
def _pairwise_similarity(a, b, similarity):
"""Computes pairwise similarity matrix.
result[i, j] is the Jaccard coefficient of a's bicluster i and b's
bicluster j.
"""
a_rows, a_cols, b_rows, b_cols = _check_rows_and_columns(a, b)
n_a = a_rows.shape[0]
n_b = b_rows.shape[0]
result = np.array(list(list(similarity(a_rows[i], a_cols[i],
b_rows[j], b_cols[j])
for j in range(n_b))
for i in range(n_a)))
return result
@_deprecate_positional_args
def consensus_score(a, b, *, similarity="jaccard"):
"""The similarity of two sets of biclusters.
Similarity between individual biclusters is computed. Then the
best matching between sets is found using the Hungarian algorithm.
The final score is the sum of similarities divided by the size of
the larger set.
Read more in the :ref:`User Guide <biclustering>`.
Parameters
----------
a : (rows, columns)
Tuple of row and column indicators for a set of biclusters.
b : (rows, columns)
Another set of biclusters like ``a``.
similarity : string or function, optional, default: "jaccard"
May be the string "jaccard" to use the Jaccard coefficient, or
any function that takes four arguments, each of which is a 1d
indicator vector: (a_rows, a_columns, b_rows, b_columns).
References
----------
* Hochreiter, Bodenhofer, et al., 2010. `FABIA: factor analysis
for bicluster acquisition
<https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2881408/>`__.
"""
if similarity == "jaccard":
similarity = _jaccard
matrix = _pairwise_similarity(a, b, similarity)
row_indices, col_indices = linear_sum_assignment(1. - matrix)
n_a = len(a[0])
n_b = len(b[0])
return matrix[row_indices, col_indices].sum() / max(n_a, n_b)
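
A minimal usage sketch (editor's addition, mirroring the bicluster tests later in this commit): identical sets of biclusters score 1.0, and the Hungarian matching makes the score invariant to the order in which the biclusters are listed:

import numpy as np
from sklearn.metrics import consensus_score

rows = np.array([[True, True, False, False],
                 [False, False, True, True]])
cols = rows  # toy example: the same indicators for rows and columns
print(consensus_score((rows, cols), (rows, cols)))              # 1.0
print(consensus_score((rows, cols), (rows[::-1], cols[::-1])))  # 1.0, order does not matter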


@@ -0,0 +1,980 @@
"""Utilities to evaluate the clustering performance of models.
Functions named as *_score return a scalar value to maximize: the higher the
better.
"""
# Authors: Olivier Grisel <olivier.grisel@ensta.org>
# Wei LI <kuantkid@gmail.com>
# Diego Molla <dmolla-aliod@gmail.com>
# Arnaud Fouchet <foucheta@gmail.com>
# Thierry Guillemot <thierry.guillemot.work@gmail.com>
# Gregory Stupp <stuppie@gmail.com>
# Joel Nothman <joel.nothman@gmail.com>
# Arya McCarthy <arya@jhu.edu>
# License: BSD 3 clause
from math import log
import numpy as np
from scipy import sparse as sp
from scipy.special import comb
from ._expected_mutual_info_fast import expected_mutual_information
from ...utils.validation import check_array, check_consistent_length
from ...utils.validation import _deprecate_positional_args
from ...utils.fixes import _astype_copy_false
def _comb2(n):
# the exact version is faster for k == 2: use it by default globally in
# this module instead of the float approximate variant
return comb(n, 2, exact=1)
def check_clusterings(labels_true, labels_pred):
"""Check that the labels arrays are 1D and of same dimension.
Parameters
----------
labels_true : array-like of shape (n_samples,)
The true labels.
labels_pred : array-like of shape (n_samples,)
The predicted labels.
"""
labels_true = check_array(
labels_true, ensure_2d=False, ensure_min_samples=0, dtype=None,
)
labels_pred = check_array(
labels_pred, ensure_2d=False, ensure_min_samples=0, dtype=None,
)
# input checks
if labels_true.ndim != 1:
raise ValueError(
"labels_true must be 1D: shape is %r" % (labels_true.shape,))
if labels_pred.ndim != 1:
raise ValueError(
"labels_pred must be 1D: shape is %r" % (labels_pred.shape,))
check_consistent_length(labels_true, labels_pred)
return labels_true, labels_pred
def _generalized_average(U, V, average_method):
"""Return a particular mean of two numbers."""
if average_method == "min":
return min(U, V)
elif average_method == "geometric":
return np.sqrt(U * V)
elif average_method == "arithmetic":
return np.mean([U, V])
elif average_method == "max":
return max(U, V)
else:
raise ValueError("'average_method' must be 'min', 'geometric', "
"'arithmetic', or 'max'")
@_deprecate_positional_args
def contingency_matrix(labels_true, labels_pred, *, eps=None, sparse=False):
"""Build a contingency matrix describing the relationship between labels.
Parameters
----------
labels_true : int array, shape = [n_samples]
Ground truth class labels to be used as a reference
labels_pred : array-like of shape (n_samples,)
Cluster labels to evaluate
eps : None or float, optional.
If a float, that value is added to all values in the contingency
matrix. This helps to stop NaN propagation.
If ``None``, nothing is adjusted.
sparse : boolean, optional.
If True, return a sparse CSR contingency matrix. If ``eps is not None``
and ``sparse is True``, a ValueError is raised.
.. versionadded:: 0.18
Returns
-------
contingency : {array-like, sparse}, shape=[n_classes_true, n_classes_pred]
Matrix :math:`C` such that :math:`C_{i, j}` is the number of samples in
true class :math:`i` and in predicted class :math:`j`. If
``eps is None``, the dtype of this array will be integer. If ``eps`` is
given, the dtype will be float.
Will be a ``scipy.sparse.csr_matrix`` if ``sparse=True``.
"""
if eps is not None and sparse:
raise ValueError("Cannot set 'eps' when sparse=True")
classes, class_idx = np.unique(labels_true, return_inverse=True)
clusters, cluster_idx = np.unique(labels_pred, return_inverse=True)
n_classes = classes.shape[0]
n_clusters = clusters.shape[0]
# Using coo_matrix to accelerate simple histogram calculation,
# i.e. bins are consecutive integers
# Currently, coo_matrix is faster than histogram2d for simple cases
contingency = sp.coo_matrix((np.ones(class_idx.shape[0]),
(class_idx, cluster_idx)),
shape=(n_classes, n_clusters),
dtype=np.int)
if sparse:
contingency = contingency.tocsr()
contingency.sum_duplicates()
else:
contingency = contingency.toarray()
if eps is not None:
# don't use += as contingency is integer
contingency = contingency + eps
return contingency
# clustering measures
def adjusted_rand_score(labels_true, labels_pred):
"""Rand index adjusted for chance.
The Rand Index computes a similarity measure between two clusterings
by considering all pairs of samples and counting pairs that are
assigned in the same or different clusters in the predicted and
true clusterings.
The raw RI score is then "adjusted for chance" into the ARI score
using the following scheme::
ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)
The adjusted Rand index is thus ensured to have a value close to
0.0 for random labeling independently of the number of clusters and
samples and exactly 1.0 when the clusterings are identical (up to
a permutation).
ARI is a symmetric measure::
adjusted_rand_score(a, b) == adjusted_rand_score(b, a)
Read more in the :ref:`User Guide <adjusted_rand_score>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
Ground truth class labels to be used as a reference
labels_pred : array-like of shape (n_samples,)
Cluster labels to evaluate
Returns
-------
ari : float
Similarity score between -1.0 and 1.0. Random labelings have an ARI
close to 0.0. 1.0 stands for perfect match.
Examples
--------
Perfectly matching labelings have a score of 1 even when the label values are permuted::
>>> from sklearn.metrics.cluster import adjusted_rand_score
>>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 1])
1.0
>>> adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0])
1.0
Labelings that assign all class members to the same clusters
are complete but not always pure, hence penalized::
>>> adjusted_rand_score([0, 0, 1, 2], [0, 0, 1, 1])
0.57...
ARI is symmetric, so labelings that have pure clusters with members
coming from the same classes but unnecessary splits are penalized::
>>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 2])
0.57...
If class members are completely split across different clusters, the
assignment is totally incomplete, hence the ARI is very low::
>>> adjusted_rand_score([0, 0, 0, 0], [0, 1, 2, 3])
0.0
References
----------
.. [Hubert1985] L. Hubert and P. Arabie, Comparing Partitions,
Journal of Classification 1985
https://link.springer.com/article/10.1007%2FBF01908075
.. [wk] https://en.wikipedia.org/wiki/Rand_index#Adjusted_Rand_index
See also
--------
adjusted_mutual_info_score: Adjusted Mutual Information
"""
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
n_samples = labels_true.shape[0]
n_classes = np.unique(labels_true).shape[0]
n_clusters = np.unique(labels_pred).shape[0]
# Special limit cases: no clustering since the data is not split;
# or trivial clustering where each document is assigned a unique cluster.
# These are perfect matches hence return 1.0.
if (n_classes == n_clusters == 1 or
n_classes == n_clusters == 0 or
n_classes == n_clusters == n_samples):
return 1.0
# Compute the ARI using the contingency data
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
sum_comb_c = sum(_comb2(n_c) for n_c in np.ravel(contingency.sum(axis=1)))
sum_comb_k = sum(_comb2(n_k) for n_k in np.ravel(contingency.sum(axis=0)))
sum_comb = sum(_comb2(n_ij) for n_ij in contingency.data)
prod_comb = (sum_comb_c * sum_comb_k) / _comb2(n_samples)
mean_comb = (sum_comb_k + sum_comb_c) / 2.
return (sum_comb - prod_comb) / (mean_comb - prod_comb)
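# Editor's note (illustrative comment, not upstream code): a worked instance of
# the pair-counting above for labels_true=[0, 0, 1, 1], labels_pred=[0, 0, 1, 2].
# The contingency matrix is [[2, 0, 0], [0, 1, 1]], so
#   sum_comb   = C(2, 2) + C(1, 2) + C(1, 2)  = 1    (non-zero cells 2, 1, 1)
#   sum_comb_c = C(2, 2) + C(2, 2)            = 2    (row sums 2, 2)
#   sum_comb_k = C(2, 2) + C(1, 2) + C(1, 2)  = 1    (column sums 2, 1, 1)
#   prod_comb  = 2 * 1 / C(4, 2)              = 1/3
#   mean_comb  = (2 + 1) / 2                  = 1.5
#   ARI        = (1 - 1/3) / (1.5 - 1/3)      = 4/7 ~ 0.571, matching the doctest above.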
@_deprecate_positional_args
def homogeneity_completeness_v_measure(labels_true, labels_pred, *, beta=1.0):
"""Compute the homogeneity and completeness and V-Measure scores at once.
Those metrics are based on normalized conditional entropy measures of
the clustering labeling to evaluate given the knowledge of a Ground
Truth class labels of the same samples.
A clustering result satisfies homogeneity if all of its clusters
contain only data points which are members of a single class.
A clustering result satisfies completeness if all the data points
that are members of a given class are elements of the same cluster.
Both scores have positive values between 0.0 and 1.0, larger values
being desirable.
Those 3 metrics are independent of the absolute values of the labels:
a permutation of the class or cluster label values won't change the
score values in any way.
V-Measure is furthermore symmetric: swapping ``labels_true`` and
``label_pred`` will give the same score. This does not hold for
homogeneity and completeness. V-Measure is identical to
:func:`normalized_mutual_info_score` with the arithmetic averaging
method.
Read more in the :ref:`User Guide <homogeneity_completeness>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
ground truth class labels to be used as a reference
labels_pred : array-like of shape (n_samples,)
cluster labels to evaluate
beta : float
Ratio of weight attributed to ``homogeneity`` vs ``completeness``.
If ``beta`` is greater than 1, ``completeness`` is weighted more
strongly in the calculation. If ``beta`` is less than 1,
``homogeneity`` is weighted more strongly.
Returns
-------
homogeneity : float
score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling
completeness : float
score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
v_measure : float
harmonic mean of the first two
See also
--------
homogeneity_score
completeness_score
v_measure_score
"""
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
if len(labels_true) == 0:
return 1.0, 1.0, 1.0
entropy_C = entropy(labels_true)
entropy_K = entropy(labels_pred)
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
MI = mutual_info_score(None, None, contingency=contingency)
homogeneity = MI / (entropy_C) if entropy_C else 1.0
completeness = MI / (entropy_K) if entropy_K else 1.0
if homogeneity + completeness == 0.0:
v_measure_score = 0.0
else:
v_measure_score = ((1 + beta) * homogeneity * completeness
/ (beta * homogeneity + completeness))
return homogeneity, completeness, v_measure_score
def homogeneity_score(labels_true, labels_pred):
"""Homogeneity metric of a cluster labeling given a ground truth.
A clustering result satisfies homogeneity if all of its clusters
contain only data points which are members of a single class.
This metric is independent of the absolute values of the labels:
a permutation of the class or cluster label values won't change the
score value in any way.
This metric is not symmetric: switching ``label_true`` with ``label_pred``
will return the :func:`completeness_score` which will be different in
general.
Read more in the :ref:`User Guide <homogeneity_completeness>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
ground truth class labels to be used as a reference
labels_pred : array-like of shape (n_samples,)
cluster labels to evaluate
Returns
-------
homogeneity : float
score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling
References
----------
.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A
conditional entropy-based external cluster evaluation measure
<https://aclweb.org/anthology/D/D07/D07-1043.pdf>`_
See also
--------
completeness_score
v_measure_score
Examples
--------
Perfect labelings are homogeneous::
>>> from sklearn.metrics.cluster import homogeneity_score
>>> homogeneity_score([0, 0, 1, 1], [1, 1, 0, 0])
1.0
Non-perfect labelings that further split classes into more clusters can be
perfectly homogeneous::
>>> print("%.6f" % homogeneity_score([0, 0, 1, 1], [0, 0, 1, 2]))
1.000000
>>> print("%.6f" % homogeneity_score([0, 0, 1, 1], [0, 1, 2, 3]))
1.000000
Clusters that include samples from different classes do not make for a
homogeneous labeling::
>>> print("%.6f" % homogeneity_score([0, 0, 1, 1], [0, 1, 0, 1]))
0.0...
>>> print("%.6f" % homogeneity_score([0, 0, 1, 1], [0, 0, 0, 0]))
0.0...
"""
return homogeneity_completeness_v_measure(labels_true, labels_pred)[0]
def completeness_score(labels_true, labels_pred):
"""Completeness metric of a cluster labeling given a ground truth.
A clustering result satisfies completeness if all the data points
that are members of a given class are elements of the same cluster.
This metric is independent of the absolute values of the labels:
a permutation of the class or cluster label values won't change the
score value in any way.
This metric is not symmetric: switching ``label_true`` with ``label_pred``
will return the :func:`homogeneity_score` which will be different in
general.
Read more in the :ref:`User Guide <homogeneity_completeness>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
ground truth class labels to be used as a reference
labels_pred : array-like of shape (n_samples,)
cluster labels to evaluate
Returns
-------
completeness : float
score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
References
----------
.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A
conditional entropy-based external cluster evaluation measure
<https://aclweb.org/anthology/D/D07/D07-1043.pdf>`_
See also
--------
homogeneity_score
v_measure_score
Examples
--------
Perfect labelings are complete::
>>> from sklearn.metrics.cluster import completeness_score
>>> completeness_score([0, 0, 1, 1], [1, 1, 0, 0])
1.0
Non-perfect labelings that assign all class members to the same clusters
are still complete::
>>> print(completeness_score([0, 0, 1, 1], [0, 0, 0, 0]))
1.0
>>> print(completeness_score([0, 1, 2, 3], [0, 0, 1, 1]))
0.999...
If class members are split across different clusters, the
assignment cannot be complete::
>>> print(completeness_score([0, 0, 1, 1], [0, 1, 0, 1]))
0.0
>>> print(completeness_score([0, 0, 0, 0], [0, 1, 2, 3]))
0.0
"""
return homogeneity_completeness_v_measure(labels_true, labels_pred)[1]
@_deprecate_positional_args
def v_measure_score(labels_true, labels_pred, *, beta=1.0):
"""V-measure cluster labeling given a ground truth.
This score is identical to :func:`normalized_mutual_info_score` with
the ``'arithmetic'`` option for averaging.
The V-measure is the harmonic mean between homogeneity and completeness::
v = (1 + beta) * homogeneity * completeness
/ (beta * homogeneity + completeness)
This metric is independent of the absolute values of the labels:
a permutation of the class or cluster label values won't change the
score value in any way.
This metric is furthermore symmetric: switching ``label_true`` with
``label_pred`` will return the same score value. This can be useful to
measure the agreement of two independent label assignments strategies
on the same dataset when the real ground truth is not known.
Read more in the :ref:`User Guide <homogeneity_completeness>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
ground truth class labels to be used as a reference
labels_pred : array-like of shape (n_samples,)
cluster labels to evaluate
beta : float
Ratio of weight attributed to ``homogeneity`` vs ``completeness``.
If ``beta`` is greater than 1, ``completeness`` is weighted more
strongly in the calculation. If ``beta`` is less than 1,
``homogeneity`` is weighted more strongly.
Returns
-------
v_measure : float
score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
References
----------
.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A
conditional entropy-based external cluster evaluation measure
<https://aclweb.org/anthology/D/D07/D07-1043.pdf>`_
See also
--------
homogeneity_score
completeness_score
normalized_mutual_info_score
Examples
--------
Perfect labelings are both homogeneous and complete, hence have score 1.0::
>>> from sklearn.metrics.cluster import v_measure_score
>>> v_measure_score([0, 0, 1, 1], [0, 0, 1, 1])
1.0
>>> v_measure_score([0, 0, 1, 1], [1, 1, 0, 0])
1.0
Labelings that assign all class members to the same clusters
are complete but not homogeneous, hence penalized::
>>> print("%.6f" % v_measure_score([0, 0, 1, 2], [0, 0, 1, 1]))
0.8...
>>> print("%.6f" % v_measure_score([0, 1, 2, 3], [0, 0, 1, 1]))
0.66...
Labelings that have pure clusters with members coming from the same
classes are homogeneous, but unnecessary splits harm completeness
and thus penalize the V-measure as well::
>>> print("%.6f" % v_measure_score([0, 0, 1, 1], [0, 0, 1, 2]))
0.8...
>>> print("%.6f" % v_measure_score([0, 0, 1, 1], [0, 1, 2, 3]))
0.66...
If class members are completely split across different clusters,
the assignment is totally incomplete, hence the V-Measure is null::
>>> print("%.6f" % v_measure_score([0, 0, 0, 0], [0, 1, 2, 3]))
0.0...
Clusters that include samples from totally different classes totally
destroy the homogeneity of the labeling, hence the V-measure is null::
>>> print("%.6f" % v_measure_score([0, 0, 1, 1], [0, 0, 0, 0]))
0.0...
"""
return homogeneity_completeness_v_measure(labels_true, labels_pred,
beta=beta)[2]
@_deprecate_positional_args
def mutual_info_score(labels_true, labels_pred, *, contingency=None):
"""Mutual Information between two clusterings.
The Mutual Information is a measure of the similarity between two labelings
of the same data. Where :math:`|U_i|` is the number of samples
in cluster :math:`U_i` and :math:`|V_j|` is the number of
samples in cluster :math:`V_j`, the Mutual Information
between clusterings :math:`U` and :math:`V` is given as:
.. math::
MI(U,V)=\\sum_{i=1}^{|U|} \\sum_{j=1}^{|V|} \\frac{|U_i\\cap V_j|}{N}
\\log\\frac{N|U_i \\cap V_j|}{|U_i||V_j|}
This metric is independent of the absolute values of the labels:
a permutation of the class or cluster label values won't change the
score value in any way.
This metric is furthermore symmetric: switching ``label_true`` with
``label_pred`` will return the same score value. This can be useful to
measure the agreement of two independent label assignments strategies
on the same dataset when the real ground truth is not known.
Read more in the :ref:`User Guide <mutual_info_score>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
A clustering of the data into disjoint subsets.
labels_pred : int array-like of shape (n_samples,)
A clustering of the data into disjoint subsets.
contingency : {None, array, sparse matrix}, \
shape = [n_classes_true, n_classes_pred]
A contingency matrix given by the :func:`contingency_matrix` function.
If value is ``None``, it will be computed, otherwise the given value is
used, with ``labels_true`` and ``labels_pred`` ignored.
Returns
-------
mi : float
Mutual information, a non-negative value
Notes
-----
The logarithm used is the natural logarithm (base-e).
See also
--------
adjusted_mutual_info_score: Adjusted against chance Mutual Information
normalized_mutual_info_score: Normalized Mutual Information
"""
if contingency is None:
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
else:
contingency = check_array(contingency,
accept_sparse=['csr', 'csc', 'coo'],
dtype=[int, np.int32, np.int64])
if isinstance(contingency, np.ndarray):
# For an array
nzx, nzy = np.nonzero(contingency)
nz_val = contingency[nzx, nzy]
elif sp.issparse(contingency):
# For a sparse matrix
nzx, nzy, nz_val = sp.find(contingency)
else:
raise ValueError("Unsupported type for 'contingency': %s" %
type(contingency))
contingency_sum = contingency.sum()
pi = np.ravel(contingency.sum(axis=1))
pj = np.ravel(contingency.sum(axis=0))
log_contingency_nm = np.log(nz_val)
contingency_nm = nz_val / contingency_sum
# Don't need to calculate the full outer product, just for non-zeroes
outer = (pi.take(nzx).astype(np.int64, copy=False)
* pj.take(nzy).astype(np.int64, copy=False))
log_outer = -np.log(outer) + log(pi.sum()) + log(pj.sum())
mi = (contingency_nm * (log_contingency_nm - log(contingency_sum)) +
contingency_nm * log_outer)
return np.clip(mi.sum(), 0.0, None)
@_deprecate_positional_args
def adjusted_mutual_info_score(labels_true, labels_pred, *,
average_method='arithmetic'):
"""Adjusted Mutual Information between two clusterings.
Adjusted Mutual Information (AMI) is an adjustment of the Mutual
Information (MI) score to account for chance. It accounts for the fact that
the MI is generally higher for two clusterings with a larger number of
clusters, regardless of whether there is actually more information shared.
For two clusterings :math:`U` and :math:`V`, the AMI is given as::
AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]
This metric is independent of the absolute values of the labels:
a permutation of the class or cluster label values won't change the
score value in any way.
This metric is furthermore symmetric: switching ``label_true`` with
``label_pred`` will return the same score value. This can be useful to
measure the agreement of two independent label assignments strategies
on the same dataset when the real ground truth is not known.
Be mindful that this function is an order of magnitude slower than other
metrics, such as the Adjusted Rand Index.
Read more in the :ref:`User Guide <mutual_info_score>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
A clustering of the data into disjoint subsets.
labels_pred : int array-like of shape (n_samples,)
A clustering of the data into disjoint subsets.
average_method : string, optional (default: 'arithmetic')
How to compute the normalizer in the denominator. Possible options
are 'min', 'geometric', 'arithmetic', and 'max'.
.. versionadded:: 0.20
.. versionchanged:: 0.22
The default value of ``average_method`` changed from 'max' to
'arithmetic'.
Returns
-------
ami : float (upper bounded by 1.0)
The AMI returns a value of 1 when the two partitions are identical
(i.e. perfectly matched). Random partitions (independent labellings) have
an expected AMI of around 0 on average and hence can be negative.
See also
--------
adjusted_rand_score: Adjusted Rand Index
mutual_info_score: Mutual Information (not adjusted for chance)
Examples
--------
Perfect labelings are both homogeneous and complete, hence have
score 1.0::
>>> from sklearn.metrics.cluster import adjusted_mutual_info_score
>>> adjusted_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])
... # doctest: +SKIP
1.0
>>> adjusted_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])
... # doctest: +SKIP
1.0
If class members are completely split across different clusters,
the assignment is totally incomplete, hence the AMI is null::
>>> adjusted_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])
... # doctest: +SKIP
0.0
References
----------
.. [1] `Vinh, Epps, and Bailey, (2010). Information Theoretic Measures for
Clusterings Comparison: Variants, Properties, Normalization and
Correction for Chance, JMLR
<http://jmlr.csail.mit.edu/papers/volume11/vinh10a/vinh10a.pdf>`_
.. [2] `Wikipedia entry for the Adjusted Mutual Information
<https://en.wikipedia.org/wiki/Adjusted_Mutual_Information>`_
"""
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
n_samples = labels_true.shape[0]
classes = np.unique(labels_true)
clusters = np.unique(labels_pred)
# Special limit cases: no clustering since the data is not split.
# This is a perfect match hence return 1.0.
if (classes.shape[0] == clusters.shape[0] == 1 or
classes.shape[0] == clusters.shape[0] == 0):
return 1.0
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
contingency = contingency.astype(np.float64,
**_astype_copy_false(contingency))
# Calculate the MI for the two clusterings
mi = mutual_info_score(labels_true, labels_pred,
contingency=contingency)
# Calculate the expected value for the mutual information
emi = expected_mutual_information(contingency, n_samples)
# Calculate entropy for each labeling
h_true, h_pred = entropy(labels_true), entropy(labels_pred)
normalizer = _generalized_average(h_true, h_pred, average_method)
denominator = normalizer - emi
# Avoid 0.0 / 0.0 when expectation equals maximum, i.e a perfect match.
# normalizer should always be >= emi, but because of floating-point
# representation, sometimes emi is slightly larger. Correct this
# by preserving the sign.
if denominator < 0:
denominator = min(denominator, -np.finfo('float64').eps)
else:
denominator = max(denominator, np.finfo('float64').eps)
ami = (mi - emi) / denominator
return ami
@_deprecate_positional_args
def normalized_mutual_info_score(labels_true, labels_pred, *,
average_method='arithmetic'):
"""Normalized Mutual Information between two clusterings.
Normalized Mutual Information (NMI) is a normalization of the Mutual
Information (MI) score to scale the results between 0 (no mutual
information) and 1 (perfect correlation). In this function, mutual
information is normalized by some generalized mean of ``H(labels_true)``
and ``H(labels_pred)``, defined by the ``average_method``.
This measure is not adjusted for chance. Therefore
:func:`adjusted_mutual_info_score` might be preferred.
This metric is independent of the absolute values of the labels:
a permutation of the class or cluster label values won't change the
score value in any way.
This metric is furthermore symmetric: switching ``label_true`` with
``label_pred`` will return the same score value. This can be useful to
measure the agreement of two independent label assignments strategies
on the same dataset when the real ground truth is not known.
Read more in the :ref:`User Guide <mutual_info_score>`.
Parameters
----------
labels_true : int array, shape = [n_samples]
A clustering of the data into disjoint subsets.
labels_pred : int array-like of shape (n_samples,)
A clustering of the data into disjoint subsets.
average_method : string, optional (default: 'arithmetic')
How to compute the normalizer in the denominator. Possible options
are 'min', 'geometric', 'arithmetic', and 'max'.
.. versionadded:: 0.20
.. versionchanged:: 0.22
The default value of ``average_method`` changed from 'geometric' to
'arithmetic'.
Returns
-------
nmi : float
score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
See also
--------
v_measure_score: V-Measure (NMI with arithmetic mean option.)
adjusted_rand_score: Adjusted Rand Index
adjusted_mutual_info_score: Adjusted Mutual Information (adjusted
against chance)
Examples
--------
Perfect labelings are both homogeneous and complete, hence have
score 1.0::
>>> from sklearn.metrics.cluster import normalized_mutual_info_score
>>> normalized_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])
... # doctest: +SKIP
1.0
>>> normalized_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])
... # doctest: +SKIP
1.0
If class members are completely split across different clusters,
the assignment is totally incomplete, hence the NMI is null::
>>> normalized_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])
... # doctest: +SKIP
0.0
"""
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
classes = np.unique(labels_true)
clusters = np.unique(labels_pred)
# Special limit cases: no clustering since the data is not split.
# This is a perfect match hence return 1.0.
if (classes.shape[0] == clusters.shape[0] == 1 or
classes.shape[0] == clusters.shape[0] == 0):
return 1.0
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
contingency = contingency.astype(np.float64,
**_astype_copy_false(contingency))
# Calculate the MI for the two clusterings
mi = mutual_info_score(labels_true, labels_pred,
contingency=contingency)
# Calculate the expected value for the mutual information
# Calculate entropy for each labeling
h_true, h_pred = entropy(labels_true), entropy(labels_pred)
normalizer = _generalized_average(h_true, h_pred, average_method)
# Avoid 0.0 / 0.0 when either entropy is zero.
normalizer = max(normalizer, np.finfo('float64').eps)
nmi = mi / normalizer
return nmi
@_deprecate_positional_args
def fowlkes_mallows_score(labels_true, labels_pred, *, sparse=False):
"""Measure the similarity of two clusterings of a set of points.
.. versionadded:: 0.18
The Fowlkes-Mallows index (FMI) is defined as the geometric mean of
the precision and recall::
FMI = TP / sqrt((TP + FP) * (TP + FN))
Where ``TP`` is the number of **True Positives** (i.e. the number of pairs
of points that belong to the same clusters in both ``labels_true`` and
``labels_pred``), ``FP`` is the number of **False Positives** (i.e. the
number of pairs of points that belong to the same clusters in
``labels_true`` but not in ``labels_pred``) and ``FN`` is the number of
**False Negatives** (i.e. the number of pairs of points that belong to the
same clusters in ``labels_pred`` but not in ``labels_true``).
The score ranges from 0 to 1. A high value indicates a good similarity
between two clusterings.
Read more in the :ref:`User Guide <fowlkes_mallows_scores>`.
Parameters
----------
labels_true : int array, shape = (``n_samples``,)
A clustering of the data into disjoint subsets.
labels_pred : array, shape = (``n_samples``, )
A clustering of the data into disjoint subsets.
sparse : bool
Compute contingency matrix internally with sparse matrix.
Returns
-------
score : float
The resulting Fowlkes-Mallows score.
Examples
--------
Perfect labelings are both homogeneous and complete, hence have
score 1.0::
>>> from sklearn.metrics.cluster import fowlkes_mallows_score
>>> fowlkes_mallows_score([0, 0, 1, 1], [0, 0, 1, 1])
1.0
>>> fowlkes_mallows_score([0, 0, 1, 1], [1, 1, 0, 0])
1.0
If class members are completely split across different clusters,
the assignment is totally random, hence the FMI is null::
>>> fowlkes_mallows_score([0, 0, 0, 0], [0, 1, 2, 3])
0.0
References
----------
.. [1] `E. B. Fowlkes and C. L. Mallows, 1983. "A method for comparing two
hierarchical clusterings". Journal of the American Statistical
Association
<http://wildfire.stat.ucla.edu/pdflibrary/fowlkes.pdf>`_
.. [2] `Wikipedia entry for the Fowlkes-Mallows Index
<https://en.wikipedia.org/wiki/Fowlkes-Mallows_index>`_
"""
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
n_samples, = labels_true.shape
c = contingency_matrix(labels_true, labels_pred,
sparse=True)
c = c.astype(np.int64, **_astype_copy_false(c))
tk = np.dot(c.data, c.data) - n_samples
pk = np.sum(np.asarray(c.sum(axis=0)).ravel() ** 2) - n_samples
qk = np.sum(np.asarray(c.sum(axis=1)).ravel() ** 2) - n_samples
return np.sqrt(tk / pk) * np.sqrt(tk / qk) if tk != 0. else 0.
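# Editor's note (illustrative comment, not upstream code): with
# labels_true=[0, 0, 1, 1] and labels_pred=[0, 0, 1, 2] the sparse contingency
# matrix is [[2, 0, 0], [0, 1, 1]], so
#   tk = 2**2 + 1**2 + 1**2 - 4 = 2   (twice the number of true-positive pairs)
#   pk = 2**2 + 1**2 + 1**2 - 4 = 2   (from column sums 2, 1, 1)
#   qk = 2**2 + 2**2 - 4        = 4   (from row sums 2, 2)
#   FMI = sqrt(2/2) * sqrt(2/4) = 1/sqrt(2) ~ 0.707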
def entropy(labels):
"""Calculates the entropy for a labeling.
Parameters
----------
labels : int array, shape = [n_samples]
The labels
Notes
-----
The logarithm used is the natural logarithm (base-e).
"""
if len(labels) == 0:
return 1.0
label_idx = np.unique(labels, return_inverse=True)[1]
pi = np.bincount(label_idx).astype(np.float64)
pi = pi[pi > 0]
pi_sum = np.sum(pi)
# log(a / b) should be calculated as log(a) - log(b) to avoid
# possible loss of precision
return -np.sum((pi / pi_sum) * (np.log(pi) - log(pi_sum)))
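
A short usage sketch (editor's addition, not part of the file above) exercising a few of the supervised metrics defined in it:

from sklearn.metrics.cluster import (contingency_matrix, mutual_info_score,
                                     homogeneity_completeness_v_measure)

labels_true = [0, 0, 1, 1]
labels_pred = [0, 0, 1, 2]
C = contingency_matrix(labels_true, labels_pred)   # [[2, 0, 0], [0, 1, 1]]
# mutual_info_score can reuse a precomputed contingency matrix
print(mutual_info_score(None, None, contingency=C))
print(homogeneity_completeness_v_measure(labels_true, labels_pred))
# homogeneity is 1.0 (every cluster is pure); completeness < 1.0 because class 1 is split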


@@ -0,0 +1,363 @@
"""Unsupervised evaluation metrics."""
# Authors: Robert Layton <robertlayton@gmail.com>
# Arnaud Fouchet <foucheta@gmail.com>
# Thierry Guillemot <thierry.guillemot.work@gmail.com>
# License: BSD 3 clause
import functools
import numpy as np
from ...utils import check_random_state
from ...utils import check_X_y
from ...utils import _safe_indexing
from ..pairwise import pairwise_distances_chunked
from ..pairwise import pairwise_distances
from ...preprocessing import LabelEncoder
from ...utils.validation import _deprecate_positional_args
def check_number_of_labels(n_labels, n_samples):
"""Check that number of labels are valid.
Parameters
----------
n_labels : int
Number of labels
n_samples : int
Number of samples
"""
if not 1 < n_labels < n_samples:
raise ValueError("Number of labels is %d. Valid values are 2 "
"to n_samples - 1 (inclusive)" % n_labels)
@_deprecate_positional_args
def silhouette_score(X, labels, *, metric='euclidean', sample_size=None,
random_state=None, **kwds):
"""Compute the mean Silhouette Coefficient of all samples.
The Silhouette Coefficient is calculated using the mean intra-cluster
distance (``a``) and the mean nearest-cluster distance (``b``) for each
sample. The Silhouette Coefficient for a sample is ``(b - a) / max(a,
b)``. To clarify, ``b`` is the distance between a sample and the nearest
cluster that the sample is not a part of.
Note that Silhouette Coefficient is only defined if number of labels
is 2 <= n_labels <= n_samples - 1.
This function returns the mean Silhouette Coefficient over all samples.
To obtain the values for each sample, use :func:`silhouette_samples`.
The best value is 1 and the worst value is -1. Values near 0 indicate
overlapping clusters. Negative values generally indicate that a sample has
been assigned to the wrong cluster, as a different cluster is more similar.
Read more in the :ref:`User Guide <silhouette_coefficient>`.
Parameters
----------
X : array [n_samples_a, n_samples_a] if metric == "precomputed", or, \
[n_samples_a, n_features] otherwise
Array of pairwise distances between samples, or a feature array.
labels : array, shape = [n_samples]
Predicted labels for each sample.
metric : string, or callable
The metric to use when calculating distance between instances in a
feature array. If metric is a string, it must be one of the options
allowed by :func:`metrics.pairwise.pairwise_distances
<sklearn.metrics.pairwise.pairwise_distances>`. If X is the distance
array itself, use ``metric="precomputed"``.
sample_size : int or None
The size of the sample to use when computing the Silhouette Coefficient
on a random subset of the data.
If ``sample_size is None``, no sampling is used.
random_state : int, RandomState instance or None, optional (default=None)
Determines random number generation for selecting a subset of samples.
Used when ``sample_size is not None``.
Pass an int for reproducible results across multiple function calls.
See :term:`Glossary <random_state>`.
**kwds : optional keyword parameters
Any further parameters are passed directly to the distance function.
If using a scipy.spatial.distance metric, the parameters are still
metric dependent. See the scipy docs for usage examples.
Returns
-------
silhouette : float
Mean Silhouette Coefficient for all samples.
References
----------
.. [1] `Peter J. Rousseeuw (1987). "Silhouettes: a Graphical Aid to the
Interpretation and Validation of Cluster Analysis". Computational
and Applied Mathematics 20: 53-65.
<https://www.sciencedirect.com/science/article/pii/0377042787901257>`_
.. [2] `Wikipedia entry on the Silhouette Coefficient
<https://en.wikipedia.org/wiki/Silhouette_(clustering)>`_
"""
if sample_size is not None:
X, labels = check_X_y(X, labels, accept_sparse=['csc', 'csr'])
random_state = check_random_state(random_state)
indices = random_state.permutation(X.shape[0])[:sample_size]
if metric == "precomputed":
X, labels = X[indices].T[indices].T, labels[indices]
else:
X, labels = X[indices], labels[indices]
return np.mean(silhouette_samples(X, labels, metric=metric, **kwds))
def _silhouette_reduce(D_chunk, start, labels, label_freqs):
"""Accumulate silhouette statistics for vertical chunk of X
Parameters
----------
D_chunk : shape (n_chunk_samples, n_samples)
precomputed distances for a chunk
start : int
first index in chunk
labels : array, shape (n_samples,)
corresponding cluster labels, encoded as {0, ..., n_clusters-1}
label_freqs : array
distribution of cluster labels in ``labels``
"""
# accumulate distances from each sample to each cluster
clust_dists = np.zeros((len(D_chunk), len(label_freqs)),
dtype=D_chunk.dtype)
for i in range(len(D_chunk)):
clust_dists[i] += np.bincount(labels, weights=D_chunk[i],
minlength=len(label_freqs))
# intra_index selects intra-cluster distances within clust_dists
intra_index = (np.arange(len(D_chunk)), labels[start:start + len(D_chunk)])
# intra_clust_dists are averaged over cluster size outside this function
intra_clust_dists = clust_dists[intra_index]
# of the remaining distances we normalise and extract the minimum
clust_dists[intra_index] = np.inf
clust_dists /= label_freqs
inter_clust_dists = clust_dists.min(axis=1)
return intra_clust_dists, inter_clust_dists
@_deprecate_positional_args
def silhouette_samples(X, labels, *, metric='euclidean', **kwds):
"""Compute the Silhouette Coefficient for each sample.
The Silhouette Coefficient is a measure of how well samples are clustered
with samples that are similar to themselves. Clustering models with a high
Silhouette Coefficient are said to be dense, where samples in the same
cluster are similar to each other, and well separated, where samples in
different clusters are not very similar to each other.
The Silhouette Coefficient is calculated using the mean intra-cluster
distance (``a``) and the mean nearest-cluster distance (``b``) for each
sample. The Silhouette Coefficient for a sample is ``(b - a) / max(a,
b)``.
Note that Silhouette Coefficient is only defined if number of labels
is 2 <= n_labels <= n_samples - 1.
This function returns the Silhouette Coefficient for each sample.
The best value is 1 and the worst value is -1. Values near 0 indicate
overlapping clusters.
Read more in the :ref:`User Guide <silhouette_coefficient>`.
Parameters
----------
X : array [n_samples_a, n_samples_a] if metric == "precomputed", or, \
[n_samples_a, n_features] otherwise
Array of pairwise distances between samples, or a feature array.
labels : array, shape = [n_samples]
label values for each sample
metric : string, or callable
The metric to use when calculating distance between instances in a
feature array. If metric is a string, it must be one of the options
allowed by :func:`sklearn.metrics.pairwise.pairwise_distances`. If X is
the distance array itself, use "precomputed" as the metric. Precomputed
distance matrices must have 0 along the diagonal.
`**kwds` : optional keyword parameters
Any further parameters are passed directly to the distance function.
If using a ``scipy.spatial.distance`` metric, the parameters are still
metric dependent. See the scipy docs for usage examples.
Returns
-------
silhouette : array, shape = [n_samples]
Silhouette Coefficient for each sample.
References
----------
.. [1] `Peter J. Rousseeuw (1987). "Silhouettes: a Graphical Aid to the
Interpretation and Validation of Cluster Analysis". Computational
and Applied Mathematics 20: 53-65.
<https://www.sciencedirect.com/science/article/pii/0377042787901257>`_
.. [2] `Wikipedia entry on the Silhouette Coefficient
<https://en.wikipedia.org/wiki/Silhouette_(clustering)>`_
"""
X, labels = check_X_y(X, labels, accept_sparse=['csc', 'csr'])
# Check for non-zero diagonal entries in precomputed distance matrix
if metric == 'precomputed':
atol = np.finfo(X.dtype).eps * 100
if np.any(np.abs(np.diagonal(X)) > atol):
raise ValueError(
'The precomputed distance matrix contains non-zero '
'elements on the diagonal. Use np.fill_diagonal(X, 0).'
)
le = LabelEncoder()
labels = le.fit_transform(labels)
n_samples = len(labels)
label_freqs = np.bincount(labels)
check_number_of_labels(len(le.classes_), n_samples)
kwds['metric'] = metric
reduce_func = functools.partial(_silhouette_reduce,
labels=labels, label_freqs=label_freqs)
results = zip(*pairwise_distances_chunked(X, reduce_func=reduce_func,
**kwds))
intra_clust_dists, inter_clust_dists = results
intra_clust_dists = np.concatenate(intra_clust_dists)
inter_clust_dists = np.concatenate(inter_clust_dists)
denom = (label_freqs - 1).take(labels, mode='clip')
with np.errstate(divide="ignore", invalid="ignore"):
intra_clust_dists /= denom
sil_samples = inter_clust_dists - intra_clust_dists
with np.errstate(divide="ignore", invalid="ignore"):
sil_samples /= np.maximum(intra_clust_dists, inter_clust_dists)
# nan values are for clusters of size 1, and should be 0
return np.nan_to_num(sil_samples)
def calinski_harabasz_score(X, labels):
"""Compute the Calinski and Harabasz score.
It is also known as the Variance Ratio Criterion.
The score is defined as the ratio of the between-cluster dispersion to
the within-cluster dispersion.
Read more in the :ref:`User Guide <calinski_harabasz_index>`.
Parameters
----------
X : array-like, shape (``n_samples``, ``n_features``)
List of ``n_features``-dimensional data points. Each row corresponds
to a single data point.
labels : array-like, shape (``n_samples``,)
Predicted labels for each sample.
Returns
-------
score : float
The resulting Calinski-Harabasz score.
References
----------
.. [1] `T. Calinski and J. Harabasz, 1974. "A dendrite method for cluster
analysis". Communications in Statistics
<https://www.tandfonline.com/doi/abs/10.1080/03610927408827101>`_
"""
X, labels = check_X_y(X, labels)
le = LabelEncoder()
labels = le.fit_transform(labels)
n_samples, _ = X.shape
n_labels = len(le.classes_)
check_number_of_labels(n_labels, n_samples)
extra_disp, intra_disp = 0., 0.
mean = np.mean(X, axis=0)
for k in range(n_labels):
cluster_k = X[labels == k]
mean_k = np.mean(cluster_k, axis=0)
extra_disp += len(cluster_k) * np.sum((mean_k - mean) ** 2)
intra_disp += np.sum((cluster_k - mean_k) ** 2)
return (1. if intra_disp == 0. else
extra_disp * (n_samples - n_labels) /
(intra_disp * (n_labels - 1.)))
def davies_bouldin_score(X, labels):
"""Computes the Davies-Bouldin score.
The score is defined as the average similarity measure of each cluster with
its most similar cluster, where similarity is the ratio of within-cluster
distances to between-cluster distances. Thus, clusters which are farther
apart and less dispersed will result in a better score.
The minimum score is zero, with lower values indicating better clustering.
Read more in the :ref:`User Guide <davies-bouldin_index>`.
.. versionadded:: 0.20
Parameters
----------
X : array-like, shape (``n_samples``, ``n_features``)
List of ``n_features``-dimensional data points. Each row corresponds
to a single data point.
labels : array-like, shape (``n_samples``,)
Predicted labels for each sample.
Returns
-------
score: float
The resulting Davies-Bouldin score.
References
----------
.. [1] Davies, David L.; Bouldin, Donald W. (1979).
`"A Cluster Separation Measure"
<https://ieeexplore.ieee.org/document/4766909>`__.
IEEE Transactions on Pattern Analysis and Machine Intelligence.
PAMI-1 (2): 224-227
"""
X, labels = check_X_y(X, labels)
le = LabelEncoder()
labels = le.fit_transform(labels)
n_samples, _ = X.shape
n_labels = len(le.classes_)
check_number_of_labels(n_labels, n_samples)
intra_dists = np.zeros(n_labels)
centroids = np.zeros((n_labels, len(X[0])), dtype=np.float)
for k in range(n_labels):
cluster_k = _safe_indexing(X, labels == k)
centroid = cluster_k.mean(axis=0)
centroids[k] = centroid
intra_dists[k] = np.average(pairwise_distances(
cluster_k, [centroid]))
centroid_distances = pairwise_distances(centroids)
if np.allclose(intra_dists, 0) or np.allclose(centroid_distances, 0):
return 0.0
centroid_distances[centroid_distances == 0] = np.inf
combined_intra_dists = intra_dists[:, None] + intra_dists
scores = np.max(combined_intra_dists / centroid_distances, axis=1)
return np.mean(scores)
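
A brief usage sketch (editor's addition) for the three unsupervised metrics above, on two well-separated toy blobs:

import numpy as np
from sklearn.metrics import (silhouette_score, calinski_harabasz_score,
                             davies_bouldin_score)

X = np.array([[0.0, 0.0], [0.2, 0.1], [0.1, 0.3],
              [5.0, 5.0], [5.2, 5.1], [5.1, 4.9]])
labels = np.array([0, 0, 0, 1, 1, 1])

print(silhouette_score(X, labels))         # close to 1: tight, well-separated clusters
print(calinski_harabasz_score(X, labels))  # large: between-cluster dispersion dominates
print(davies_bouldin_score(X, labels))     # close to 0: lower is better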


@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _bicluster # type: ignore
from ...externals._pep562 import Pep562
from ...utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.metrics.cluster.bicluster'
correct_import_path = 'sklearn.metrics.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_bicluster, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)
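
A hedged sketch (editor's addition) of what this shim does in practice: importing the deprecated path emits a deprecation warning (outside pytest) and then forwards attribute access to the private module through the module-level __getattr__:

import warnings

with warnings.catch_warnings():
    warnings.simplefilter("ignore")   # silence the deprecation notice for this sketch
    from sklearn.metrics.cluster import bicluster   # deprecated alias of ._bicluster

print(bicluster.consensus_score.__module__)   # sklearn.metrics.cluster._bicluster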


@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _expected_mutual_info_fast # type: ignore
from ...externals._pep562 import Pep562
from ...utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.metrics.cluster.expected_mutual_info_fast'
correct_import_path = 'sklearn.metrics.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_expected_mutual_info_fast, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)


@@ -0,0 +1,24 @@
import os
import numpy
from numpy.distutils.misc_util import Configuration
def configuration(parent_package="", top_path=None):
config = Configuration("cluster", parent_package, top_path)
libraries = []
if os.name == 'posix':
libraries.append('m')
config.add_extension("_expected_mutual_info_fast",
sources=["_expected_mutual_info_fast.pyx"],
include_dirs=[numpy.get_include()],
libraries=libraries)
config.add_subpackage("tests")
return config
if __name__ == "__main__":
from numpy.distutils.core import setup
setup(**configuration().todict())


@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _supervised # type: ignore
from ...externals._pep562 import Pep562
from ...utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.metrics.cluster.supervised'
correct_import_path = 'sklearn.metrics.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_supervised, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)


@@ -0,0 +1,50 @@
"""Testing for bicluster metrics module"""
import numpy as np
from sklearn.utils._testing import assert_almost_equal
from sklearn.metrics.cluster._bicluster import _jaccard
from sklearn.metrics import consensus_score
def test_jaccard():
a1 = np.array([True, True, False, False])
a2 = np.array([True, True, True, True])
a3 = np.array([False, True, True, False])
a4 = np.array([False, False, True, True])
assert _jaccard(a1, a1, a1, a1) == 1
assert _jaccard(a1, a1, a2, a2) == 0.25
assert _jaccard(a1, a1, a3, a3) == 1.0 / 7
assert _jaccard(a1, a1, a4, a4) == 0
def test_consensus_score():
a = [[True, True, False, False],
[False, False, True, True]]
b = a[::-1]
assert consensus_score((a, a), (a, a)) == 1
assert consensus_score((a, a), (b, b)) == 1
assert consensus_score((a, b), (a, b)) == 1
assert consensus_score((a, b), (b, a)) == 1
assert consensus_score((a, a), (b, a)) == 0
assert consensus_score((a, a), (a, b)) == 0
assert consensus_score((b, b), (a, b)) == 0
assert consensus_score((b, b), (b, a)) == 0
def test_consensus_score_issue2445():
"""Different number of biclusters in A and B."""
a_rows = np.array([[True, True, False, False],
[False, False, True, True],
[False, False, False, True]])
a_cols = np.array([[True, True, False, False],
[False, False, True, True],
[False, False, False, True]])
idx = [0, 2]
s = consensus_score((a_rows, a_cols), (a_rows[idx], a_cols[idx]))
# B contains 2 of the 3 biclusters in A, so score should be 2/3
assert_almost_equal(s, 2.0/3.0)


@@ -0,0 +1,211 @@
from functools import partial
import pytest
import numpy as np
from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import completeness_score
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.metrics.cluster import homogeneity_score
from sklearn.metrics.cluster import mutual_info_score
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.metrics.cluster import v_measure_score
from sklearn.metrics.cluster import silhouette_score
from sklearn.metrics.cluster import calinski_harabasz_score
from sklearn.metrics.cluster import davies_bouldin_score
from sklearn.utils._testing import assert_allclose
# Dictionaries of metrics
# ------------------------
# The goal of having those dictionaries is to have an easy way to call a
# particular metric and associate a name to each function:
# - SUPERVISED_METRICS: all supervised cluster metrics - (when given a
# ground truth value)
# - UNSUPERVISED_METRICS: all unsupervised cluster metrics
#
# Those dictionaries will be used to test systematically some invariance
# properties, e.g. invariance toward several input layouts.
#
SUPERVISED_METRICS = {
"adjusted_mutual_info_score": adjusted_mutual_info_score,
"adjusted_rand_score": adjusted_rand_score,
"completeness_score": completeness_score,
"homogeneity_score": homogeneity_score,
"mutual_info_score": mutual_info_score,
"normalized_mutual_info_score": normalized_mutual_info_score,
"v_measure_score": v_measure_score,
"fowlkes_mallows_score": fowlkes_mallows_score
}
UNSUPERVISED_METRICS = {
"silhouette_score": silhouette_score,
"silhouette_manhattan": partial(silhouette_score, metric='manhattan'),
"calinski_harabasz_score": calinski_harabasz_score,
"davies_bouldin_score": davies_bouldin_score
}
# Lists of metrics with common properties
# ---------------------------------------
# Lists of metrics with common properties are used to test systematically some
# functionalities and invariance, e.g. SYMMETRIC_METRICS lists all metrics
# that are symmetric with respect to their input argument y_true and y_pred.
#
# --------------------------------------------------------------------
# Symmetric with respect to their input arguments y_true and y_pred.
# Symmetry only applies to the supervised clustering metrics.
SYMMETRIC_METRICS = [
"adjusted_rand_score", "v_measure_score",
"mutual_info_score", "adjusted_mutual_info_score",
"normalized_mutual_info_score", "fowlkes_mallows_score"
]
NON_SYMMETRIC_METRICS = ["homogeneity_score", "completeness_score"]
# Metrics whose upper bound is 1
NORMALIZED_METRICS = [
"adjusted_rand_score", "homogeneity_score", "completeness_score",
"v_measure_score", "adjusted_mutual_info_score", "fowlkes_mallows_score",
"normalized_mutual_info_score"
]
rng = np.random.RandomState(0)
y1 = rng.randint(3, size=30)
y2 = rng.randint(3, size=30)
def test_symmetric_non_symmetric_union():
assert (sorted(SYMMETRIC_METRICS + NON_SYMMETRIC_METRICS) ==
sorted(SUPERVISED_METRICS))
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize(
'metric_name, y1, y2',
[(name, y1, y2) for name in SYMMETRIC_METRICS]
)
def test_symmetry(metric_name, y1, y2):
metric = SUPERVISED_METRICS[metric_name]
assert metric(y1, y2) == pytest.approx(metric(y2, y1))
@pytest.mark.parametrize(
'metric_name, y1, y2',
[(name, y1, y2) for name in NON_SYMMETRIC_METRICS]
)
def test_non_symmetry(metric_name, y1, y2):
metric = SUPERVISED_METRICS[metric_name]
assert metric(y1, y2) != pytest.approx(metric(y2, y1))
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize("metric_name", NORMALIZED_METRICS)
def test_normalized_output(metric_name):
upper_bound_1 = [0, 0, 0, 1, 1, 1]
upper_bound_2 = [0, 0, 0, 1, 1, 1]
metric = SUPERVISED_METRICS[metric_name]
assert metric([0, 0, 0, 1, 1], [0, 0, 0, 1, 2]) > 0.0
assert metric([0, 0, 1, 1, 2], [0, 0, 1, 1, 1]) > 0.0
assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
assert metric(upper_bound_1, upper_bound_2) == pytest.approx(1.0)
lower_bound_1 = [0, 0, 0, 0, 0, 0]
lower_bound_2 = [0, 1, 2, 3, 4, 5]
score = np.array([metric(lower_bound_1, lower_bound_2),
metric(lower_bound_2, lower_bound_1)])
assert not (score < 0).any()
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize(
"metric_name", dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS)
)
def test_permute_labels(metric_name):
# All clustering metrics should not change their score under permutations of
# the labels, that is, when 0 and 1 are exchanged.
y_label = np.array([0, 0, 0, 1, 1, 0, 1])
y_pred = np.array([1, 0, 1, 0, 1, 1, 0])
if metric_name in SUPERVISED_METRICS:
metric = SUPERVISED_METRICS[metric_name]
score_1 = metric(y_pred, y_label)
assert_allclose(score_1, metric(1 - y_pred, y_label))
assert_allclose(score_1, metric(1 - y_pred, 1 - y_label))
assert_allclose(score_1, metric(y_pred, 1 - y_label))
else:
metric = UNSUPERVISED_METRICS[metric_name]
X = np.random.randint(10, size=(7, 10))
score_1 = metric(X, y_pred)
assert_allclose(score_1, metric(X, 1 - y_pred))
# 0.22 AMI and NMI changes
@pytest.mark.filterwarnings('ignore::FutureWarning')
@pytest.mark.parametrize(
"metric_name", dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS)
)
# For all clustering metrics, input parameters can be given as arrays or
# lists, with positive, negative or string labels.
def test_format_invariance(metric_name):
y_true = [0, 0, 0, 0, 1, 1, 1, 1]
y_pred = [0, 1, 2, 3, 4, 5, 6, 7]
def generate_formats(y):
y = np.array(y)
yield y, 'array of ints'
yield y.tolist(), 'list of ints'
yield [str(x) + "-a" for x in y.tolist()], 'list of strs'
yield (np.array([str(x) + "-a" for x in y.tolist()], dtype=object),
'array of strs')
yield y - 1, 'including negative ints'
yield y + 1, 'strictly positive ints'
if metric_name in SUPERVISED_METRICS:
metric = SUPERVISED_METRICS[metric_name]
score_1 = metric(y_true, y_pred)
y_true_gen = generate_formats(y_true)
y_pred_gen = generate_formats(y_pred)
for (y_true_fmt, fmt_name), (y_pred_fmt, _) in zip(y_true_gen,
y_pred_gen):
assert score_1 == metric(y_true_fmt, y_pred_fmt)
else:
metric = UNSUPERVISED_METRICS[metric_name]
X = np.random.randint(10, size=(8, 10))
score_1 = metric(X, y_true)
assert score_1 == metric(X.astype(float), y_true)
y_true_gen = generate_formats(y_true)
for (y_true_fmt, fmt_name) in y_true_gen:
assert score_1 == metric(X, y_true_fmt)
@pytest.mark.parametrize("metric", SUPERVISED_METRICS.values())
def test_single_sample(metric):
    # Only the supervised metrics support single-sample inputs.
for i, j in [(0, 0), (0, 1), (1, 0), (1, 1)]:
metric([i], [j])
@pytest.mark.parametrize(
"metric_name, metric_func",
dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS).items()
)
def test_inf_nan_input(metric_name, metric_func):
if metric_name in SUPERVISED_METRICS:
invalids = [([0, 1], [np.inf, np.inf]),
([0, 1], [np.nan, np.nan]),
([0, 1], [np.nan, np.inf])]
else:
X = np.random.randint(10, size=(2, 10))
invalids = [(X, [np.inf, np.inf]),
(X, [np.nan, np.nan]),
(X, [np.nan, np.inf])]
    for args in invalids:
        with pytest.raises(ValueError, match='contains NaN, infinity'):
            metric_func(*args)

View file

@ -0,0 +1,358 @@
import numpy as np
import pytest
from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import completeness_score
from sklearn.metrics.cluster import contingency_matrix
from sklearn.metrics.cluster import entropy
from sklearn.metrics.cluster import expected_mutual_information
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.metrics.cluster import homogeneity_completeness_v_measure
from sklearn.metrics.cluster import homogeneity_score
from sklearn.metrics.cluster import mutual_info_score
from sklearn.metrics.cluster import normalized_mutual_info_score
from sklearn.metrics.cluster import v_measure_score
from sklearn.metrics.cluster._supervised import _generalized_average
from sklearn.utils import assert_all_finite
from sklearn.utils._testing import (
assert_almost_equal, ignore_warnings)
from numpy.testing import assert_array_almost_equal
score_funcs = [
adjusted_rand_score,
homogeneity_score,
completeness_score,
v_measure_score,
adjusted_mutual_info_score,
normalized_mutual_info_score,
]
@ignore_warnings(category=FutureWarning)
def test_error_messages_on_wrong_input():
for score_func in score_funcs:
expected = (r'Found input variables with inconsistent numbers '
r'of samples: \[2, 3\]')
with pytest.raises(ValueError, match=expected):
score_func([0, 1], [1, 1, 1])
expected = r"labels_true must be 1D: shape is \(2"
with pytest.raises(ValueError, match=expected):
score_func([[0, 1], [1, 0]], [1, 1, 1])
expected = r"labels_pred must be 1D: shape is \(2"
with pytest.raises(ValueError, match=expected):
score_func([0, 1, 0], [[1, 1], [0, 0]])
def test_generalized_average():
a, b = 1, 2
methods = ["min", "geometric", "arithmetic", "max"]
means = [_generalized_average(a, b, method) for method in methods]
assert means[0] <= means[1] <= means[2] <= means[3]
c, d = 12, 12
means = [_generalized_average(c, d, method) for method in methods]
assert means[0] == means[1] == means[2] == means[3]
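# Illustrative note (not part of the original tests): for a = 1, b = 2 the
# four generalized averages are min = 1.0, geometric = sqrt(2) ~ 1.414,
# arithmetic = 1.5 and max = 2.0, which is exactly the ordering asserted above.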
@ignore_warnings(category=FutureWarning)
def test_perfect_matches():
for score_func in score_funcs:
assert score_func([], []) == pytest.approx(1.0)
assert score_func([0], [1]) == pytest.approx(1.0)
assert score_func([0, 0, 0], [0, 0, 0]) == pytest.approx(1.0)
assert score_func([0, 1, 0], [42, 7, 42]) == pytest.approx(1.0)
assert score_func([0., 1., 0.], [42., 7., 42.]) == pytest.approx(1.0)
assert score_func([0., 1., 2.], [42., 7., 2.]) == pytest.approx(1.0)
assert score_func([0, 1, 2], [42, 7, 2]) == pytest.approx(1.0)
score_funcs_with_changing_means = [
normalized_mutual_info_score,
adjusted_mutual_info_score,
]
means = {"min", "geometric", "arithmetic", "max"}
for score_func in score_funcs_with_changing_means:
for mean in means:
assert score_func([], [], mean) == pytest.approx(1.0)
assert score_func([0], [1], mean) == pytest.approx(1.0)
assert score_func([0, 0, 0], [0, 0, 0], mean) == pytest.approx(1.0)
assert score_func(
[0, 1, 0], [42, 7, 42], mean) == pytest.approx(1.0)
assert score_func(
[0., 1., 0.], [42., 7., 42.], mean) == pytest.approx(1.0)
assert score_func(
[0., 1., 2.], [42., 7., 2.], mean) == pytest.approx(1.0)
assert score_func(
[0, 1, 2], [42, 7, 2], mean) == pytest.approx(1.0)
def test_homogeneous_but_not_complete_labeling():
# homogeneous but not complete clustering
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 1, 1, 1],
[0, 0, 0, 1, 2, 2])
assert_almost_equal(h, 1.00, 2)
assert_almost_equal(c, 0.69, 2)
assert_almost_equal(v, 0.81, 2)
def test_complete_but_not_homogeneous_labeling():
# complete but not homogeneous clustering
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 1, 1, 2, 2],
[0, 0, 1, 1, 1, 1])
assert_almost_equal(h, 0.58, 2)
assert_almost_equal(c, 1.00, 2)
assert_almost_equal(v, 0.73, 2)
def test_not_complete_and_not_homogeneous_labeling():
# neither complete nor homogeneous but not so bad either
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 1, 1, 1],
[0, 1, 0, 1, 2, 2])
assert_almost_equal(h, 0.67, 2)
assert_almost_equal(c, 0.42, 2)
assert_almost_equal(v, 0.52, 2)
def test_beta_parameter():
    # Test the beta parameter passed to homogeneity_completeness_v_measure
    # and v_measure_score.
beta_test = 0.2
h_test = 0.67
c_test = 0.42
v_test = ((1 + beta_test) * h_test * c_test
/ (beta_test * h_test + c_test))
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 1, 1, 1],
[0, 1, 0, 1, 2, 2],
beta=beta_test)
assert_almost_equal(h, h_test, 2)
assert_almost_equal(c, c_test, 2)
assert_almost_equal(v, v_test, 2)
v = v_measure_score(
[0, 0, 0, 1, 1, 1],
[0, 1, 0, 1, 2, 2],
beta=beta_test)
assert_almost_equal(v, v_test, 2)
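# Illustrative note (not part of the original tests): with h = 0.67, c = 0.42
# and beta = 0.2 the weighted V-measure works out to
#   (1 + 0.2) * 0.67 * 0.42 / (0.2 * 0.67 + 0.42) ~ 0.338 / 0.554 ~ 0.61,
# so beta < 1 weights homogeneity more heavily than completeness.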
def test_non_consecutive_labels():
# regression tests for labels with gaps
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 2, 2, 2],
[0, 1, 0, 1, 2, 2])
assert_almost_equal(h, 0.67, 2)
assert_almost_equal(c, 0.42, 2)
assert_almost_equal(v, 0.52, 2)
h, c, v = homogeneity_completeness_v_measure(
[0, 0, 0, 1, 1, 1],
[0, 4, 0, 4, 2, 2])
assert_almost_equal(h, 0.67, 2)
assert_almost_equal(c, 0.42, 2)
assert_almost_equal(v, 0.52, 2)
ari_1 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])
ari_2 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])
assert_almost_equal(ari_1, 0.24, 2)
assert_almost_equal(ari_2, 0.24, 2)
@ignore_warnings(category=FutureWarning)
def uniform_labelings_scores(score_func, n_samples, k_range, n_runs=10,
seed=42):
# Compute score for random uniform cluster labelings
random_labels = np.random.RandomState(seed).randint
scores = np.zeros((len(k_range), n_runs))
for i, k in enumerate(k_range):
for j in range(n_runs):
labels_a = random_labels(low=0, high=k, size=n_samples)
labels_b = random_labels(low=0, high=k, size=n_samples)
scores[i, j] = score_func(labels_a, labels_b)
return scores
@ignore_warnings(category=FutureWarning)
def test_adjustment_for_chance():
# Check that adjusted scores are almost zero on random labels
n_clusters_range = [2, 10, 50, 90]
n_samples = 100
n_runs = 10
scores = uniform_labelings_scores(
adjusted_rand_score, n_samples, n_clusters_range, n_runs)
max_abs_scores = np.abs(scores).max(axis=1)
assert_array_almost_equal(max_abs_scores, [0.02, 0.03, 0.03, 0.02], 2)
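# A minimal sketch (not part of the original tests) of why the adjustment
# matters: unadjusted scores such as v_measure_score drift upwards with the
# number of random clusters rather than staying near zero, e.g.
#   raw = uniform_labelings_scores(v_measure_score, 100, [2, 10, 50, 90])
#   raw.mean(axis=1)  # grows with the number of clusters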
def test_adjusted_mutual_info_score():
# Compute the Adjusted Mutual Information and test against known values
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
# Mutual information
mi = mutual_info_score(labels_a, labels_b)
assert_almost_equal(mi, 0.41022, 5)
# with provided sparse contingency
C = contingency_matrix(labels_a, labels_b, sparse=True)
mi = mutual_info_score(labels_a, labels_b, contingency=C)
assert_almost_equal(mi, 0.41022, 5)
# with provided dense contingency
C = contingency_matrix(labels_a, labels_b)
mi = mutual_info_score(labels_a, labels_b, contingency=C)
assert_almost_equal(mi, 0.41022, 5)
# Expected mutual information
n_samples = C.sum()
emi = expected_mutual_information(C, n_samples)
assert_almost_equal(emi, 0.15042, 5)
# Adjusted mutual information
ami = adjusted_mutual_info_score(labels_a, labels_b)
assert_almost_equal(ami, 0.27821, 5)
ami = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3])
assert ami == pytest.approx(1.0)
# Test with a very large array
a110 = np.array([list(labels_a) * 110]).flatten()
b110 = np.array([list(labels_b) * 110]).flatten()
ami = adjusted_mutual_info_score(a110, b110)
assert_almost_equal(ami, 0.38, 2)
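# Illustrative note (not part of the original tests): AMI adjusts MI for
# chance via AMI = (MI - E[MI]) / (mean(H(a), H(b)) - E[MI]); with the values
# above, MI ~ 0.41022, E[MI] ~ 0.15042 and the arithmetic mean of the label
# entropies ~ 1.084, giving (0.41022 - 0.15042) / (1.084 - 0.15042) ~ 0.278.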
def test_expected_mutual_info_overflow():
# Test for regression where contingency cell exceeds 2**16
# leading to overflow in np.outer, resulting in EMI > 1
assert expected_mutual_information(np.array([[70000]]), 70000) <= 1
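# Illustrative note (not part of the original tests): a marginal of 70000
# (> 2**16) makes the outer product reach 70000 * 70000 ~ 4.9e9, which is
# beyond the 32-bit integer range (~4.3e9 unsigned) and used to wrap around,
# hence the EMI <= 1 bound checked above.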
def test_int_overflow_mutual_info_fowlkes_mallows_score():
    # Test integer overflow in mutual_info_score and fowlkes_mallows_score
x = np.array([1] * (52632 + 2529) + [2] * (14660 + 793) + [3] * (3271 +
204) + [4] * (814 + 39) + [5] * (316 + 20))
y = np.array([0] * 52632 + [1] * 2529 + [0] * 14660 + [1] * 793 +
[0] * 3271 + [1] * 204 + [0] * 814 + [1] * 39 + [0] * 316 +
[1] * 20)
assert_all_finite(mutual_info_score(x, y))
assert_all_finite(fowlkes_mallows_score(x, y))
def test_entropy():
ent = entropy([0, 0, 42.])
assert_almost_equal(ent, 0.6365141, 5)
assert_almost_equal(entropy([]), 1)
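# Illustrative note (not part of the original tests): for [0, 0, 42.] the
# class proportions are 2/3 and 1/3, so
#   H = -(2/3 * log(2/3) + 1/3 * log(1/3)) ~ 0.2703 + 0.3662 ~ 0.6365 nats,
# and entropy([]) is defined to be 1 by convention, as asserted above.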
def test_contingency_matrix():
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
C = contingency_matrix(labels_a, labels_b)
C2 = np.histogram2d(labels_a, labels_b,
bins=(np.arange(1, 5),
np.arange(1, 5)))[0]
assert_array_almost_equal(C, C2)
C = contingency_matrix(labels_a, labels_b, eps=.1)
assert_array_almost_equal(C, C2 + .1)
def test_contingency_matrix_sparse():
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
C = contingency_matrix(labels_a, labels_b)
C_sparse = contingency_matrix(labels_a, labels_b, sparse=True).toarray()
assert_array_almost_equal(C, C_sparse)
with pytest.raises(ValueError, match="Cannot set 'eps' when sparse=True"):
contingency_matrix(labels_a, labels_b, eps=1e-10, sparse=True)
@ignore_warnings(category=FutureWarning)
def test_exactly_zero_info_score():
# Check numerical stability when information is exactly zero
    for i in np.logspace(1, 4, 4).astype(int):
        labels_a, labels_b = (np.ones(i, dtype=int),
                              np.arange(i, dtype=int))
assert normalized_mutual_info_score(
labels_a, labels_b) == pytest.approx(0.0)
assert v_measure_score(
labels_a, labels_b) == pytest.approx(0.0)
assert adjusted_mutual_info_score(
labels_a, labels_b) == pytest.approx(0.0)
assert normalized_mutual_info_score(
labels_a, labels_b) == pytest.approx(0.0)
for method in ["min", "geometric", "arithmetic", "max"]:
assert adjusted_mutual_info_score(
labels_a, labels_b, method) == pytest.approx(0.0)
assert normalized_mutual_info_score(
labels_a, labels_b, method) == pytest.approx(0.0)
def test_v_measure_and_mutual_information(seed=36):
# Check relation between v_measure, entropy and mutual information
    for i in np.logspace(1, 4, 4).astype(int):
random_state = np.random.RandomState(seed)
labels_a, labels_b = (random_state.randint(0, 10, i),
random_state.randint(0, 10, i))
assert_almost_equal(v_measure_score(labels_a, labels_b),
2.0 * mutual_info_score(labels_a, labels_b) /
(entropy(labels_a) + entropy(labels_b)), 0)
avg = 'arithmetic'
assert_almost_equal(v_measure_score(labels_a, labels_b),
normalized_mutual_info_score(labels_a, labels_b,
average_method=avg)
)
def test_fowlkes_mallows_score():
# General case
score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1],
[0, 0, 1, 1, 2, 2])
assert_almost_equal(score, 4. / np.sqrt(12. * 6.))
# Perfect match but where the label names changed
perfect_score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1],
[1, 1, 1, 0, 0, 0])
assert_almost_equal(perfect_score, 1.)
# Worst case
worst_score = fowlkes_mallows_score([0, 0, 0, 0, 0, 0],
[0, 1, 2, 3, 4, 5])
assert_almost_equal(worst_score, 0.)
def test_fowlkes_mallows_score_properties():
# handcrafted example
labels_a = np.array([0, 0, 0, 1, 1, 2])
labels_b = np.array([1, 1, 2, 2, 0, 0])
expected = 1. / np.sqrt((1. + 3.) * (1. + 2.))
# FMI = TP / sqrt((TP + FP) * (TP + FN))
score_original = fowlkes_mallows_score(labels_a, labels_b)
assert_almost_equal(score_original, expected)
# symmetric property
score_symmetric = fowlkes_mallows_score(labels_b, labels_a)
assert_almost_equal(score_symmetric, expected)
# permutation property
score_permuted = fowlkes_mallows_score((labels_a + 1) % 3, labels_b)
assert_almost_equal(score_permuted, expected)
    # symmetric and permutation (both together)
score_both = fowlkes_mallows_score(labels_b, (labels_a + 2) % 3)
assert_almost_equal(score_both, expected)
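# Illustrative note (not part of the original tests): counting unordered
# pairs that share a cluster in the handcrafted example gives TP = 1 (only
# the first two samples are co-clustered in both labelings), FP = 3 and
# FN = 2, so FMI = 1 / sqrt((1 + 3) * (1 + 2)) = 1 / sqrt(12), the `expected`
# value above.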
@pytest.mark.parametrize('labels_true, labels_pred', [
(['a'] * 6, [1, 1, 0, 0, 1, 1]),
([1] * 6, [1, 1, 0, 0, 1, 1]),
([1, 1, 0, 0, 1, 1], ['a'] * 6),
([1, 1, 0, 0, 1, 1], [1] * 6),
])
def test_mutual_info_score_positive_constant_label(labels_true, labels_pred):
# non-regression test for #16355
assert mutual_info_score(labels_true, labels_pred) >= 0

View file

@ -0,0 +1,252 @@
import numpy as np
import scipy.sparse as sp
import pytest
from scipy.sparse import csr_matrix
from sklearn import datasets
from sklearn.utils._testing import assert_array_equal
from sklearn.metrics.cluster import silhouette_score
from sklearn.metrics.cluster import silhouette_samples
from sklearn.metrics import pairwise_distances
from sklearn.metrics.cluster import calinski_harabasz_score
from sklearn.metrics.cluster import davies_bouldin_score
def test_silhouette():
# Tests the Silhouette Coefficient.
dataset = datasets.load_iris()
X_dense = dataset.data
X_csr = csr_matrix(X_dense)
X_dok = sp.dok_matrix(X_dense)
X_lil = sp.lil_matrix(X_dense)
y = dataset.target
for X in [X_dense, X_csr, X_dok, X_lil]:
D = pairwise_distances(X, metric='euclidean')
# Given that the actual labels are used, we can assume that S would be
# positive.
score_precomputed = silhouette_score(D, y, metric='precomputed')
assert score_precomputed > 0
# Test without calculating D
score_euclidean = silhouette_score(X, y, metric='euclidean')
        assert score_precomputed == pytest.approx(score_euclidean)
if X is X_dense:
score_dense_without_sampling = score_precomputed
else:
            assert score_euclidean == pytest.approx(
                score_dense_without_sampling)
# Test with sampling
score_precomputed = silhouette_score(D, y, metric='precomputed',
sample_size=int(X.shape[0] / 2),
random_state=0)
score_euclidean = silhouette_score(X, y, metric='euclidean',
sample_size=int(X.shape[0] / 2),
random_state=0)
assert score_precomputed > 0
assert score_euclidean > 0
        assert score_euclidean == pytest.approx(score_precomputed)
if X is X_dense:
score_dense_with_sampling = score_precomputed
else:
            assert score_euclidean == pytest.approx(score_dense_with_sampling)
def test_cluster_size_1():
# Assert Silhouette Coefficient == 0 when there is 1 sample in a cluster
# (cluster 0). We also test the case where there are identical samples
# as the only members of a cluster (cluster 2). To our knowledge, this case
# is not discussed in reference material, and we choose for it a sample
# score of 1.
X = [[0.], [1.], [1.], [2.], [3.], [3.]]
labels = np.array([0, 1, 1, 1, 2, 2])
# Cluster 0: 1 sample -> score of 0 by Rousseeuw's convention
# Cluster 1: intra-cluster = [.5, .5, 1]
# inter-cluster = [1, 1, 1]
# silhouette = [.5, .5, 0]
# Cluster 2: intra-cluster = [0, 0]
# inter-cluster = [arbitrary, arbitrary]
# silhouette = [1., 1.]
silhouette = silhouette_score(X, labels)
assert not np.isnan(silhouette)
ss = silhouette_samples(X, labels)
assert_array_equal(ss, [0, .5, .5, 0, 1, 1])
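# Illustrative note (not part of the original tests): for the second sample
# (x = 1., cluster 1) the definition s = (b - a) / max(a, b) gives
# a = (0 + 1) / 2 = 0.5 (mean intra-cluster distance) and b = min(1, 2) = 1
# (nearest other-cluster mean distance), so s = 0.5 as in the array above.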
def test_silhouette_paper_example():
# Explicitly check per-sample results against Rousseeuw (1987)
# Data from Table 1
lower = [5.58,
7.00, 6.50,
7.08, 7.00, 3.83,
4.83, 5.08, 8.17, 5.83,
2.17, 5.75, 6.67, 6.92, 4.92,
6.42, 5.00, 5.58, 6.00, 4.67, 6.42,
3.42, 5.50, 6.42, 6.42, 5.00, 3.92, 6.17,
2.50, 4.92, 6.25, 7.33, 4.50, 2.25, 6.33, 2.75,
6.08, 6.67, 4.25, 2.67, 6.00, 6.17, 6.17, 6.92, 6.17,
5.25, 6.83, 4.50, 3.75, 5.75, 5.42, 6.08, 5.83, 6.67, 3.67,
4.75, 3.00, 6.08, 6.67, 5.00, 5.58, 4.83, 6.17, 5.67, 6.50, 6.92]
D = np.zeros((12, 12))
D[np.tril_indices(12, -1)] = lower
D += D.T
names = ['BEL', 'BRA', 'CHI', 'CUB', 'EGY', 'FRA', 'IND', 'ISR', 'USA',
'USS', 'YUG', 'ZAI']
# Data from Figure 2
labels1 = [1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1]
expected1 = {'USA': .43, 'BEL': .39, 'FRA': .35, 'ISR': .30, 'BRA': .22,
'EGY': .20, 'ZAI': .19, 'CUB': .40, 'USS': .34, 'CHI': .33,
'YUG': .26, 'IND': -.04}
score1 = .28
# Data from Figure 3
labels2 = [1, 2, 3, 3, 1, 1, 2, 1, 1, 3, 3, 2]
expected2 = {'USA': .47, 'FRA': .44, 'BEL': .42, 'ISR': .37, 'EGY': .02,
'ZAI': .28, 'BRA': .25, 'IND': .17, 'CUB': .48, 'USS': .44,
'YUG': .31, 'CHI': .31}
score2 = .33
for labels, expected, score in [(labels1, expected1, score1),
(labels2, expected2, score2)]:
expected = [expected[name] for name in names]
# we check to 2dp because that's what's in the paper
        assert expected == pytest.approx(
            silhouette_samples(D, np.array(labels), metric='precomputed'),
            abs=1e-2)
        assert score == pytest.approx(
            silhouette_score(D, np.array(labels), metric='precomputed'),
            abs=1e-2)
def test_correct_labelsize():
# Assert 1 < n_labels < n_samples
dataset = datasets.load_iris()
X = dataset.data
# n_labels = n_samples
y = np.arange(X.shape[0])
err_msg = (r'Number of labels is %d\. Valid values are 2 '
r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)))
with pytest.raises(ValueError, match=err_msg):
silhouette_score(X, y)
# n_labels = 1
y = np.zeros(X.shape[0])
err_msg = (r'Number of labels is %d\. Valid values are 2 '
r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)))
with pytest.raises(ValueError, match=err_msg):
silhouette_score(X, y)
def test_non_encoded_labels():
dataset = datasets.load_iris()
X = dataset.data
labels = dataset.target
assert (
silhouette_score(X, labels * 2 + 10) == silhouette_score(X, labels))
assert_array_equal(
silhouette_samples(X, labels * 2 + 10), silhouette_samples(X, labels))
def test_non_numpy_labels():
dataset = datasets.load_iris()
X = dataset.data
y = dataset.target
assert (
silhouette_score(list(X), list(y)) == silhouette_score(X, y))
@pytest.mark.parametrize('dtype', (np.float32, np.float64))
def test_silhouette_nonzero_diag(dtype):
# Make sure silhouette_samples requires diagonal to be zero.
# Non-regression test for #12178
# Construct a zero-diagonal matrix
dists = pairwise_distances(
np.array([[0.2, 0.1, 0.12, 1.34, 1.11, 1.6]], dtype=dtype).T)
labels = [0, 0, 0, 1, 1, 1]
# small values on the diagonal are OK
dists[2][2] = np.finfo(dists.dtype).eps * 10
silhouette_samples(dists, labels, metric='precomputed')
# values bigger than eps * 100 are not
dists[2][2] = np.finfo(dists.dtype).eps * 1000
with pytest.raises(ValueError, match='contains non-zero'):
silhouette_samples(dists, labels, metric='precomputed')
def assert_raises_on_only_one_label(func):
"""Assert message when there is only one label"""
rng = np.random.RandomState(seed=0)
with pytest.raises(ValueError, match="Number of labels is"):
func(rng.rand(10, 2), np.zeros(10))
def assert_raises_on_all_points_same_cluster(func):
"""Assert message when all point are in different clusters"""
rng = np.random.RandomState(seed=0)
with pytest.raises(ValueError, match="Number of labels is"):
func(rng.rand(10, 2), np.arange(10))
def test_calinski_harabasz_score():
assert_raises_on_only_one_label(calinski_harabasz_score)
assert_raises_on_all_points_same_cluster(calinski_harabasz_score)
    # Assert the value is 1. when all samples are identical
assert 1. == calinski_harabasz_score(np.ones((10, 2)),
[0] * 5 + [1] * 5)
    # Assert the value is 0. when all cluster means are equal
assert 0. == calinski_harabasz_score([[-1, -1], [1, 1]] * 10,
[0] * 10 + [1] * 10)
    # General case (with non-numpy arrays)
X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
[[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
    assert calinski_harabasz_score(X, labels) == pytest.approx(
        45 * (40 - 4) / (5 * (4 - 1)))
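# Illustrative note (not part of the original tests): the cluster centroids in
# the general case are (.5, .5), (3.5, 3.5), (.5, 3.5) and (3.5, .5), so the
# within-cluster sum of squares is 40 * 0.5 = 20 and the between-cluster sum
# of squares is 4 * 10 * 4.5 = 180; the score
# (180 / (4 - 1)) / (20 / (40 - 4)) = 108 equals 45 * (40 - 4) / (5 * (4 - 1)).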
def test_davies_bouldin_score():
assert_raises_on_only_one_label(davies_bouldin_score)
assert_raises_on_all_points_same_cluster(davies_bouldin_score)
    # Assert the value is 0. when all samples are identical
assert davies_bouldin_score(np.ones((10, 2)),
[0] * 5 + [1] * 5) == pytest.approx(0.0)
    # Assert the value is 0. when all cluster means are equal
assert davies_bouldin_score([[-1, -1], [1, 1]] * 10,
[0] * 10 + [1] * 10) == pytest.approx(0.0)
    # General case (with non-numpy arrays)
X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
[[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
    assert davies_bouldin_score(X, labels) == pytest.approx(
        2 * np.sqrt(0.5) / 3)
    # Ensure divide by zero warning is not raised in the general case
with pytest.warns(None) as record:
davies_bouldin_score(X, labels)
div_zero_warnings = [
warning for warning in record
if "divide by zero encountered" in warning.message.args[0]
]
assert len(div_zero_warnings) == 0
    # General case - clusters with a single sample
X = ([[0, 0], [2, 2], [3, 3], [5, 5]])
labels = [0, 0, 1, 2]
    assert davies_bouldin_score(X, labels) == pytest.approx((5. / 4) / 3)
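# Illustrative note (not part of the original tests): in the last example the
# cluster scatters are s0 = sqrt(2) (points (0, 0) and (2, 2) around centroid
# (1, 1)) and s1 = s2 = 0 (singleton clusters); the pairwise ratios
# (s_i + s_j) / d(c_i, c_j) are 0.5, 0.25 and 0, and averaging the per-cluster
# maxima gives (0.5 + 0.5 + 0.25) / 3 = (5. / 4) / 3.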

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _unsupervised # type: ignore
from ...externals._pep562 import Pep562
from ...utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.metrics.cluster.unsupervised'
correct_import_path = 'sklearn.metrics.cluster'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_unsupervised, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)
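# Illustrative usage (an assumption about the shim's behaviour, not part of
# the generated file): importing through the deprecated path, e.g.
#   from sklearn.metrics.cluster.unsupervised import silhouette_score
# warns that the path is deprecated when not run under pytest and then
# resolves the attribute from sklearn.metrics.cluster._unsupervised via the
# module-level __getattr__ above.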