Uploaded Test files
This commit is contained in:
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
158  venv/Lib/site-packages/sklearn/metrics/__init__.py  Normal file
@@ -0,0 +1,158 @@
"""
The :mod:`sklearn.metrics` module includes score functions, performance metrics
and pairwise metrics and distance computations.
"""


from ._ranking import auc
from ._ranking import average_precision_score
from ._ranking import coverage_error
from ._ranking import dcg_score
from ._ranking import label_ranking_average_precision_score
from ._ranking import label_ranking_loss
from ._ranking import ndcg_score
from ._ranking import precision_recall_curve
from ._ranking import roc_auc_score
from ._ranking import roc_curve

from ._classification import accuracy_score
from ._classification import balanced_accuracy_score
from ._classification import classification_report
from ._classification import cohen_kappa_score
from ._classification import confusion_matrix
from ._classification import f1_score
from ._classification import fbeta_score
from ._classification import hamming_loss
from ._classification import hinge_loss
from ._classification import jaccard_score
from ._classification import log_loss
from ._classification import matthews_corrcoef
from ._classification import precision_recall_fscore_support
from ._classification import precision_score
from ._classification import recall_score
from ._classification import zero_one_loss
from ._classification import brier_score_loss
from ._classification import multilabel_confusion_matrix

from . import cluster
from .cluster import adjusted_mutual_info_score
from .cluster import adjusted_rand_score
from .cluster import completeness_score
from .cluster import consensus_score
from .cluster import homogeneity_completeness_v_measure
from .cluster import homogeneity_score
from .cluster import mutual_info_score
from .cluster import normalized_mutual_info_score
from .cluster import fowlkes_mallows_score
from .cluster import silhouette_samples
from .cluster import silhouette_score
from .cluster import calinski_harabasz_score
from .cluster import v_measure_score
from .cluster import davies_bouldin_score

from .pairwise import euclidean_distances
from .pairwise import nan_euclidean_distances
from .pairwise import pairwise_distances
from .pairwise import pairwise_distances_argmin
from .pairwise import pairwise_distances_argmin_min
from .pairwise import pairwise_kernels
from .pairwise import pairwise_distances_chunked

from ._regression import explained_variance_score
from ._regression import max_error
from ._regression import mean_absolute_error
from ._regression import mean_squared_error
from ._regression import mean_squared_log_error
from ._regression import median_absolute_error
from ._regression import r2_score
from ._regression import mean_tweedie_deviance
from ._regression import mean_poisson_deviance
from ._regression import mean_gamma_deviance


from ._scorer import check_scoring
from ._scorer import make_scorer
from ._scorer import SCORERS
from ._scorer import get_scorer

from ._plot.roc_curve import plot_roc_curve
from ._plot.roc_curve import RocCurveDisplay
from ._plot.precision_recall_curve import plot_precision_recall_curve
from ._plot.precision_recall_curve import PrecisionRecallDisplay

from ._plot.confusion_matrix import plot_confusion_matrix
from ._plot.confusion_matrix import ConfusionMatrixDisplay


__all__ = [
    'accuracy_score',
    'adjusted_mutual_info_score',
    'adjusted_rand_score',
    'auc',
    'average_precision_score',
    'balanced_accuracy_score',
    'calinski_harabasz_score',
    'check_scoring',
    'classification_report',
    'cluster',
    'cohen_kappa_score',
    'completeness_score',
    'ConfusionMatrixDisplay',
    'confusion_matrix',
    'consensus_score',
    'coverage_error',
    'dcg_score',
    'davies_bouldin_score',
    'euclidean_distances',
    'explained_variance_score',
    'f1_score',
    'fbeta_score',
    'fowlkes_mallows_score',
    'get_scorer',
    'hamming_loss',
    'hinge_loss',
    'homogeneity_completeness_v_measure',
    'homogeneity_score',
    'jaccard_score',
    'label_ranking_average_precision_score',
    'label_ranking_loss',
    'log_loss',
    'make_scorer',
    'nan_euclidean_distances',
    'matthews_corrcoef',
    'max_error',
    'mean_absolute_error',
    'mean_squared_error',
    'mean_squared_log_error',
    'mean_poisson_deviance',
    'mean_gamma_deviance',
    'mean_tweedie_deviance',
    'median_absolute_error',
    'multilabel_confusion_matrix',
    'mutual_info_score',
    'ndcg_score',
    'normalized_mutual_info_score',
    'pairwise_distances',
    'pairwise_distances_argmin',
    'pairwise_distances_argmin_min',
    'pairwise_distances_chunked',
    'pairwise_kernels',
    'plot_confusion_matrix',
    'plot_precision_recall_curve',
    'plot_roc_curve',
    'PrecisionRecallDisplay',
    'precision_recall_curve',
    'precision_recall_fscore_support',
    'precision_score',
    'r2_score',
    'recall_score',
    'RocCurveDisplay',
    'roc_auc_score',
    'roc_curve',
    'SCORERS',
    'silhouette_samples',
    'silhouette_score',
    'v_measure_score',
    'zero_one_loss',
    'brier_score_loss',
]
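For orientation, a minimal usage sketch of the public API re-exported by this __init__ module; the dataset and model below are illustrative assumptions, not part of the commit.

# Sketch: exercise a few of the metrics re-exported by sklearn.metrics.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score

X, y = make_classification(random_state=0)        # illustrative toy data
clf = LogisticRegression().fit(X, y)
y_pred = clf.predict(X)

print(accuracy_score(y, y_pred))
print(confusion_matrix(y, y_pred))
print(roc_auc_score(y, clf.predict_proba(X)[:, 1]))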
Binary file not shown.
202  venv/Lib/site-packages/sklearn/metrics/_base.py  Normal file
@@ -0,0 +1,202 @@
"""
Common code for all metrics

"""
# Authors: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#          Mathieu Blondel <mathieu@mblondel.org>
#          Olivier Grisel <olivier.grisel@ensta.org>
#          Arnaud Joly <a.joly@ulg.ac.be>
#          Jochen Wersdorfer <jochen@wersdoerfer.de>
#          Lars Buitinck
#          Joel Nothman <joel.nothman@gmail.com>
#          Noel Dawe <noel@dawe.me>
# License: BSD 3 clause

from itertools import combinations

import numpy as np

from ..utils import check_array, check_consistent_length
from ..utils.multiclass import type_of_target


def _average_binary_score(binary_metric, y_true, y_score, average,
                          sample_weight=None):
    """Average a binary metric for multilabel classification

    Parameters
    ----------
    y_true : array, shape = [n_samples] or [n_samples, n_classes]
        True binary labels in binary label indicators.

    y_score : array, shape = [n_samples] or [n_samples, n_classes]
        Target scores, can either be probability estimates of the positive
        class, confidence values, or binary decisions.

    average : string, [None, 'micro', 'macro' (default), 'samples', 'weighted']
        If ``None``, the scores for each class are returned. Otherwise,
        this determines the type of averaging performed on the data:

        ``'micro'``:
            Calculate metrics globally by considering each element of the label
            indicator matrix as a label.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean. This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average, weighted
            by support (the number of true instances for each label).
        ``'samples'``:
            Calculate metrics for each instance, and find their average.

        Will be ignored when ``y_true`` is binary.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    binary_metric : callable, returns shape [n_classes]
        The binary metric function to use.

    Returns
    -------
    score : float or array of shape [n_classes]
        If ``average`` is not ``None``, return the averaged score, else return
        the score for each class.

    """
    average_options = (None, 'micro', 'macro', 'weighted', 'samples')
    if average not in average_options:
        raise ValueError('average has to be one of {0}'
                         ''.format(average_options))

    y_type = type_of_target(y_true)
    if y_type not in ("binary", "multilabel-indicator"):
        raise ValueError("{0} format is not supported".format(y_type))

    if y_type == "binary":
        return binary_metric(y_true, y_score, sample_weight=sample_weight)

    check_consistent_length(y_true, y_score, sample_weight)
    y_true = check_array(y_true)
    y_score = check_array(y_score)

    not_average_axis = 1
    score_weight = sample_weight
    average_weight = None

    if average == "micro":
        if score_weight is not None:
            score_weight = np.repeat(score_weight, y_true.shape[1])
        y_true = y_true.ravel()
        y_score = y_score.ravel()

    elif average == 'weighted':
        if score_weight is not None:
            average_weight = np.sum(np.multiply(
                y_true, np.reshape(score_weight, (-1, 1))), axis=0)
        else:
            average_weight = np.sum(y_true, axis=0)
        if np.isclose(average_weight.sum(), 0.0):
            return 0

    elif average == 'samples':
        # swap average_weight <-> score_weight
        average_weight = score_weight
        score_weight = None
        not_average_axis = 0

    if y_true.ndim == 1:
        y_true = y_true.reshape((-1, 1))

    if y_score.ndim == 1:
        y_score = y_score.reshape((-1, 1))

    n_classes = y_score.shape[not_average_axis]
    score = np.zeros((n_classes,))
    for c in range(n_classes):
        y_true_c = y_true.take([c], axis=not_average_axis).ravel()
        y_score_c = y_score.take([c], axis=not_average_axis).ravel()
        score[c] = binary_metric(y_true_c, y_score_c,
                                 sample_weight=score_weight)

    # Average the results
    if average is not None:
        if average_weight is not None:
            # Scores with 0 weights are forced to be 0, preventing the average
            # score from being affected by 0-weighted NaN elements.
            average_weight = np.asarray(average_weight)
            score[average_weight == 0] = 0
        return np.average(score, weights=average_weight)
    else:
        return score


def _average_multiclass_ovo_score(binary_metric, y_true, y_score,
                                  average='macro'):
    """Average one-versus-one scores for multiclass classification.

    Uses the binary metric for one-vs-one multiclass classification,
    where the score is computed according to the Hand & Till (2001) algorithm.

    Parameters
    ----------
    binary_metric : callable
        The binary metric function to use that accepts the following as input
            y_true_target : array, shape = [n_samples_target]
                Some sub-array of y_true for a pair of classes designated
                positive and negative in the one-vs-one scheme.
            y_score_target : array, shape = [n_samples_target]
                Scores corresponding to the probability estimates
                of a sample belonging to the designated positive class label

    y_true : array-like of shape (n_samples,)
        True multiclass labels.

    y_score : array-like of shape (n_samples, n_classes)
        Target scores corresponding to probability estimates of a sample
        belonging to a particular class

    average : 'macro' or 'weighted', optional (default='macro')
        Determines the type of averaging performed on the pairwise binary
        metric scores
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean. This does not take label imbalance into account. Classes
            are assumed to be uniformly distributed.
        ``'weighted'``:
            Calculate metrics for each label, taking into account the
            prevalence of the classes.

    Returns
    -------
    score : float
        Average of the pairwise binary metric scores
    """
    check_consistent_length(y_true, y_score)

    y_true_unique = np.unique(y_true)
    n_classes = y_true_unique.shape[0]
    n_pairs = n_classes * (n_classes - 1) // 2
    pair_scores = np.empty(n_pairs)

    is_weighted = average == "weighted"
    prevalence = np.empty(n_pairs) if is_weighted else None

    # Compute scores treating a as positive class and b as negative class,
    # then b as positive class and a as negative class
    for ix, (a, b) in enumerate(combinations(y_true_unique, 2)):
        a_mask = y_true == a
        b_mask = y_true == b
        ab_mask = np.logical_or(a_mask, b_mask)

        if is_weighted:
            prevalence[ix] = np.average(ab_mask)

        a_true = a_mask[ab_mask]
        b_true = b_mask[ab_mask]

        a_true_score = binary_metric(a_true, y_score[ab_mask, a])
        b_true_score = binary_metric(b_true, y_score[ab_mask, b])
        pair_scores[ix] = (a_true_score + b_true_score) / 2

    return np.average(pair_scores, weights=prevalence)
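A small sketch of how _average_binary_score is driven by a binary metric; the helper is private, so the import path is an implementation detail of this package layout, and the toy arrays are illustrative.

# Sketch: macro-average a binary metric over a multilabel indicator target.
import numpy as np
from sklearn.metrics import average_precision_score
from sklearn.metrics._base import _average_binary_score  # private helper

y_true = np.array([[1, 0, 1],
                   [0, 1, 1],
                   [1, 1, 0]])          # illustrative multilabel indicators
y_score = np.array([[0.9, 0.2, 0.8],
                    [0.1, 0.7, 0.6],
                    [0.8, 0.4, 0.3]])   # illustrative scores

macro_ap = _average_binary_score(average_precision_score, y_true, y_score,
                                 average="macro")
print(macro_ap)  # unweighted mean of the per-column average precision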
2459  venv/Lib/site-packages/sklearn/metrics/_classification.py  Normal file
File diff suppressed because it is too large
Binary file not shown.
0  venv/Lib/site-packages/sklearn/metrics/_plot/__init__.py  Normal file
Binary file not shown.
40  venv/Lib/site-packages/sklearn/metrics/_plot/base.py  Normal file
@@ -0,0 +1,40 @@
def _check_classifer_response_method(estimator, response_method):
    """Return prediction method from the response_method

    Parameters
    ----------
    estimator: object
        Classifier to check

    response_method: {'auto', 'predict_proba', 'decision_function'}
        Specifies whether to use :term:`predict_proba` or
        :term:`decision_function` as the target response. If set to 'auto',
        :term:`predict_proba` is tried first and if it does not exist
        :term:`decision_function` is tried next.

    Returns
    -------
    prediction_method: callable
        prediction method of estimator
    """

    if response_method not in ("predict_proba", "decision_function", "auto"):
        raise ValueError("response_method must be 'predict_proba', "
                         "'decision_function' or 'auto'")

    error_msg = "response method {} is not defined in {}"
    if response_method != "auto":
        prediction_method = getattr(estimator, response_method, None)
        if prediction_method is None:
            raise ValueError(error_msg.format(response_method,
                                              estimator.__class__.__name__))
    else:
        predict_proba = getattr(estimator, 'predict_proba', None)
        decision_function = getattr(estimator, 'decision_function', None)
        prediction_method = predict_proba or decision_function
        if prediction_method is None:
            raise ValueError(error_msg.format(
                "decision_function or predict_proba",
                estimator.__class__.__name__))

    return prediction_method
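A brief sketch of how _check_classifer_response_method resolves the response method; the helper is private and the classifier choice below is illustrative.

# Sketch: 'auto' prefers predict_proba and falls back to decision_function.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics._plot.base import _check_classifer_response_method

X, y = make_classification(random_state=0)  # illustrative toy data
clf = LogisticRegression().fit(X, y)
method = _check_classifer_response_method(clf, "auto")
print(method.__name__)  # 'predict_proba', since LogisticRegression defines it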
233  venv/Lib/site-packages/sklearn/metrics/_plot/confusion_matrix.py  Normal file
@@ -0,0 +1,233 @@
from itertools import product

import numpy as np

from .. import confusion_matrix
from ...utils import check_matplotlib_support
from ...utils.validation import _deprecate_positional_args
from ...base import is_classifier


class ConfusionMatrixDisplay:
    """Confusion Matrix visualization.

    It is recommended to use :func:`~sklearn.metrics.plot_confusion_matrix` to
    create a :class:`ConfusionMatrixDisplay`. All parameters are stored as
    attributes.

    Read more in the :ref:`User Guide <visualizations>`.

    Parameters
    ----------
    confusion_matrix : ndarray of shape (n_classes, n_classes)
        Confusion matrix.

    display_labels : ndarray of shape (n_classes,), default=None
        Display labels for plot. If None, display labels are set from 0 to
        `n_classes - 1`.

    Attributes
    ----------
    im_ : matplotlib AxesImage
        Image representing the confusion matrix.

    text_ : ndarray of shape (n_classes, n_classes), dtype=matplotlib Text, \
            or None
        Array of matplotlib Text objects holding the cell values. `None` if
        `include_values` is false.

    ax_ : matplotlib Axes
        Axes with confusion matrix.

    figure_ : matplotlib Figure
        Figure containing the confusion matrix.
    """
    def __init__(self, confusion_matrix, *, display_labels=None):
        self.confusion_matrix = confusion_matrix
        self.display_labels = display_labels

    @_deprecate_positional_args
    def plot(self, *, include_values=True, cmap='viridis',
             xticks_rotation='horizontal', values_format=None, ax=None):
        """Plot visualization.

        Parameters
        ----------
        include_values : bool, default=True
            Includes values in confusion matrix.

        cmap : str or matplotlib Colormap, default='viridis'
            Colormap recognized by matplotlib.

        xticks_rotation : {'vertical', 'horizontal'} or float, \
                         default='horizontal'
            Rotation of xtick labels.

        values_format : str, default=None
            Format specification for values in confusion matrix. If `None`,
            the format specification is 'd' or '.2g' whichever is shorter.

        ax : matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        Returns
        -------
        display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`
        """
        check_matplotlib_support("ConfusionMatrixDisplay.plot")
        import matplotlib.pyplot as plt

        if ax is None:
            fig, ax = plt.subplots()
        else:
            fig = ax.figure

        cm = self.confusion_matrix
        n_classes = cm.shape[0]
        self.im_ = ax.imshow(cm, interpolation='nearest', cmap=cmap)
        self.text_ = None
        cmap_min, cmap_max = self.im_.cmap(0), self.im_.cmap(256)

        if include_values:
            self.text_ = np.empty_like(cm, dtype=object)

            # print text with appropriate color depending on background
            thresh = (cm.max() + cm.min()) / 2.0

            for i, j in product(range(n_classes), range(n_classes)):
                color = cmap_max if cm[i, j] < thresh else cmap_min

                if values_format is None:
                    text_cm = format(cm[i, j], '.2g')
                    if cm.dtype.kind != 'f':
                        text_d = format(cm[i, j], 'd')
                        if len(text_d) < len(text_cm):
                            text_cm = text_d
                else:
                    text_cm = format(cm[i, j], values_format)

                self.text_[i, j] = ax.text(
                    j, i, text_cm,
                    ha="center", va="center",
                    color=color)

        if self.display_labels is None:
            display_labels = np.arange(n_classes)
        else:
            display_labels = self.display_labels

        fig.colorbar(self.im_, ax=ax)
        ax.set(xticks=np.arange(n_classes),
               yticks=np.arange(n_classes),
               xticklabels=display_labels,
               yticklabels=display_labels,
               ylabel="True label",
               xlabel="Predicted label")

        ax.set_ylim((n_classes - 0.5, -0.5))
        plt.setp(ax.get_xticklabels(), rotation=xticks_rotation)

        self.figure_ = fig
        self.ax_ = ax
        return self


@_deprecate_positional_args
def plot_confusion_matrix(estimator, X, y_true, *, labels=None,
                          sample_weight=None, normalize=None,
                          display_labels=None, include_values=True,
                          xticks_rotation='horizontal',
                          values_format=None,
                          cmap='viridis', ax=None):
    """Plot Confusion Matrix.

    Read more in the :ref:`User Guide <confusion_matrix>`.

    Parameters
    ----------
    estimator : estimator instance
        Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`
        in which the last estimator is a classifier.

    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Input values.

    y_true : array-like of shape (n_samples,)
        Target values.

    labels : array-like of shape (n_classes,), default=None
        List of labels to index the matrix. This may be used to reorder or
        select a subset of labels. If `None` is given, those that appear at
        least once in `y_true` or `y_pred` are used in sorted order.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    normalize : {'true', 'pred', 'all'}, default=None
        Normalizes confusion matrix over the true (rows), predicted (columns)
        conditions or all the population. If None, confusion matrix will not be
        normalized.

    display_labels : array-like of shape (n_classes,), default=None
        Target names used for plotting. By default, `labels` will be used if
        it is defined, otherwise the unique labels of `y_true` and `y_pred`
        will be used.

    include_values : bool, default=True
        Includes values in confusion matrix.

    xticks_rotation : {'vertical', 'horizontal'} or float, \
                     default='horizontal'
        Rotation of xtick labels.

    values_format : str, default=None
        Format specification for values in confusion matrix. If `None`,
        the format specification is 'd' or '.2g' whichever is shorter.

    cmap : str or matplotlib Colormap, default='viridis'
        Colormap recognized by matplotlib.

    ax : matplotlib Axes, default=None
        Axes object to plot on. If `None`, a new figure and axes is
        created.

    Returns
    -------
    display : :class:`~sklearn.metrics.ConfusionMatrixDisplay`

    Examples
    --------
    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.metrics import plot_confusion_matrix
    >>> from sklearn.model_selection import train_test_split
    >>> from sklearn.svm import SVC
    >>> X, y = make_classification(random_state=0)
    >>> X_train, X_test, y_train, y_test = train_test_split(
    ...         X, y, random_state=0)
    >>> clf = SVC(random_state=0)
    >>> clf.fit(X_train, y_train)
    SVC(random_state=0)
    >>> plot_confusion_matrix(clf, X_test, y_test)  # doctest: +SKIP
    >>> plt.show()  # doctest: +SKIP
    """
    check_matplotlib_support("plot_confusion_matrix")

    if not is_classifier(estimator):
        raise ValueError("plot_confusion_matrix only supports classifiers")

    y_pred = estimator.predict(X)
    cm = confusion_matrix(y_true, y_pred, sample_weight=sample_weight,
                          labels=labels, normalize=normalize)

    if display_labels is None:
        if labels is None:
            display_labels = estimator.classes_
        else:
            display_labels = labels

    disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                                  display_labels=display_labels)
    return disp.plot(include_values=include_values,
                     cmap=cmap, ax=ax, xticks_rotation=xticks_rotation,
                     values_format=values_format)
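Beyond the doctest above, a sketch of using ConfusionMatrixDisplay directly with a precomputed matrix; the labels, counts, and output file name are illustrative assumptions.

# Sketch: plot a precomputed confusion matrix without re-predicting.
import matplotlib
matplotlib.use("Agg")  # headless backend so the sketch needs no display
import numpy as np
from sklearn.metrics import ConfusionMatrixDisplay

cm = np.array([[50, 2],
               [5, 43]])  # illustrative counts
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=["neg", "pos"])
disp.plot(cmap="Blues", values_format="d")
disp.figure_.savefig("confusion_matrix.png")  # illustrative output path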
181  venv/Lib/site-packages/sklearn/metrics/_plot/precision_recall_curve.py  Normal file
@@ -0,0 +1,181 @@
from .base import _check_classifer_response_method

from .. import average_precision_score
from .. import precision_recall_curve

from ...utils import check_matplotlib_support
from ...utils.validation import _deprecate_positional_args
from ...base import is_classifier


class PrecisionRecallDisplay:
    """Precision Recall visualization.

    It is recommended to use
    :func:`~sklearn.metrics.plot_precision_recall_curve`
    to create a visualizer. All parameters are stored as attributes.

    Read more in the :ref:`User Guide <visualizations>`.

    Parameters
    ----------
    precision : ndarray
        Precision values.

    recall : ndarray
        Recall values.

    average_precision : float, default=None
        Average precision. If None, the average precision is not shown.

    estimator_name : str, default=None
        Name of estimator. If None, then the estimator name is not shown.

    Attributes
    ----------
    line_ : matplotlib Artist
        Precision recall curve.

    ax_ : matplotlib Axes
        Axes with precision recall curve.

    figure_ : matplotlib Figure
        Figure containing the curve.
    """
    def __init__(self, precision, recall, *,
                 average_precision=None, estimator_name=None):
        self.precision = precision
        self.recall = recall
        self.average_precision = average_precision
        self.estimator_name = estimator_name

    @_deprecate_positional_args
    def plot(self, ax=None, *, name=None, **kwargs):
        """Plot visualization.

        Extra keyword arguments will be passed to matplotlib's `plot`.

        Parameters
        ----------
        ax : Matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        name : str, default=None
            Name of precision recall curve for labeling. If `None`, use the
            name of the estimator.

        **kwargs : dict
            Keyword arguments to be passed to matplotlib's `plot`.

        Returns
        -------
        display : :class:`~sklearn.metrics.PrecisionRecallDisplay`
            Object that stores computed values.
        """
        check_matplotlib_support("PrecisionRecallDisplay.plot")
        import matplotlib.pyplot as plt

        if ax is None:
            fig, ax = plt.subplots()

        name = self.estimator_name if name is None else name

        line_kwargs = {"drawstyle": "steps-post"}
        if self.average_precision is not None and name is not None:
            line_kwargs["label"] = (f"{name} (AP = "
                                    f"{self.average_precision:0.2f})")
        elif self.average_precision is not None:
            line_kwargs["label"] = (f"AP = "
                                    f"{self.average_precision:0.2f}")
        elif name is not None:
            line_kwargs["label"] = name
        line_kwargs.update(**kwargs)

        self.line_, = ax.plot(self.recall, self.precision, **line_kwargs)
        ax.set(xlabel="Recall", ylabel="Precision")

        if "label" in line_kwargs:
            ax.legend(loc='lower left')

        self.ax_ = ax
        self.figure_ = ax.figure
        return self


@_deprecate_positional_args
def plot_precision_recall_curve(estimator, X, y, *,
                                sample_weight=None, response_method="auto",
                                name=None, ax=None, **kwargs):
    """Plot Precision Recall Curve for binary classifiers.

    Extra keyword arguments will be passed to matplotlib's `plot`.

    Read more in the :ref:`User Guide <precision_recall_f_measure_metrics>`.

    Parameters
    ----------
    estimator : estimator instance
        Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`
        in which the last estimator is a classifier.

    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Input values.

    y : array-like of shape (n_samples,)
        Binary target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    response_method : {'predict_proba', 'decision_function', 'auto'}, \
                      default='auto'
        Specifies whether to use :term:`predict_proba` or
        :term:`decision_function` as the target response. If set to 'auto',
        :term:`predict_proba` is tried first and if it does not exist
        :term:`decision_function` is tried next.

    name : str, default=None
        Name for labeling curve. If `None`, the name of the
        estimator is used.

    ax : matplotlib axes, default=None
        Axes object to plot on. If `None`, a new figure and axes is created.

    **kwargs : dict
        Keyword arguments to be passed to matplotlib's `plot`.

    Returns
    -------
    display : :class:`~sklearn.metrics.PrecisionRecallDisplay`
        Object that stores computed values.
    """
    check_matplotlib_support("plot_precision_recall_curve")

    classification_error = ("{} should be a binary classifier".format(
        estimator.__class__.__name__))
    if not is_classifier(estimator):
        raise ValueError(classification_error)

    prediction_method = _check_classifer_response_method(estimator,
                                                         response_method)
    y_pred = prediction_method(X)

    if y_pred.ndim != 1:
        if y_pred.shape[1] != 2:
            raise ValueError(classification_error)
        else:
            y_pred = y_pred[:, 1]

    pos_label = estimator.classes_[1]
    precision, recall, _ = precision_recall_curve(y, y_pred,
                                                  pos_label=pos_label,
                                                  sample_weight=sample_weight)
    average_precision = average_precision_score(y, y_pred,
                                                pos_label=pos_label,
                                                sample_weight=sample_weight)
    name = name if name is not None else estimator.__class__.__name__
    viz = PrecisionRecallDisplay(
        precision=precision, recall=recall,
        average_precision=average_precision, estimator_name=name
    )
    return viz.plot(ax=ax, name=name, **kwargs)
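A sketch of building a PrecisionRecallDisplay from precomputed curve values instead of going through plot_precision_recall_curve; the dataset and output file name are illustrative.

# Sketch: compute the curve once, then reuse the display object.
import matplotlib
matplotlib.use("Agg")  # headless backend so the sketch needs no display
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (PrecisionRecallDisplay, average_precision_score,
                             precision_recall_curve)

X, y = make_classification(n_classes=2, random_state=0)  # illustrative data
clf = LogisticRegression().fit(X, y)
y_score = clf.predict_proba(X)[:, 1]

precision, recall, _ = precision_recall_curve(y, y_score)
ap = average_precision_score(y, y_score)
disp = PrecisionRecallDisplay(precision, recall, average_precision=ap,
                              estimator_name="LogisticRegression")
disp.plot()
disp.figure_.savefig("pr_curve.png")  # illustrative output path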
203  venv/Lib/site-packages/sklearn/metrics/_plot/roc_curve.py  Normal file
@@ -0,0 +1,203 @@
from .. import auc
from .. import roc_curve

from .base import _check_classifer_response_method
from ...utils import check_matplotlib_support
from ...base import is_classifier
from ...utils.validation import _deprecate_positional_args


class RocCurveDisplay:
    """ROC Curve visualization.

    It is recommended to use :func:`~sklearn.metrics.plot_roc_curve` to create
    a visualizer. All parameters are stored as attributes.

    Read more in the :ref:`User Guide <visualizations>`.

    Parameters
    ----------
    fpr : ndarray
        False positive rate.

    tpr : ndarray
        True positive rate.

    roc_auc : float, default=None
        Area under ROC curve. If None, the roc_auc score is not shown.

    estimator_name : str, default=None
        Name of estimator. If None, the estimator name is not shown.

    Attributes
    ----------
    line_ : matplotlib Artist
        ROC Curve.

    ax_ : matplotlib Axes
        Axes with ROC Curve.

    figure_ : matplotlib Figure
        Figure containing the curve.

    Examples
    --------
    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
    >>> import numpy as np
    >>> from sklearn import metrics
    >>> y = np.array([0, 0, 1, 1])
    >>> pred = np.array([0.1, 0.4, 0.35, 0.8])
    >>> fpr, tpr, thresholds = metrics.roc_curve(y, pred)
    >>> roc_auc = metrics.auc(fpr, tpr)
    >>> display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,\
                                          estimator_name='example estimator')
    >>> display.plot()  # doctest: +SKIP
    >>> plt.show()      # doctest: +SKIP
    """
    def __init__(self, *, fpr, tpr, roc_auc=None, estimator_name=None):
        self.fpr = fpr
        self.tpr = tpr
        self.roc_auc = roc_auc
        self.estimator_name = estimator_name

    @_deprecate_positional_args
    def plot(self, ax=None, *, name=None, **kwargs):
        """Plot visualization

        Extra keyword arguments will be passed to matplotlib's ``plot``.

        Parameters
        ----------
        ax : matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        name : str, default=None
            Name of ROC Curve for labeling. If `None`, use the name of the
            estimator.

        Returns
        -------
        display : :class:`~sklearn.metrics.RocCurveDisplay`
            Object that stores computed values.
        """
        check_matplotlib_support('RocCurveDisplay.plot')
        import matplotlib.pyplot as plt

        if ax is None:
            fig, ax = plt.subplots()

        name = self.estimator_name if name is None else name

        line_kwargs = {}
        if self.roc_auc is not None and name is not None:
            line_kwargs["label"] = f"{name} (AUC = {self.roc_auc:0.2f})"
        elif self.roc_auc is not None:
            line_kwargs["label"] = f"AUC = {self.roc_auc:0.2f}"
        elif name is not None:
            line_kwargs["label"] = name

        line_kwargs.update(**kwargs)

        self.line_ = ax.plot(self.fpr, self.tpr, **line_kwargs)[0]
        ax.set_xlabel("False Positive Rate")
        ax.set_ylabel("True Positive Rate")

        if "label" in line_kwargs:
            ax.legend(loc='lower right')

        self.ax_ = ax
        self.figure_ = ax.figure
        return self


@_deprecate_positional_args
def plot_roc_curve(estimator, X, y, *, sample_weight=None,
                   drop_intermediate=True, response_method="auto",
                   name=None, ax=None, **kwargs):
    """Plot Receiver operating characteristic (ROC) curve.

    Extra keyword arguments will be passed to matplotlib's `plot`.

    Read more in the :ref:`User Guide <visualizations>`.

    Parameters
    ----------
    estimator : estimator instance
        Fitted classifier or a fitted :class:`~sklearn.pipeline.Pipeline`
        in which the last estimator is a classifier.

    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Input values.

    y : array-like of shape (n_samples,)
        Target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    drop_intermediate : boolean, default=True
        Whether to drop some suboptimal thresholds which would not appear
        on a plotted ROC curve. This is useful in order to create lighter
        ROC curves.

    response_method : {'predict_proba', 'decision_function', 'auto'} \
                      default='auto'
        Specifies whether to use :term:`predict_proba` or
        :term:`decision_function` as the target response. If set to 'auto',
        :term:`predict_proba` is tried first and if it does not exist
        :term:`decision_function` is tried next.

    name : str, default=None
        Name of ROC Curve for labeling. If `None`, use the name of the
        estimator.

    ax : matplotlib axes, default=None
        Axes object to plot on. If `None`, a new figure and axes is created.

    Returns
    -------
    display : :class:`~sklearn.metrics.RocCurveDisplay`
        Object that stores computed values.

    Examples
    --------
    >>> import matplotlib.pyplot as plt  # doctest: +SKIP
    >>> from sklearn import datasets, metrics, model_selection, svm
    >>> X, y = datasets.make_classification(random_state=0)
    >>> X_train, X_test, y_train, y_test = model_selection.train_test_split(\
            X, y, random_state=0)
    >>> clf = svm.SVC(random_state=0)
    >>> clf.fit(X_train, y_train)
    SVC(random_state=0)
    >>> metrics.plot_roc_curve(clf, X_test, y_test)  # doctest: +SKIP
    >>> plt.show()                                   # doctest: +SKIP
    """
    check_matplotlib_support('plot_roc_curve')

    classification_error = (
        "{} should be a binary classifier".format(estimator.__class__.__name__)
    )
    if not is_classifier(estimator):
        raise ValueError(classification_error)

    prediction_method = _check_classifer_response_method(estimator,
                                                         response_method)
    y_pred = prediction_method(X)

    if y_pred.ndim != 1:
        if y_pred.shape[1] != 2:
            raise ValueError(classification_error)
        else:
            y_pred = y_pred[:, 1]

    pos_label = estimator.classes_[1]
    fpr, tpr, _ = roc_curve(y, y_pred, pos_label=pos_label,
                            sample_weight=sample_weight,
                            drop_intermediate=drop_intermediate)
    roc_auc = auc(fpr, tpr)
    name = estimator.__class__.__name__ if name is None else name
    viz = RocCurveDisplay(
        fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name=name
    )
    return viz.plot(ax=ax, name=name, **kwargs)
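A sketch of overlaying two ROC curves on one Axes through the ax parameter of plot_roc_curve; the classifiers and output file name are illustrative.

# Sketch: pass the same Axes to successive plot_roc_curve calls.
import matplotlib
matplotlib.use("Agg")  # headless backend so the sketch needs no display
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import plot_roc_curve
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_classes=2, random_state=0)  # illustrative data
lr = LogisticRegression().fit(X, y)
tree = DecisionTreeClassifier(random_state=0).fit(X, y)

fig, ax = plt.subplots()
plot_roc_curve(lr, X, y, ax=ax)
plot_roc_curve(tree, X, y, ax=ax)  # reuses the axes, adds a second legend entry
fig.savefig("roc_overlay.png")  # illustrative output path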
Binary file not shown.
@@ -0,0 +1,299 @@
import pytest
import numpy as np
from numpy.testing import assert_allclose
from numpy.testing import assert_array_equal

from sklearn.compose import make_column_transformer
from sklearn.datasets import make_classification
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, SVR

from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay


# TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
pytestmark = pytest.mark.filterwarnings(
    "ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
    "matplotlib.*")


@pytest.fixture(scope="module")
def n_classes():
    return 5


@pytest.fixture(scope="module")
def data(n_classes):
    X, y = make_classification(n_samples=100, n_informative=5,
                               n_classes=n_classes, random_state=0)
    return X, y


@pytest.fixture(scope="module")
def fitted_clf(data):
    return SVC(kernel='linear', C=0.01).fit(*data)


@pytest.fixture(scope="module")
def y_pred(data, fitted_clf):
    X, _ = data
    return fitted_clf.predict(X)


def test_error_on_regressor(pyplot, data):
    X, y = data
    est = SVR().fit(X, y)

    msg = "plot_confusion_matrix only supports classifiers"
    with pytest.raises(ValueError, match=msg):
        plot_confusion_matrix(est, X, y)


def test_error_on_invalid_option(pyplot, fitted_clf, data):
    X, y = data
    msg = (r"normalize must be one of \{'true', 'pred', 'all', "
           r"None\}")

    with pytest.raises(ValueError, match=msg):
        plot_confusion_matrix(fitted_clf, X, y, normalize='invalid')


@pytest.mark.parametrize("with_labels", [True, False])
@pytest.mark.parametrize("with_display_labels", [True, False])
def test_plot_confusion_matrix_custom_labels(pyplot, data, y_pred, fitted_clf,
                                             n_classes, with_labels,
                                             with_display_labels):
    X, y = data
    ax = pyplot.gca()
    labels = [2, 1, 0, 3, 4] if with_labels else None
    display_labels = ['b', 'd', 'a', 'e', 'f'] if with_display_labels else None

    cm = confusion_matrix(y, y_pred, labels=labels)
    disp = plot_confusion_matrix(fitted_clf, X, y,
                                 ax=ax, display_labels=display_labels,
                                 labels=labels)

    assert_allclose(disp.confusion_matrix, cm)

    if with_display_labels:
        expected_display_labels = display_labels
    elif with_labels:
        expected_display_labels = labels
    else:
        expected_display_labels = list(range(n_classes))

    expected_display_labels_str = [str(name)
                                   for name in expected_display_labels]

    x_ticks = [tick.get_text() for tick in disp.ax_.get_xticklabels()]
    y_ticks = [tick.get_text() for tick in disp.ax_.get_yticklabels()]

    assert_array_equal(disp.display_labels, expected_display_labels)
    assert_array_equal(x_ticks, expected_display_labels_str)
    assert_array_equal(y_ticks, expected_display_labels_str)


@pytest.mark.parametrize("normalize", ['true', 'pred', 'all', None])
@pytest.mark.parametrize("include_values", [True, False])
def test_plot_confusion_matrix(pyplot, data, y_pred, n_classes, fitted_clf,
                               normalize, include_values):
    X, y = data
    ax = pyplot.gca()
    cmap = 'plasma'
    cm = confusion_matrix(y, y_pred)
    disp = plot_confusion_matrix(fitted_clf, X, y,
                                 normalize=normalize,
                                 cmap=cmap, ax=ax,
                                 include_values=include_values)

    assert disp.ax_ == ax

    if normalize == 'true':
        cm = cm / cm.sum(axis=1, keepdims=True)
    elif normalize == 'pred':
        cm = cm / cm.sum(axis=0, keepdims=True)
    elif normalize == 'all':
        cm = cm / cm.sum()

    assert_allclose(disp.confusion_matrix, cm)
    import matplotlib as mpl
    assert isinstance(disp.im_, mpl.image.AxesImage)
    assert disp.im_.get_cmap().name == cmap
    assert isinstance(disp.ax_, pyplot.Axes)
    assert isinstance(disp.figure_, pyplot.Figure)

    assert disp.ax_.get_ylabel() == "True label"
    assert disp.ax_.get_xlabel() == "Predicted label"

    x_ticks = [tick.get_text() for tick in disp.ax_.get_xticklabels()]
    y_ticks = [tick.get_text() for tick in disp.ax_.get_yticklabels()]

    expected_display_labels = list(range(n_classes))

    expected_display_labels_str = [str(name)
                                   for name in expected_display_labels]

    assert_array_equal(disp.display_labels, expected_display_labels)
    assert_array_equal(x_ticks, expected_display_labels_str)
    assert_array_equal(y_ticks, expected_display_labels_str)

    image_data = disp.im_.get_array().data
    assert_allclose(image_data, cm)

    if include_values:
        assert disp.text_.shape == (n_classes, n_classes)
        fmt = '.2g'
        expected_text = np.array([format(v, fmt) for v in cm.ravel(order="C")])
        text_text = np.array([
            t.get_text() for t in disp.text_.ravel(order="C")])
        assert_array_equal(expected_text, text_text)
    else:
        assert disp.text_ is None


def test_confusion_matrix_display(pyplot, data, fitted_clf, y_pred, n_classes):
    X, y = data

    cm = confusion_matrix(y, y_pred)
    disp = plot_confusion_matrix(fitted_clf, X, y, normalize=None,
                                 include_values=True, cmap='viridis',
                                 xticks_rotation=45.0)

    assert_allclose(disp.confusion_matrix, cm)
    assert disp.text_.shape == (n_classes, n_classes)

    rotations = [tick.get_rotation() for tick in disp.ax_.get_xticklabels()]
    assert_allclose(rotations, 45.0)

    image_data = disp.im_.get_array().data
    assert_allclose(image_data, cm)

    disp.plot(cmap='plasma')
    assert disp.im_.get_cmap().name == 'plasma'

    disp.plot(include_values=False)
    assert disp.text_ is None

    disp.plot(xticks_rotation=90.0)
    rotations = [tick.get_rotation() for tick in disp.ax_.get_xticklabels()]
    assert_allclose(rotations, 90.0)

    disp.plot(values_format='e')
    expected_text = np.array([format(v, 'e') for v in cm.ravel(order="C")])
    text_text = np.array([
        t.get_text() for t in disp.text_.ravel(order="C")])
    assert_array_equal(expected_text, text_text)


def test_confusion_matrix_contrast(pyplot):
    # make sure text color is appropriate depending on background

    cm = np.eye(2) / 2
    disp = ConfusionMatrixDisplay(cm, display_labels=[0, 1])

    disp.plot(cmap=pyplot.cm.gray)
    # diagonal text is black
    assert_allclose(disp.text_[0, 0].get_color(), [0.0, 0.0, 0.0, 1.0])
    assert_allclose(disp.text_[1, 1].get_color(), [0.0, 0.0, 0.0, 1.0])

    # off-diagonal text is white
    assert_allclose(disp.text_[0, 1].get_color(), [1.0, 1.0, 1.0, 1.0])
    assert_allclose(disp.text_[1, 0].get_color(), [1.0, 1.0, 1.0, 1.0])

    disp.plot(cmap=pyplot.cm.gray_r)
    # off-diagonal text is black
    assert_allclose(disp.text_[0, 1].get_color(), [0.0, 0.0, 0.0, 1.0])
    assert_allclose(disp.text_[1, 0].get_color(), [0.0, 0.0, 0.0, 1.0])

    # diagonal text is white
    assert_allclose(disp.text_[0, 0].get_color(), [1.0, 1.0, 1.0, 1.0])
    assert_allclose(disp.text_[1, 1].get_color(), [1.0, 1.0, 1.0, 1.0])

    # Regression test for #15920
    cm = np.array([[19, 34], [32, 58]])
    disp = ConfusionMatrixDisplay(cm, display_labels=[0, 1])

    disp.plot(cmap=pyplot.cm.Blues)
    min_color = pyplot.cm.Blues(0)
    max_color = pyplot.cm.Blues(255)
    assert_allclose(disp.text_[0, 0].get_color(), max_color)
    assert_allclose(disp.text_[0, 1].get_color(), max_color)
    assert_allclose(disp.text_[1, 0].get_color(), max_color)
    assert_allclose(disp.text_[1, 1].get_color(), min_color)


@pytest.mark.parametrize(
    "clf", [LogisticRegression(),
            make_pipeline(StandardScaler(), LogisticRegression()),
            make_pipeline(make_column_transformer((StandardScaler(), [0, 1])),
                          LogisticRegression())])
def test_confusion_matrix_pipeline(pyplot, clf, data, n_classes):
    X, y = data
    with pytest.raises(NotFittedError):
        plot_confusion_matrix(clf, X, y)
    clf.fit(X, y)
    y_pred = clf.predict(X)

    disp = plot_confusion_matrix(clf, X, y)
    cm = confusion_matrix(y, y_pred)

    assert_allclose(disp.confusion_matrix, cm)
    assert disp.text_.shape == (n_classes, n_classes)


@pytest.mark.parametrize("values_format", ['e', 'n'])
def test_confusion_matrix_text_format(pyplot, data, y_pred, n_classes,
                                      fitted_clf, values_format):
    # Make sure plot text is formatted with 'values_format'.
    X, y = data
    cm = confusion_matrix(y, y_pred)
    disp = plot_confusion_matrix(fitted_clf, X, y,
                                 include_values=True,
                                 values_format=values_format)

    assert disp.text_.shape == (n_classes, n_classes)

    expected_text = np.array([format(v, values_format)
                              for v in cm.ravel()])
    text_text = np.array([
        t.get_text() for t in disp.text_.ravel()])
    assert_array_equal(expected_text, text_text)


def test_confusion_matrix_standard_format(pyplot):
    cm = np.array([[10000000, 0], [123456, 12345678]])
    plotted_text = ConfusionMatrixDisplay(
        cm, display_labels=[False, True]).plot().text_
    # Values should be shown as whole numbers 'd', except the first and last
    # numbers, whose integer forms are longer than their '.2g' forms, so they
    # are shown as 1e+07 and 1.2e+07.
    test = [t.get_text() for t in plotted_text.ravel()]
    assert test == ['1e+07', '0', '123456', '1.2e+07']

    cm = np.array([[0.1, 10], [100, 0.525]])
    plotted_text = ConfusionMatrixDisplay(
        cm, display_labels=[False, True]).plot().text_
    # Values should now be formatted as '.2g', since there is a float in the
    # matrix; values keep at most two significant digits (e.g. 100 becomes
    # 1e+02).
    test = [t.get_text() for t in plotted_text.ravel()]
    assert test == ['0.1', '10', '1e+02', '0.53']


@pytest.mark.parametrize("display_labels, expected_labels", [
    (None, ["0", "1"]),
    (["cat", "dog"], ["cat", "dog"]),
])
def test_default_labels(pyplot, display_labels, expected_labels):
    cm = np.array([[10, 0], [12, 120]])
    disp = ConfusionMatrixDisplay(cm, display_labels=display_labels).plot()

    x_ticks = [tick.get_text() for tick in disp.ax_.get_xticklabels()]
    y_ticks = [tick.get_text() for tick in disp.ax_.get_yticklabels()]

    assert_array_equal(x_ticks, expected_labels)
    assert_array_equal(y_ticks, expected_labels)
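The normalization conventions exercised by test_plot_confusion_matrix above, restated on a tiny matrix; the numbers are illustrative.

# Sketch: the three normalize options checked in the test.
import numpy as np

cm = np.array([[3, 1],
               [2, 4]], dtype=float)
by_true = cm / cm.sum(axis=1, keepdims=True)  # 'true': each row sums to 1
by_pred = cm / cm.sum(axis=0, keepdims=True)  # 'pred': each column sums to 1
by_all = cm / cm.sum()                        # 'all': whole matrix sums to 1
print(by_true, by_pred, by_all, sep="\n")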
|
@ -0,0 +1,192 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from sklearn.base import BaseEstimator, ClassifierMixin
|
||||
from sklearn.metrics import plot_precision_recall_curve
|
||||
from sklearn.metrics import PrecisionRecallDisplay
|
||||
from sklearn.metrics import average_precision_score
|
||||
from sklearn.metrics import precision_recall_curve
|
||||
from sklearn.datasets import make_classification
|
||||
from sklearn.datasets import load_breast_cancer
|
||||
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.exceptions import NotFittedError
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.compose import make_column_transformer
|
||||
|
||||
|
||||
# TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
|
||||
pytestmark = pytest.mark.filterwarnings(
|
||||
"ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
|
||||
"matplotlib.*")
|
||||
|
||||
|
||||
def test_errors(pyplot):
|
||||
X, y_multiclass = make_classification(n_classes=3, n_samples=50,
|
||||
n_informative=3,
|
||||
random_state=0)
|
||||
y_binary = y_multiclass == 0
|
||||
|
||||
# Unfitted classifer
|
||||
binary_clf = DecisionTreeClassifier()
|
||||
with pytest.raises(NotFittedError):
|
||||
plot_precision_recall_curve(binary_clf, X, y_binary)
|
||||
binary_clf.fit(X, y_binary)
|
||||
|
||||
multi_clf = DecisionTreeClassifier().fit(X, y_multiclass)
|
||||
|
||||
# Fitted multiclass classifier with binary data
|
||||
msg = "DecisionTreeClassifier should be a binary classifier"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
plot_precision_recall_curve(multi_clf, X, y_binary)
|
||||
|
||||
reg = DecisionTreeRegressor().fit(X, y_multiclass)
|
||||
msg = "DecisionTreeRegressor should be a binary classifier"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
plot_precision_recall_curve(reg, X, y_binary)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"response_method, msg",
|
||||
[("predict_proba", "response method predict_proba is not defined in "
|
||||
"MyClassifier"),
|
||||
("decision_function", "response method decision_function is not defined "
|
||||
"in MyClassifier"),
|
||||
("auto", "response method decision_function or predict_proba is not "
|
||||
"defined in MyClassifier"),
|
||||
("bad_method", "response_method must be 'predict_proba', "
|
||||
"'decision_function' or 'auto'")])
|
||||
def test_error_bad_response(pyplot, response_method, msg):
|
||||
X, y = make_classification(n_classes=2, n_samples=50, random_state=0)
|
||||
|
||||
class MyClassifier(BaseEstimator, ClassifierMixin):
|
||||
def fit(self, X, y):
|
||||
self.fitted_ = True
|
||||
self.classes_ = [0, 1]
|
||||
return self
|
||||
|
||||
clf = MyClassifier().fit(X, y)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
plot_precision_recall_curve(clf, X, y, response_method=response_method)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("response_method",
|
||||
["predict_proba", "decision_function"])
|
||||
@pytest.mark.parametrize("with_sample_weight", [True, False])
|
||||
def test_plot_precision_recall(pyplot, response_method, with_sample_weight):
|
||||
X, y = make_classification(n_classes=2, n_samples=50, random_state=0)
|
||||
|
||||
lr = LogisticRegression().fit(X, y)
|
||||
|
||||
if with_sample_weight:
|
||||
rng = np.random.RandomState(42)
|
||||
sample_weight = rng.randint(0, 4, size=X.shape[0])
|
||||
else:
|
||||
sample_weight = None
|
||||
|
||||
disp = plot_precision_recall_curve(lr, X, y, alpha=0.8,
|
||||
response_method=response_method,
|
||||
sample_weight=sample_weight)
|
||||
|
||||
y_score = getattr(lr, response_method)(X)
|
||||
if response_method == 'predict_proba':
|
||||
y_score = y_score[:, 1]
|
||||
|
||||
prec, recall, _ = precision_recall_curve(y, y_score,
|
||||
sample_weight=sample_weight)
|
||||
avg_prec = average_precision_score(y, y_score, sample_weight=sample_weight)
|
||||
|
||||
assert_allclose(disp.precision, prec)
|
||||
assert_allclose(disp.recall, recall)
|
||||
assert disp.average_precision == pytest.approx(avg_prec)
|
||||
|
||||
assert disp.estimator_name == "LogisticRegression"
|
||||
|
||||
# cannot fail thanks to pyplot fixture
|
||||
import matplotlib as mpl # noqa
|
||||
assert isinstance(disp.line_, mpl.lines.Line2D)
|
||||
assert disp.line_.get_alpha() == 0.8
|
||||
assert isinstance(disp.ax_, mpl.axes.Axes)
|
||||
assert isinstance(disp.figure_, mpl.figure.Figure)
|
||||
|
||||
expected_label = "LogisticRegression (AP = {:0.2f})".format(avg_prec)
|
||||
assert disp.line_.get_label() == expected_label
|
||||
assert disp.ax_.get_xlabel() == "Recall"
|
||||
assert disp.ax_.get_ylabel() == "Precision"
|
||||
|
||||
# draw again with another label
|
||||
disp.plot(name="MySpecialEstimator")
|
||||
expected_label = "MySpecialEstimator (AP = {:0.2f})".format(avg_prec)
|
||||
assert disp.line_.get_label() == expected_label
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"clf", [make_pipeline(StandardScaler(), LogisticRegression()),
|
||||
make_pipeline(make_column_transformer((StandardScaler(), [0, 1])),
|
||||
LogisticRegression())])
|
||||
def test_precision_recall_curve_pipeline(pyplot, clf):
|
||||
X, y = make_classification(n_classes=2, n_samples=50, random_state=0)
|
||||
with pytest.raises(NotFittedError):
|
||||
plot_precision_recall_curve(clf, X, y)
|
||||
clf.fit(X, y)
|
||||
disp = plot_precision_recall_curve(clf, X, y)
|
||||
assert disp.estimator_name == clf.__class__.__name__
|
||||
|
||||
|
||||
def test_precision_recall_curve_string_labels(pyplot):
|
||||
# regression test #15738
|
||||
cancer = load_breast_cancer()
|
||||
X = cancer.data
|
||||
y = cancer.target_names[cancer.target]
|
||||
|
||||
lr = make_pipeline(StandardScaler(), LogisticRegression())
|
||||
lr.fit(X, y)
|
||||
for klass in cancer.target_names:
|
||||
assert klass in lr.classes_
|
||||
disp = plot_precision_recall_curve(lr, X, y)
|
||||
|
||||
y_pred = lr.predict_proba(X)[:, 1]
|
||||
avg_prec = average_precision_score(y, y_pred,
|
||||
pos_label=lr.classes_[1])
|
||||
|
||||
assert disp.average_precision == pytest.approx(avg_prec)
|
||||
assert disp.estimator_name == lr.__class__.__name__
|
||||
|
||||
|
||||
def test_plot_precision_recall_curve_estimator_name_multiple_calls(pyplot):
|
||||
# non-regression test checking that the `name` used when calling
|
||||
# `plot_roc_curve` is used as well when calling `disp.plot()`
|
||||
X, y = make_classification(n_classes=2, n_samples=50, random_state=0)
|
||||
clf_name = "my hand-crafted name"
|
||||
clf = LogisticRegression().fit(X, y)
|
||||
disp = plot_precision_recall_curve(clf, X, y, name=clf_name)
|
||||
assert disp.estimator_name == clf_name
|
||||
pyplot.close("all")
|
||||
disp.plot()
|
||||
assert clf_name in disp.line_.get_label()
|
||||
pyplot.close("all")
|
||||
clf_name = "another_name"
|
||||
disp.plot(name=clf_name)
|
||||
assert clf_name in disp.line_.get_label()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"average_precision, estimator_name, expected_label",
|
||||
[
|
||||
(0.9, None, "AP = 0.90"),
|
||||
(None, "my_est", "my_est"),
|
||||
(0.8, "my_est2", "my_est2 (AP = 0.80)"),
|
||||
]
|
||||
)
|
||||
def test_default_labels(pyplot, average_precision, estimator_name,
|
||||
expected_label):
|
||||
prec = np.array([1, 0.5, 0])
|
||||
recall = np.array([0, 0.5, 1])
|
||||
disp = PrecisionRecallDisplay(prec, recall,
|
||||
average_precision=average_precision,
|
||||
estimator_name=estimator_name)
|
||||
disp.plot()
|
||||
assert disp.line_.get_label() == expected_label
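def _example_precision_recall_display_direct_use():
    # Editor's sketch, not part of the original test module: a minimal,
    # hypothetical helper showing that PrecisionRecallDisplay can be built
    # straight from precomputed values, exactly as test_default_labels does.
    import numpy as np
    from sklearn.metrics import PrecisionRecallDisplay

    prec = np.array([1.0, 0.6, 0.0])
    recall = np.array([0.0, 0.5, 1.0])
    disp = PrecisionRecallDisplay(prec, recall, average_precision=0.75,
                                  estimator_name="demo")
    disp.plot()  # the line label is rendered as "demo (AP = 0.75)"
    return disp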
|
|
@@ -0,0 +1,170 @@
|
|||
import pytest
|
||||
from numpy.testing import assert_allclose
|
||||
import numpy as np
|
||||
|
||||
from sklearn.tree import DecisionTreeClassifier
|
||||
from sklearn.metrics import plot_roc_curve
|
||||
from sklearn.metrics import RocCurveDisplay
|
||||
from sklearn.datasets import load_iris
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.metrics import roc_curve, auc
|
||||
from sklearn.base import ClassifierMixin
|
||||
from sklearn.exceptions import NotFittedError
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.compose import make_column_transformer
|
||||
|
||||
|
||||
# TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
|
||||
pytestmark = pytest.mark.filterwarnings(
|
||||
"ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
|
||||
"matplotlib.*")
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def data():
|
||||
return load_iris(return_X_y=True)
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def data_binary(data):
|
||||
X, y = data
|
||||
return X[y < 2], y[y < 2]
|
||||
|
||||
|
||||
def test_plot_roc_curve_error_non_binary(pyplot, data):
|
||||
X, y = data
|
||||
clf = DecisionTreeClassifier()
|
||||
clf.fit(X, y)
|
||||
|
||||
msg = "DecisionTreeClassifier should be a binary classifier"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
plot_roc_curve(clf, X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"response_method, msg",
|
||||
[("predict_proba", "response method predict_proba is not defined in "
|
||||
"MyClassifier"),
|
||||
("decision_function", "response method decision_function is not defined "
|
||||
"in MyClassifier"),
|
||||
("auto", "response method decision_function or predict_proba is not "
|
||||
"defined in MyClassifier"),
|
||||
("bad_method", "response_method must be 'predict_proba', "
|
||||
"'decision_function' or 'auto'")])
|
||||
def test_plot_roc_curve_error_no_response(pyplot, data_binary, response_method,
|
||||
msg):
|
||||
X, y = data_binary
|
||||
|
||||
class MyClassifier(ClassifierMixin):
|
||||
def fit(self, X, y):
|
||||
self.classes_ = [0, 1]
|
||||
return self
|
||||
|
||||
clf = MyClassifier().fit(X, y)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
plot_roc_curve(clf, X, y, response_method=response_method)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("response_method",
|
||||
["predict_proba", "decision_function"])
|
||||
@pytest.mark.parametrize("with_sample_weight", [True, False])
|
||||
@pytest.mark.parametrize("drop_intermediate", [True, False])
|
||||
@pytest.mark.parametrize("with_strings", [True, False])
|
||||
def test_plot_roc_curve(pyplot, response_method, data_binary,
|
||||
with_sample_weight, drop_intermediate,
|
||||
with_strings):
|
||||
X, y = data_binary
|
||||
|
||||
pos_label = None
|
||||
if with_strings:
|
||||
y = np.array(["c", "b"])[y]
|
||||
pos_label = "c"
|
||||
|
||||
if with_sample_weight:
|
||||
rng = np.random.RandomState(42)
|
||||
sample_weight = rng.randint(1, 4, size=(X.shape[0]))
|
||||
else:
|
||||
sample_weight = None
|
||||
|
||||
lr = LogisticRegression()
|
||||
lr.fit(X, y)
|
||||
|
||||
viz = plot_roc_curve(lr, X, y, alpha=0.8, sample_weight=sample_weight,
|
||||
drop_intermediate=drop_intermediate)
|
||||
|
||||
y_pred = getattr(lr, response_method)(X)
|
||||
if y_pred.ndim == 2:
|
||||
y_pred = y_pred[:, 1]
|
||||
|
||||
fpr, tpr, _ = roc_curve(y, y_pred, sample_weight=sample_weight,
|
||||
drop_intermediate=drop_intermediate,
|
||||
pos_label=pos_label)
|
||||
|
||||
assert_allclose(viz.roc_auc, auc(fpr, tpr))
|
||||
assert_allclose(viz.fpr, fpr)
|
||||
assert_allclose(viz.tpr, tpr)
|
||||
|
||||
assert viz.estimator_name == "LogisticRegression"
|
||||
|
||||
# cannot fail thanks to pyplot fixture
|
||||
import matplotlib as mpl  # noqa
|
||||
assert isinstance(viz.line_, mpl.lines.Line2D)
|
||||
assert viz.line_.get_alpha() == 0.8
|
||||
assert isinstance(viz.ax_, mpl.axes.Axes)
|
||||
assert isinstance(viz.figure_, mpl.figure.Figure)
|
||||
|
||||
expected_label = "LogisticRegression (AUC = {:0.2f})".format(viz.roc_auc)
|
||||
assert viz.line_.get_label() == expected_label
|
||||
assert viz.ax_.get_ylabel() == "True Positive Rate"
|
||||
assert viz.ax_.get_xlabel() == "False Positive Rate"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"clf", [LogisticRegression(),
|
||||
make_pipeline(StandardScaler(), LogisticRegression()),
|
||||
make_pipeline(make_column_transformer((StandardScaler(), [0, 1])),
|
||||
LogisticRegression())])
|
||||
def test_roc_curve_not_fitted_errors(pyplot, data_binary, clf):
|
||||
X, y = data_binary
|
||||
with pytest.raises(NotFittedError):
|
||||
plot_roc_curve(clf, X, y)
|
||||
clf.fit(X, y)
|
||||
disp = plot_roc_curve(clf, X, y)
|
||||
assert clf.__class__.__name__ in disp.line_.get_label()
|
||||
assert disp.estimator_name == clf.__class__.__name__
|
||||
|
||||
|
||||
def test_plot_roc_curve_estimator_name_multiple_calls(pyplot, data_binary):
|
||||
# non-regression test checking that the `name` used when calling
|
||||
# `plot_roc_curve` is used as well when calling `disp.plot()`
|
||||
X, y = data_binary
|
||||
clf_name = "my hand-crafted name"
|
||||
clf = LogisticRegression().fit(X, y)
|
||||
disp = plot_roc_curve(clf, X, y, name=clf_name)
|
||||
assert disp.estimator_name == clf_name
|
||||
pyplot.close("all")
|
||||
disp.plot()
|
||||
assert clf_name in disp.line_.get_label()
|
||||
pyplot.close("all")
|
||||
clf_name = "another_name"
|
||||
disp.plot(name=clf_name)
|
||||
assert clf_name in disp.line_.get_label()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"roc_auc, estimator_name, expected_label",
|
||||
[
|
||||
(0.9, None, "AUC = 0.90"),
|
||||
(None, "my_est", "my_est"),
|
||||
(0.8, "my_est2", "my_est2 (AUC = 0.80)")
|
||||
]
|
||||
)
|
||||
def test_default_labels(pyplot, roc_auc, estimator_name,
|
||||
expected_label):
|
||||
fpr = np.array([0, 0.5, 1])
|
||||
tpr = np.array([0, 0.5, 1])
|
||||
disp = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
|
||||
estimator_name=estimator_name).plot()
|
||||
assert disp.line_.get_label() == expected_label
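def _example_roc_curve_display_direct_use():
    # Editor's sketch, not part of the original test module: a minimal,
    # hypothetical helper showing that RocCurveDisplay can be built straight
    # from precomputed values, as test_default_labels does above.
    import numpy as np
    from sklearn.metrics import RocCurveDisplay

    fpr = np.array([0.0, 0.5, 1.0])
    tpr = np.array([0.0, 0.5, 1.0])
    disp = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=0.5,
                           estimator_name="demo")
    disp.plot()  # the line label is rendered as "demo (AUC = 0.50)"
    return disp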
|
1435
venv/Lib/site-packages/sklearn/metrics/_ranking.py
Normal file
1435
venv/Lib/site-packages/sklearn/metrics/_ranking.py
Normal file
File diff suppressed because it is too large
810
venv/Lib/site-packages/sklearn/metrics/_regression.py
Normal file
810
venv/Lib/site-packages/sklearn/metrics/_regression.py
Normal file
|
@@ -0,0 +1,810 @@
|
|||
"""Metrics to assess performance on regression task
|
||||
|
||||
Functions named as ``*_score`` return a scalar value to maximize: the higher
|
||||
the better
|
||||
|
||||
Functions named as ``*_error`` or ``*_loss`` return a scalar value to minimize:
|
||||
the lower the better
|
||||
"""
|
||||
|
||||
# Authors: Alexandre Gramfort <alexandre.gramfort@inria.fr>
|
||||
# Mathieu Blondel <mathieu@mblondel.org>
|
||||
# Olivier Grisel <olivier.grisel@ensta.org>
|
||||
# Arnaud Joly <a.joly@ulg.ac.be>
|
||||
# Jochen Wersdorfer <jochen@wersdoerfer.de>
|
||||
# Lars Buitinck
|
||||
# Joel Nothman <joel.nothman@gmail.com>
|
||||
# Karan Desai <karandesai281196@gmail.com>
|
||||
# Noel Dawe <noel@dawe.me>
|
||||
# Manoj Kumar <manojkumarsivaraj334@gmail.com>
|
||||
# Michael Eickenberg <michael.eickenberg@gmail.com>
|
||||
# Konstantin Shmelkov <konstantin.shmelkov@polytechnique.edu>
|
||||
# Christian Lorentzen <lorentzen.ch@googlemail.com>
|
||||
# License: BSD 3 clause
|
||||
|
||||
import numpy as np
|
||||
import warnings
|
||||
|
||||
from .._loss.glm_distribution import TweedieDistribution
|
||||
from ..utils.validation import (check_array, check_consistent_length,
|
||||
_num_samples)
|
||||
from ..utils.validation import column_or_1d
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ..exceptions import UndefinedMetricWarning
|
||||
|
||||
|
||||
__all__ = [
|
||||
"max_error",
|
||||
"mean_absolute_error",
|
||||
"mean_squared_error",
|
||||
"mean_squared_log_error",
|
||||
"median_absolute_error",
|
||||
"r2_score",
|
||||
"explained_variance_score",
|
||||
"mean_tweedie_deviance",
|
||||
"mean_poisson_deviance",
|
||||
"mean_gamma_deviance",
|
||||
]
|
||||
|
||||
|
||||
def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric"):
|
||||
"""Check that y_true and y_pred belong to the same regression task
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true : array-like
|
||||
|
||||
y_pred : array-like
|
||||
|
||||
multioutput : array-like or string in ['raw_values', 'uniform_average',
|
||||
'variance_weighted'] or None
|
||||
None is accepted due to backward compatibility of r2_score().
|
||||
|
||||
Returns
|
||||
-------
|
||||
type_true : one of {'continuous', 'continuous-multioutput'}
|
||||
The type of the true target data, as output by
|
||||
'utils.multiclass.type_of_target'
|
||||
|
||||
y_true : array-like of shape (n_samples, n_outputs)
|
||||
Ground truth (correct) target values.
|
||||
|
||||
y_pred : array-like of shape (n_samples, n_outputs)
|
||||
Estimated target values.
|
||||
|
||||
multioutput : array-like of shape (n_outputs) or string in ['raw_values',
|
||||
'uniform_average', 'variance_weighted'] or None
|
||||
Custom output weights if ``multioutput`` is array-like or
|
||||
just the corresponding argument if ``multioutput`` is a
|
||||
correct keyword.
|
||||
dtype : str or list, default="numeric"
|
||||
The dtype argument passed to check_array.
|
||||
|
||||
"""
|
||||
check_consistent_length(y_true, y_pred)
|
||||
y_true = check_array(y_true, ensure_2d=False, dtype=dtype)
|
||||
y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype)
|
||||
|
||||
if y_true.ndim == 1:
|
||||
y_true = y_true.reshape((-1, 1))
|
||||
|
||||
if y_pred.ndim == 1:
|
||||
y_pred = y_pred.reshape((-1, 1))
|
||||
|
||||
if y_true.shape[1] != y_pred.shape[1]:
|
||||
raise ValueError("y_true and y_pred have different number of output "
|
||||
"({0}!={1})".format(y_true.shape[1], y_pred.shape[1]))
|
||||
|
||||
n_outputs = y_true.shape[1]
|
||||
allowed_multioutput_str = ('raw_values', 'uniform_average',
|
||||
'variance_weighted')
|
||||
if isinstance(multioutput, str):
|
||||
if multioutput not in allowed_multioutput_str:
|
||||
raise ValueError("Allowed 'multioutput' string values are {}. "
|
||||
"You provided multioutput={!r}".format(
|
||||
allowed_multioutput_str,
|
||||
multioutput))
|
||||
elif multioutput is not None:
|
||||
multioutput = check_array(multioutput, ensure_2d=False)
|
||||
if n_outputs == 1:
|
||||
raise ValueError("Custom weights are useful only in "
|
||||
"multi-output cases.")
|
||||
elif n_outputs != len(multioutput):
|
||||
raise ValueError(("There must be equally many custom weights "
|
||||
"(%d) as outputs (%d).") %
|
||||
(len(multioutput), n_outputs))
|
||||
y_type = 'continuous' if n_outputs == 1 else 'continuous-multioutput'
|
||||
|
||||
return y_type, y_true, y_pred, multioutput
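# Editor's sketch, not part of the original module: a hypothetical helper
# illustrating the reshaping performed above -- 1d targets come back as
# (n_samples, 1) arrays tagged 'continuous'.
def _example_check_reg_targets():
    y_type, y_true, y_pred, multioutput = _check_reg_targets(
        [3, -0.5, 2], [2.5, 0.0, 2], 'uniform_average')
    assert y_type == 'continuous'
    assert y_true.shape == (3, 1) and y_pred.shape == (3, 1)
    assert multioutput == 'uniform_average'
    return y_type, y_true, y_pred, multioutput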
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def mean_absolute_error(y_true, y_pred, *,
|
||||
sample_weight=None,
|
||||
multioutput='uniform_average'):
|
||||
"""Mean absolute error regression loss
|
||||
|
||||
Read more in the :ref:`User Guide <mean_absolute_error>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
||||
Ground truth (correct) target values.
|
||||
|
||||
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
||||
Estimated target values.
|
||||
|
||||
sample_weight : array-like of shape (n_samples,), optional
|
||||
Sample weights.
|
||||
|
||||
multioutput : string in ['raw_values', 'uniform_average'] \
|
||||
or array-like of shape (n_outputs)
|
||||
Defines aggregating of multiple output values.
|
||||
Array-like value defines weights used to average errors.
|
||||
|
||||
'raw_values' :
|
||||
Returns a full set of errors in case of multioutput input.
|
||||
|
||||
'uniform_average' :
|
||||
Errors of all outputs are averaged with uniform weight.
|
||||
|
||||
|
||||
Returns
|
||||
-------
|
||||
loss : float or ndarray of floats
|
||||
If multioutput is 'raw_values', then mean absolute error is returned
|
||||
for each output separately.
|
||||
If multioutput is 'uniform_average' or an ndarray of weights, then the
|
||||
weighted average of all output errors is returned.
|
||||
|
||||
MAE output is non-negative floating point. The best value is 0.0.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.metrics import mean_absolute_error
|
||||
>>> y_true = [3, -0.5, 2, 7]
|
||||
>>> y_pred = [2.5, 0.0, 2, 8]
|
||||
>>> mean_absolute_error(y_true, y_pred)
|
||||
0.5
|
||||
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
|
||||
>>> y_pred = [[0, 2], [-1, 2], [8, -5]]
|
||||
>>> mean_absolute_error(y_true, y_pred)
|
||||
0.75
|
||||
>>> mean_absolute_error(y_true, y_pred, multioutput='raw_values')
|
||||
array([0.5, 1. ])
|
||||
>>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
|
||||
0.85...
|
||||
"""
|
||||
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
||||
y_true, y_pred, multioutput)
|
||||
check_consistent_length(y_true, y_pred, sample_weight)
|
||||
output_errors = np.average(np.abs(y_pred - y_true),
|
||||
weights=sample_weight, axis=0)
|
||||
if isinstance(multioutput, str):
|
||||
if multioutput == 'raw_values':
|
||||
return output_errors
|
||||
elif multioutput == 'uniform_average':
|
||||
# pass None as weights to np.average: uniform mean
|
||||
multioutput = None
|
||||
|
||||
return np.average(output_errors, weights=multioutput)
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def mean_squared_error(y_true, y_pred, *,
|
||||
sample_weight=None,
|
||||
multioutput='uniform_average', squared=True):
|
||||
"""Mean squared error regression loss
|
||||
|
||||
Read more in the :ref:`User Guide <mean_squared_error>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
||||
Ground truth (correct) target values.
|
||||
|
||||
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
||||
Estimated target values.
|
||||
|
||||
sample_weight : array-like of shape (n_samples,), optional
|
||||
Sample weights.
|
||||
|
||||
multioutput : string in ['raw_values', 'uniform_average'] \
|
||||
or array-like of shape (n_outputs)
|
||||
Defines aggregating of multiple output values.
|
||||
Array-like value defines weights used to average errors.
|
||||
|
||||
'raw_values' :
|
||||
Returns a full set of errors in case of multioutput input.
|
||||
|
||||
'uniform_average' :
|
||||
Errors of all outputs are averaged with uniform weight.
|
||||
|
||||
squared : boolean value, optional (default = True)
|
||||
If True returns MSE value, if False returns RMSE value.
|
||||
|
||||
Returns
|
||||
-------
|
||||
loss : float or ndarray of floats
|
||||
A non-negative floating point value (the best value is 0.0), or an
|
||||
array of floating point values, one for each individual target.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.metrics import mean_squared_error
|
||||
>>> y_true = [3, -0.5, 2, 7]
|
||||
>>> y_pred = [2.5, 0.0, 2, 8]
|
||||
>>> mean_squared_error(y_true, y_pred)
|
||||
0.375
|
||||
>>> y_true = [3, -0.5, 2, 7]
|
||||
>>> y_pred = [2.5, 0.0, 2, 8]
|
||||
>>> mean_squared_error(y_true, y_pred, squared=False)
|
||||
0.612...
|
||||
>>> y_true = [[0.5, 1],[-1, 1],[7, -6]]
|
||||
>>> y_pred = [[0, 2],[-1, 2],[8, -5]]
|
||||
>>> mean_squared_error(y_true, y_pred)
|
||||
0.708...
|
||||
>>> mean_squared_error(y_true, y_pred, squared=False)
|
||||
0.822...
|
||||
>>> mean_squared_error(y_true, y_pred, multioutput='raw_values')
|
||||
array([0.41666667, 1. ])
|
||||
>>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7])
|
||||
0.825...
|
||||
|
||||
"""
|
||||
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
||||
y_true, y_pred, multioutput)
|
||||
check_consistent_length(y_true, y_pred, sample_weight)
|
||||
output_errors = np.average((y_true - y_pred) ** 2, axis=0,
|
||||
weights=sample_weight)
|
||||
|
||||
if not squared:
|
||||
output_errors = np.sqrt(output_errors)
|
||||
|
||||
if isinstance(multioutput, str):
|
||||
if multioutput == 'raw_values':
|
||||
return output_errors
|
||||
elif multioutput == 'uniform_average':
|
||||
# pass None as weights to np.average: uniform mean
|
||||
multioutput = None
|
||||
|
||||
return np.average(output_errors, weights=multioutput)
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def mean_squared_log_error(y_true, y_pred, *,
|
||||
sample_weight=None,
|
||||
multioutput='uniform_average'):
|
||||
"""Mean squared logarithmic error regression loss
|
||||
|
||||
Read more in the :ref:`User Guide <mean_squared_log_error>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
||||
Ground truth (correct) target values.
|
||||
|
||||
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
||||
Estimated target values.
|
||||
|
||||
sample_weight : array-like of shape (n_samples,), optional
|
||||
Sample weights.
|
||||
|
||||
multioutput : string in ['raw_values', 'uniform_average'] \
|
||||
or array-like of shape (n_outputs)
|
||||
|
||||
Defines aggregating of multiple output values.
|
||||
Array-like value defines weights used to average errors.
|
||||
|
||||
'raw_values' :
|
||||
Returns a full set of errors when the input is of multioutput
|
||||
format.
|
||||
|
||||
'uniform_average' :
|
||||
Errors of all outputs are averaged with uniform weight.
|
||||
|
||||
Returns
|
||||
-------
|
||||
loss : float or ndarray of floats
|
||||
A non-negative floating point value (the best value is 0.0), or an
|
||||
array of floating point values, one for each individual target.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.metrics import mean_squared_log_error
|
||||
>>> y_true = [3, 5, 2.5, 7]
|
||||
>>> y_pred = [2.5, 5, 4, 8]
|
||||
>>> mean_squared_log_error(y_true, y_pred)
|
||||
0.039...
|
||||
>>> y_true = [[0.5, 1], [1, 2], [7, 6]]
|
||||
>>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]]
|
||||
>>> mean_squared_log_error(y_true, y_pred)
|
||||
0.044...
|
||||
>>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values')
|
||||
array([0.00462428, 0.08377444])
|
||||
>>> mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
|
||||
0.060...
|
||||
|
||||
"""
|
||||
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
||||
y_true, y_pred, multioutput)
|
||||
check_consistent_length(y_true, y_pred, sample_weight)
|
||||
|
||||
if (y_true < 0).any() or (y_pred < 0).any():
|
||||
raise ValueError("Mean Squared Logarithmic Error cannot be used when "
|
||||
"targets contain negative values.")
|
||||
|
||||
return mean_squared_error(np.log1p(y_true), np.log1p(y_pred),
|
||||
sample_weight=sample_weight,
|
||||
multioutput=multioutput)
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def median_absolute_error(y_true, y_pred, *, multioutput='uniform_average'):
|
||||
"""Median absolute error regression loss
|
||||
|
||||
Median absolute error output is non-negative floating point. The best value
|
||||
is 0.0. Read more in the :ref:`User Guide <median_absolute_error>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
|
||||
Ground truth (correct) target values.
|
||||
|
||||
y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
|
||||
Estimated target values.
|
||||
|
||||
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
|
||||
(n_outputs,)
|
||||
Defines aggregating of multiple output values. Array-like value defines
|
||||
weights used to average errors.
|
||||
|
||||
'raw_values' :
|
||||
Returns a full set of errors in case of multioutput input.
|
||||
|
||||
'uniform_average' :
|
||||
Errors of all outputs are averaged with uniform weight.
|
||||
|
||||
Returns
|
||||
-------
|
||||
loss : float or ndarray of floats
|
||||
If multioutput is 'raw_values', then median absolute error is returned
|
||||
for each output separately.
|
||||
If multioutput is 'uniform_average' or an ndarray of weights, then the
|
||||
weighted average of all output errors is returned.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.metrics import median_absolute_error
|
||||
>>> y_true = [3, -0.5, 2, 7]
|
||||
>>> y_pred = [2.5, 0.0, 2, 8]
|
||||
>>> median_absolute_error(y_true, y_pred)
|
||||
0.5
|
||||
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
|
||||
>>> y_pred = [[0, 2], [-1, 2], [8, -5]]
|
||||
>>> median_absolute_error(y_true, y_pred)
|
||||
0.75
|
||||
>>> median_absolute_error(y_true, y_pred, multioutput='raw_values')
|
||||
array([0.5, 1. ])
|
||||
>>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
|
||||
0.85
|
||||
|
||||
"""
|
||||
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
||||
y_true, y_pred, multioutput)
|
||||
output_errors = np.median(np.abs(y_pred - y_true), axis=0)
|
||||
if isinstance(multioutput, str):
|
||||
if multioutput == 'raw_values':
|
||||
return output_errors
|
||||
elif multioutput == 'uniform_average':
|
||||
# pass None as weights to np.average: uniform mean
|
||||
multioutput = None
|
||||
|
||||
return np.average(output_errors, weights=multioutput)
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def explained_variance_score(y_true, y_pred, *,
|
||||
sample_weight=None,
|
||||
multioutput='uniform_average'):
|
||||
"""Explained variance regression score function
|
||||
|
||||
Best possible score is 1.0, lower values are worse.
|
||||
|
||||
Read more in the :ref:`User Guide <explained_variance_score>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
||||
Ground truth (correct) target values.
|
||||
|
||||
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
||||
Estimated target values.
|
||||
|
||||
sample_weight : array-like of shape (n_samples,), optional
|
||||
Sample weights.
|
||||
|
||||
multioutput : string in ['raw_values', 'uniform_average', \
|
||||
'variance_weighted'] or array-like of shape (n_outputs)
|
||||
Defines aggregating of multiple output scores.
|
||||
Array-like value defines weights used to average scores.
|
||||
|
||||
'raw_values' :
|
||||
Returns a full set of scores in case of multioutput input.
|
||||
|
||||
'uniform_average' :
|
||||
Scores of all outputs are averaged with uniform weight.
|
||||
|
||||
'variance_weighted' :
|
||||
Scores of all outputs are averaged, weighted by the variances
|
||||
of each individual output.
|
||||
|
||||
Returns
|
||||
-------
|
||||
score : float or ndarray of floats
|
||||
The explained variance or ndarray if 'multioutput' is 'raw_values'.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This is not a symmetric function.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.metrics import explained_variance_score
|
||||
>>> y_true = [3, -0.5, 2, 7]
|
||||
>>> y_pred = [2.5, 0.0, 2, 8]
|
||||
>>> explained_variance_score(y_true, y_pred)
|
||||
0.957...
|
||||
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
|
||||
>>> y_pred = [[0, 2], [-1, 2], [8, -5]]
|
||||
>>> explained_variance_score(y_true, y_pred, multioutput='uniform_average')
|
||||
0.983...
|
||||
|
||||
"""
|
||||
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
||||
y_true, y_pred, multioutput)
|
||||
check_consistent_length(y_true, y_pred, sample_weight)
|
||||
|
||||
y_diff_avg = np.average(y_true - y_pred, weights=sample_weight, axis=0)
|
||||
numerator = np.average((y_true - y_pred - y_diff_avg) ** 2,
|
||||
weights=sample_weight, axis=0)
|
||||
|
||||
y_true_avg = np.average(y_true, weights=sample_weight, axis=0)
|
||||
denominator = np.average((y_true - y_true_avg) ** 2,
|
||||
weights=sample_weight, axis=0)
|
||||
|
||||
nonzero_numerator = numerator != 0
|
||||
nonzero_denominator = denominator != 0
|
||||
valid_score = nonzero_numerator & nonzero_denominator
|
||||
output_scores = np.ones(y_true.shape[1])
|
||||
|
||||
output_scores[valid_score] = 1 - (numerator[valid_score] /
|
||||
denominator[valid_score])
|
||||
output_scores[nonzero_numerator & ~nonzero_denominator] = 0.
|
||||
if isinstance(multioutput, str):
|
||||
if multioutput == 'raw_values':
|
||||
# return scores individually
|
||||
return output_scores
|
||||
elif multioutput == 'uniform_average':
|
||||
# passing None as weights to np.average() results in a uniform mean
|
||||
avg_weights = None
|
||||
elif multioutput == 'variance_weighted':
|
||||
avg_weights = denominator
|
||||
else:
|
||||
avg_weights = multioutput
|
||||
|
||||
return np.average(output_scores, weights=avg_weights)
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def r2_score(y_true, y_pred, *, sample_weight=None,
|
||||
multioutput="uniform_average"):
|
||||
"""R^2 (coefficient of determination) regression score function.
|
||||
|
||||
Best possible score is 1.0 and it can be negative (because the
|
||||
model can be arbitrarily worse). A constant model that always
|
||||
predicts the expected value of y, disregarding the input features,
|
||||
would get a R^2 score of 0.0.
|
||||
|
||||
Read more in the :ref:`User Guide <r2_score>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
||||
Ground truth (correct) target values.
|
||||
|
||||
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
|
||||
Estimated target values.
|
||||
|
||||
sample_weight : array-like of shape (n_samples,), optional
|
||||
Sample weights.
|
||||
|
||||
multioutput : string in ['raw_values', 'uniform_average', \
|
||||
'variance_weighted'] or None or array-like of shape (n_outputs)
|
||||
|
||||
Defines aggregating of multiple output scores.
|
||||
Array-like value defines weights used to average scores.
|
||||
Default is "uniform_average".
|
||||
|
||||
'raw_values' :
|
||||
Returns a full set of scores in case of multioutput input.
|
||||
|
||||
'uniform_average' :
|
||||
Scores of all outputs are averaged with uniform weight.
|
||||
|
||||
'variance_weighted' :
|
||||
Scores of all outputs are averaged, weighted by the variances
|
||||
of each individual output.
|
||||
|
||||
.. versionchanged:: 0.19
|
||||
Default value of multioutput is 'uniform_average'.
|
||||
|
||||
Returns
|
||||
-------
|
||||
z : float or ndarray of floats
|
||||
The R^2 score or ndarray of scores if 'multioutput' is
|
||||
'raw_values'.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This is not a symmetric function.
|
||||
|
||||
Unlike most other scores, R^2 score may be negative (it need not actually
|
||||
be the square of a quantity R).
|
||||
|
||||
This metric is not well-defined for single samples and will return a NaN
|
||||
value if n_samples is less than two.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] `Wikipedia entry on the Coefficient of determination
|
||||
<https://en.wikipedia.org/wiki/Coefficient_of_determination>`_
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.metrics import r2_score
|
||||
>>> y_true = [3, -0.5, 2, 7]
|
||||
>>> y_pred = [2.5, 0.0, 2, 8]
|
||||
>>> r2_score(y_true, y_pred)
|
||||
0.948...
|
||||
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
|
||||
>>> y_pred = [[0, 2], [-1, 2], [8, -5]]
|
||||
>>> r2_score(y_true, y_pred,
|
||||
... multioutput='variance_weighted')
|
||||
0.938...
|
||||
>>> y_true = [1, 2, 3]
|
||||
>>> y_pred = [1, 2, 3]
|
||||
>>> r2_score(y_true, y_pred)
|
||||
1.0
|
||||
>>> y_true = [1, 2, 3]
|
||||
>>> y_pred = [2, 2, 2]
|
||||
>>> r2_score(y_true, y_pred)
|
||||
0.0
|
||||
>>> y_true = [1, 2, 3]
|
||||
>>> y_pred = [3, 2, 1]
|
||||
>>> r2_score(y_true, y_pred)
|
||||
-3.0
|
||||
"""
|
||||
y_type, y_true, y_pred, multioutput = _check_reg_targets(
|
||||
y_true, y_pred, multioutput)
|
||||
check_consistent_length(y_true, y_pred, sample_weight)
|
||||
|
||||
if _num_samples(y_pred) < 2:
|
||||
msg = "R^2 score is not well-defined with less than two samples."
|
||||
warnings.warn(msg, UndefinedMetricWarning)
|
||||
return float('nan')
|
||||
|
||||
if sample_weight is not None:
|
||||
sample_weight = column_or_1d(sample_weight)
|
||||
weight = sample_weight[:, np.newaxis]
|
||||
else:
|
||||
weight = 1.
|
||||
|
||||
numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0,
|
||||
dtype=np.float64)
|
||||
denominator = (weight * (y_true - np.average(
|
||||
y_true, axis=0, weights=sample_weight)) ** 2).sum(axis=0,
|
||||
dtype=np.float64)
|
||||
nonzero_denominator = denominator != 0
|
||||
nonzero_numerator = numerator != 0
|
||||
valid_score = nonzero_denominator & nonzero_numerator
|
||||
output_scores = np.ones([y_true.shape[1]])
|
||||
output_scores[valid_score] = 1 - (numerator[valid_score] /
|
||||
denominator[valid_score])
|
||||
# arbitrarily set to zero to avoid -inf scores; having a constant
|
||||
# y_true is not interesting for scoring a regression anyway
|
||||
output_scores[nonzero_numerator & ~nonzero_denominator] = 0.
|
||||
if isinstance(multioutput, str):
|
||||
if multioutput == 'raw_values':
|
||||
# return scores individually
|
||||
return output_scores
|
||||
elif multioutput == 'uniform_average':
|
||||
# passing None as weights results in a uniform mean
|
||||
avg_weights = None
|
||||
elif multioutput == 'variance_weighted':
|
||||
avg_weights = denominator
|
||||
# avoid fail on constant y or one-element arrays
|
||||
if not np.any(nonzero_denominator):
|
||||
if not np.any(nonzero_numerator):
|
||||
return 1.0
|
||||
else:
|
||||
return 0.0
|
||||
else:
|
||||
avg_weights = multioutput
|
||||
|
||||
return np.average(output_scores, weights=avg_weights)
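# Editor's sketch, not part of the original module: a hypothetical helper
# checking the single-output formula used above, R^2 = 1 - SS_res / SS_tot.
def _example_r2_score_by_hand():
    y_true = np.array([3.0, -0.5, 2.0, 7.0])
    y_pred = np.array([2.5, 0.0, 2.0, 8.0])
    ss_res = ((y_true - y_pred) ** 2).sum()
    ss_tot = ((y_true - y_true.mean()) ** 2).sum()
    manual = 1 - ss_res / ss_tot  # approximately 0.9486
    assert np.isclose(manual, r2_score(y_true, y_pred))
    return manual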
|
||||
|
||||
|
||||
def max_error(y_true, y_pred):
|
||||
"""
|
||||
max_error metric calculates the maximum residual error.
|
||||
|
||||
Read more in the :ref:`User Guide <max_error>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true : array-like of shape (n_samples,)
|
||||
Ground truth (correct) target values.
|
||||
|
||||
y_pred : array-like of shape (n_samples,)
|
||||
Estimated target values.
|
||||
|
||||
Returns
|
||||
-------
|
||||
max_error : float
|
||||
A positive floating point value (the best value is 0.0).
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.metrics import max_error
|
||||
>>> y_true = [3, 2, 7, 1]
|
||||
>>> y_pred = [4, 2, 7, 1]
|
||||
>>> max_error(y_true, y_pred)
|
||||
1
|
||||
"""
|
||||
y_type, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, None)
|
||||
if y_type == 'continuous-multioutput':
|
||||
raise ValueError("Multioutput not supported in max_error")
|
||||
return np.max(np.abs(y_true - y_pred))
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def mean_tweedie_deviance(y_true, y_pred, *, sample_weight=None, power=0):
|
||||
"""Mean Tweedie deviance regression loss.
|
||||
|
||||
Read more in the :ref:`User Guide <mean_tweedie_deviance>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true : array-like of shape (n_samples,)
|
||||
Ground truth (correct) target values.
|
||||
|
||||
y_pred : array-like of shape (n_samples,)
|
||||
Estimated target values.
|
||||
|
||||
sample_weight : array-like of shape (n_samples,), default=None
|
||||
Sample weights.
|
||||
|
||||
power : float, default=0
|
||||
Tweedie power parameter. Either power <= 0 or power >= 1.
|
||||
|
||||
The higher `power`, the less weight is given to extreme
|
||||
deviations between true and predicted targets.
|
||||
|
||||
- power < 0: Extreme stable distribution. Requires: y_pred > 0.
|
||||
- power = 0 : Normal distribution, output corresponds to
|
||||
mean_squared_error. y_true and y_pred can be any real numbers.
|
||||
- power = 1 : Poisson distribution. Requires: y_true >= 0 and
|
||||
y_pred > 0.
|
||||
- 1 < power < 2 : Compound Poisson distribution. Requires: y_true >= 0
|
||||
and y_pred > 0.
|
||||
- power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.
|
||||
- power = 3 : Inverse Gaussian distribution. Requires: y_true > 0
|
||||
and y_pred > 0.
|
||||
- otherwise : Positive stable distribution. Requires: y_true > 0
|
||||
and y_pred > 0.
|
||||
|
||||
Returns
|
||||
-------
|
||||
loss : float
|
||||
A non-negative floating point value (the best value is 0.0).
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.metrics import mean_tweedie_deviance
|
||||
>>> y_true = [2, 0, 1, 4]
|
||||
>>> y_pred = [0.5, 0.5, 2., 2.]
|
||||
>>> mean_tweedie_deviance(y_true, y_pred, power=1)
|
||||
1.4260...
|
||||
"""
|
||||
y_type, y_true, y_pred, _ = _check_reg_targets(
|
||||
y_true, y_pred, None, dtype=[np.float64, np.float32])
|
||||
if y_type == 'continuous-multioutput':
|
||||
raise ValueError("Multioutput not supported in mean_tweedie_deviance")
|
||||
check_consistent_length(y_true, y_pred, sample_weight)
|
||||
|
||||
if sample_weight is not None:
|
||||
sample_weight = column_or_1d(sample_weight)
|
||||
sample_weight = sample_weight[:, np.newaxis]
|
||||
|
||||
dist = TweedieDistribution(power=power)
|
||||
dev = dist.unit_deviance(y_true, y_pred, check_input=True)
|
||||
|
||||
return np.average(dev, weights=sample_weight)
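# Editor's sketch, not part of the original module: a hypothetical helper
# showing that with power=0 the unit deviance is the squared error, so the
# mean Tweedie deviance coincides with mean_squared_error.
def _example_tweedie_power_zero_is_mse():
    y_true = np.array([2.0, 0.5, 1.0, 4.0])
    y_pred = np.array([0.5, 0.5, 2.0, 2.0])
    assert np.isclose(mean_tweedie_deviance(y_true, y_pred, power=0),
                      mean_squared_error(y_true, y_pred))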
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def mean_poisson_deviance(y_true, y_pred, *, sample_weight=None):
|
||||
"""Mean Poisson deviance regression loss.
|
||||
|
||||
Poisson deviance is equivalent to the Tweedie deviance with
|
||||
the power parameter `power=1`.
|
||||
|
||||
Read more in the :ref:`User Guide <mean_tweedie_deviance>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true : array-like of shape (n_samples,)
|
||||
Ground truth (correct) target values. Requires y_true >= 0.
|
||||
|
||||
y_pred : array-like of shape (n_samples,)
|
||||
Estimated target values. Requires y_pred > 0.
|
||||
|
||||
sample_weight : array-like of shape (n_samples,), default=None
|
||||
Sample weights.
|
||||
|
||||
Returns
|
||||
-------
|
||||
loss : float
|
||||
A non-negative floating point value (the best value is 0.0).
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.metrics import mean_poisson_deviance
|
||||
>>> y_true = [2, 0, 1, 4]
|
||||
>>> y_pred = [0.5, 0.5, 2., 2.]
|
||||
>>> mean_poisson_deviance(y_true, y_pred)
|
||||
1.4260...
|
||||
"""
|
||||
return mean_tweedie_deviance(
|
||||
y_true, y_pred, sample_weight=sample_weight, power=1
|
||||
)
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def mean_gamma_deviance(y_true, y_pred, *, sample_weight=None):
|
||||
"""Mean Gamma deviance regression loss.
|
||||
|
||||
Gamma deviance is equivalent to the Tweedie deviance with
|
||||
the power parameter `power=2`. It is invariant to scaling of
|
||||
the target variable, and measures relative errors.
|
||||
|
||||
Read more in the :ref:`User Guide <mean_tweedie_deviance>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_true : array-like of shape (n_samples,)
|
||||
Ground truth (correct) target values. Requires y_true > 0.
|
||||
|
||||
y_pred : array-like of shape (n_samples,)
|
||||
Estimated target values. Requires y_pred > 0.
|
||||
|
||||
sample_weight : array-like of shape (n_samples,), default=None
|
||||
Sample weights.
|
||||
|
||||
Returns
|
||||
-------
|
||||
loss : float
|
||||
A non-negative floating point value (the best value is 0.0).
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.metrics import mean_gamma_deviance
|
||||
>>> y_true = [2, 0.5, 1, 4]
|
||||
>>> y_pred = [0.5, 0.5, 2., 2.]
|
||||
>>> mean_gamma_deviance(y_true, y_pred)
|
||||
1.0568...
|
||||
"""
|
||||
return mean_tweedie_deviance(
|
||||
y_true, y_pred, sample_weight=sample_weight, power=2
|
||||
)
|
725
venv/Lib/site-packages/sklearn/metrics/_scorer.py
Normal file
725
venv/Lib/site-packages/sklearn/metrics/_scorer.py
Normal file
|
@@ -0,0 +1,725 @@
|
|||
"""
|
||||
The :mod:`sklearn.metrics.scorer` submodule implements a flexible
|
||||
interface for model selection and evaluation using
|
||||
arbitrary score functions.
|
||||
|
||||
A scorer object is a callable that can be passed to
|
||||
:class:`sklearn.model_selection.GridSearchCV` or
|
||||
:func:`sklearn.model_selection.cross_val_score` as the ``scoring``
|
||||
parameter, to specify how a model should be evaluated.
|
||||
|
||||
The signature of the call is ``(estimator, X, y)`` where ``estimator``
|
||||
is the model to be evaluated, ``X`` is the test data and ``y`` is the
|
||||
ground truth labeling (or ``None`` in the case of unsupervised models).
|
||||
"""
|
||||
|
||||
# Authors: Andreas Mueller <amueller@ais.uni-bonn.de>
|
||||
# Lars Buitinck
|
||||
# Arnaud Joly <arnaud.v.joly@gmail.com>
|
||||
# License: Simplified BSD
|
||||
|
||||
from collections.abc import Iterable
|
||||
from functools import partial
|
||||
from collections import Counter
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from . import (r2_score, median_absolute_error, max_error, mean_absolute_error,
|
||||
mean_squared_error, mean_squared_log_error,
|
||||
mean_poisson_deviance, mean_gamma_deviance, accuracy_score,
|
||||
f1_score, roc_auc_score, average_precision_score,
|
||||
precision_score, recall_score, log_loss,
|
||||
balanced_accuracy_score, explained_variance_score,
|
||||
brier_score_loss, jaccard_score)
|
||||
|
||||
from .cluster import adjusted_rand_score
|
||||
from .cluster import homogeneity_score
|
||||
from .cluster import completeness_score
|
||||
from .cluster import v_measure_score
|
||||
from .cluster import mutual_info_score
|
||||
from .cluster import adjusted_mutual_info_score
|
||||
from .cluster import normalized_mutual_info_score
|
||||
from .cluster import fowlkes_mallows_score
|
||||
|
||||
from ..utils.multiclass import type_of_target
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ..base import is_regressor
|
||||
|
||||
|
||||
def _cached_call(cache, estimator, method, *args, **kwargs):
|
||||
"""Call estimator with method and args and kwargs."""
|
||||
if cache is None:
|
||||
return getattr(estimator, method)(*args, **kwargs)
|
||||
|
||||
try:
|
||||
return cache[method]
|
||||
except KeyError:
|
||||
result = getattr(estimator, method)(*args, **kwargs)
|
||||
cache[method] = result
|
||||
return result
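# Editor's sketch, not part of the original module: a hypothetical helper
# showing the caching contract above -- with a dict cache the estimator
# method runs once and the second call reuses the stored result.
def _example_cached_call(estimator, X):
    cache = {}
    first = _cached_call(cache, estimator, "predict", X)   # computed, stored
    second = _cached_call(cache, estimator, "predict", X)  # read from cache
    assert first is second
    return first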
|
||||
|
||||
|
||||
class _MultimetricScorer:
|
||||
"""Callable for multimetric scoring used to avoid repeated calls
|
||||
to `predict_proba`, `predict`, and `decision_function`.
|
||||
|
||||
`_MultimetricScorer` will return a dictionary of scores corresponding to
|
||||
the scorers in the dictionary. Note that `_MultimetricScorer` can be
|
||||
created with a dictionary with one key (i.e. only one actual scorer).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
scorers : dict
|
||||
Dictionary mapping names to callable scorers.
|
||||
"""
|
||||
def __init__(self, **scorers):
|
||||
self._scorers = scorers
|
||||
|
||||
def __call__(self, estimator, *args, **kwargs):
|
||||
"""Evaluate predicted target values."""
|
||||
scores = {}
|
||||
cache = {} if self._use_cache(estimator) else None
|
||||
cached_call = partial(_cached_call, cache)
|
||||
|
||||
for name, scorer in self._scorers.items():
|
||||
if isinstance(scorer, _BaseScorer):
|
||||
score = scorer._score(cached_call, estimator,
|
||||
*args, **kwargs)
|
||||
else:
|
||||
score = scorer(estimator, *args, **kwargs)
|
||||
scores[name] = score
|
||||
return scores
|
||||
|
||||
def _use_cache(self, estimator):
|
||||
"""Return True if using a cache is beneficial.
|
||||
|
||||
Caching may be beneficial when one of these conditions holds:
|
||||
- `_ProbaScorer` will be called twice.
|
||||
- `_PredictScorer` will be called twice.
|
||||
- `_ThresholdScorer` will be called twice.
|
||||
- `_ThresholdScorer` and `_PredictScorer` are called and
|
||||
estimator is a regressor.
|
||||
- `_ThresholdScorer` and `_ProbaScorer` are called and
|
||||
estimator does not have a `decision_function` attribute.
|
||||
|
||||
"""
|
||||
if len(self._scorers) == 1: # Only one scorer
|
||||
return False
|
||||
|
||||
counter = Counter([type(v) for v in self._scorers.values()])
|
||||
|
||||
if any(counter[known_type] > 1 for known_type in
|
||||
[_PredictScorer, _ProbaScorer, _ThresholdScorer]):
|
||||
return True
|
||||
|
||||
if counter[_ThresholdScorer]:
|
||||
if is_regressor(estimator) and counter[_PredictScorer]:
|
||||
return True
|
||||
elif (counter[_ProbaScorer] and
|
||||
not hasattr(estimator, "decision_function")):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
class _BaseScorer:
|
||||
def __init__(self, score_func, sign, kwargs):
|
||||
self._kwargs = kwargs
|
||||
self._score_func = score_func
|
||||
self._sign = sign
|
||||
# XXX After removing the deprecated scorers (v0.24) remove the
|
||||
# XXX deprecation_msg property again and remove __call__'s body again
|
||||
self._deprecation_msg = None
|
||||
|
||||
def __repr__(self):
|
||||
kwargs_string = "".join([", %s=%s" % (str(k), str(v))
|
||||
for k, v in self._kwargs.items()])
|
||||
return ("make_scorer(%s%s%s%s)"
|
||||
% (self._score_func.__name__,
|
||||
"" if self._sign > 0 else ", greater_is_better=False",
|
||||
self._factory_args(), kwargs_string))
|
||||
|
||||
def __call__(self, estimator, X, y_true, sample_weight=None):
|
||||
"""Evaluate predicted target values for X relative to y_true.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
estimator : object
|
||||
Trained estimator to use for scoring. Must have a predict_proba
|
||||
method; the output of that is used to compute the score.
|
||||
|
||||
X : array-like or sparse matrix
|
||||
Test data that will be fed to estimator.predict.
|
||||
|
||||
y_true : array-like
|
||||
Gold standard target values for X.
|
||||
|
||||
sample_weight : array-like, optional (default=None)
|
||||
Sample weights.
|
||||
|
||||
Returns
|
||||
-------
|
||||
score : float
|
||||
Score function applied to prediction of estimator on X.
|
||||
"""
|
||||
if self._deprecation_msg is not None:
|
||||
warnings.warn(self._deprecation_msg,
|
||||
category=FutureWarning,
|
||||
stacklevel=2)
|
||||
return self._score(partial(_cached_call, None), estimator, X, y_true,
|
||||
sample_weight=sample_weight)
|
||||
|
||||
def _factory_args(self):
|
||||
"""Return non-default make_scorer arguments for repr."""
|
||||
return ""
|
||||
|
||||
|
||||
class _PredictScorer(_BaseScorer):
|
||||
def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
|
||||
"""Evaluate predicted target values for X relative to y_true.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
method_caller : callable
|
||||
Returns predictions given an estimator, method name, and other
|
||||
arguments, potentially caching results.
|
||||
|
||||
estimator : object
|
||||
Trained estimator to use for scoring. Must have a predict_proba
|
||||
method; the output of that is used to compute the score.
|
||||
|
||||
X : array-like or sparse matrix
|
||||
Test data that will be fed to estimator.predict.
|
||||
|
||||
y_true : array-like
|
||||
Gold standard target values for X.
|
||||
|
||||
sample_weight : array-like, optional (default=None)
|
||||
Sample weights.
|
||||
|
||||
Returns
|
||||
-------
|
||||
score : float
|
||||
Score function applied to prediction of estimator on X.
|
||||
"""
|
||||
|
||||
y_pred = method_caller(estimator, "predict", X)
|
||||
if sample_weight is not None:
|
||||
return self._sign * self._score_func(y_true, y_pred,
|
||||
sample_weight=sample_weight,
|
||||
**self._kwargs)
|
||||
else:
|
||||
return self._sign * self._score_func(y_true, y_pred,
|
||||
**self._kwargs)
|
||||
|
||||
|
||||
class _ProbaScorer(_BaseScorer):
|
||||
def _score(self, method_caller, clf, X, y, sample_weight=None):
|
||||
"""Evaluate predicted probabilities for X relative to y_true.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
method_caller : callable
|
||||
Returns predictions given an estimator, method name, and other
|
||||
arguments, potentially caching results.
|
||||
|
||||
clf : object
|
||||
Trained classifier to use for scoring. Must have a predict_proba
|
||||
method; the output of that is used to compute the score.
|
||||
|
||||
X : array-like or sparse matrix
|
||||
Test data that will be fed to clf.predict_proba.
|
||||
|
||||
y : array-like
|
||||
Gold standard target values for X. These must be class labels,
|
||||
not probabilities.
|
||||
|
||||
sample_weight : array-like, optional (default=None)
|
||||
Sample weights.
|
||||
|
||||
Returns
|
||||
-------
|
||||
score : float
|
||||
Score function applied to prediction of estimator on X.
|
||||
"""
|
||||
|
||||
y_type = type_of_target(y)
|
||||
y_pred = method_caller(clf, "predict_proba", X)
|
||||
if y_type == "binary":
|
||||
if y_pred.shape[1] == 2:
|
||||
y_pred = y_pred[:, 1]
|
||||
elif y_pred.shape[1] == 1: # not multiclass
|
||||
raise ValueError('got predict_proba of shape {},'
|
||||
' but need classifier with two'
|
||||
' classes for {} scoring'.format(
|
||||
y_pred.shape, self._score_func.__name__))
|
||||
if sample_weight is not None:
|
||||
return self._sign * self._score_func(y, y_pred,
|
||||
sample_weight=sample_weight,
|
||||
**self._kwargs)
|
||||
else:
|
||||
return self._sign * self._score_func(y, y_pred, **self._kwargs)
|
||||
|
||||
def _factory_args(self):
|
||||
return ", needs_proba=True"
|
||||
|
||||
|
||||
class _ThresholdScorer(_BaseScorer):
|
||||
def _score(self, method_caller, clf, X, y, sample_weight=None):
|
||||
"""Evaluate decision function output for X relative to y_true.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
method_caller : callable
|
||||
Returns predictions given an estimator, method name, and other
|
||||
arguments, potentially caching results.
|
||||
|
||||
clf : object
|
||||
Trained classifier to use for scoring. Must have either a
|
||||
decision_function method or a predict_proba method; the output of
|
||||
that is used to compute the score.
|
||||
|
||||
X : array-like or sparse matrix
|
||||
Test data that will be fed to clf.decision_function or
|
||||
clf.predict_proba.
|
||||
|
||||
y : array-like
|
||||
Gold standard target values for X. These must be class labels,
|
||||
not decision function values.
|
||||
|
||||
sample_weight : array-like, optional (default=None)
|
||||
Sample weights.
|
||||
|
||||
Returns
|
||||
-------
|
||||
score : float
|
||||
Score function applied to prediction of estimator on X.
|
||||
"""
|
||||
|
||||
y_type = type_of_target(y)
|
||||
if y_type not in ("binary", "multilabel-indicator"):
|
||||
raise ValueError("{0} format is not supported".format(y_type))
|
||||
|
||||
if is_regressor(clf):
|
||||
y_pred = method_caller(clf, "predict", X)
|
||||
else:
|
||||
try:
|
||||
y_pred = method_caller(clf, "decision_function", X)
|
||||
|
||||
# For multi-output multi-class estimator
|
||||
if isinstance(y_pred, list):
|
||||
y_pred = np.vstack([p for p in y_pred]).T
|
||||
|
||||
except (NotImplementedError, AttributeError):
|
||||
y_pred = method_caller(clf, "predict_proba", X)
|
||||
|
||||
if y_type == "binary":
|
||||
if y_pred.shape[1] == 2:
|
||||
y_pred = y_pred[:, 1]
|
||||
else:
|
||||
raise ValueError('got predict_proba of shape {},'
|
||||
' but need classifier with two'
|
||||
' classes for {} scoring'.format(
|
||||
y_pred.shape,
|
||||
self._score_func.__name__))
|
||||
elif isinstance(y_pred, list):
|
||||
y_pred = np.vstack([p[:, -1] for p in y_pred]).T
|
||||
|
||||
if sample_weight is not None:
|
||||
return self._sign * self._score_func(y, y_pred,
|
||||
sample_weight=sample_weight,
|
||||
**self._kwargs)
|
||||
else:
|
||||
return self._sign * self._score_func(y, y_pred, **self._kwargs)
|
||||
|
||||
def _factory_args(self):
|
||||
return ", needs_threshold=True"
|
||||
|
||||
|
||||
def get_scorer(scoring):
|
||||
"""Get a scorer from string.
|
||||
|
||||
Read more in the :ref:`User Guide <scoring_parameter>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
scoring : str | callable
|
||||
Scoring method as string. If callable, it is returned as is.
|
||||
|
||||
Returns
|
||||
-------
|
||||
scorer : callable
|
||||
The scorer.
|
||||
"""
|
||||
if isinstance(scoring, str):
|
||||
try:
|
||||
if scoring == 'brier_score_loss':
|
||||
# deprecated
|
||||
scorer = brier_score_loss_scorer
|
||||
else:
|
||||
scorer = SCORERS[scoring]
|
||||
except KeyError:
|
||||
raise ValueError('%r is not a valid scoring value. '
|
||||
'Use sorted(sklearn.metrics.SCORERS.keys()) '
|
||||
'to get valid options.' % scoring)
|
||||
else:
|
||||
scorer = scoring
|
||||
return scorer
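# Editor's sketch, not part of the original module: a hypothetical helper
# showing the scorer contract -- the object returned by get_scorer is called
# as scorer(estimator, X, y).
def _example_get_scorer(fitted_classifier, X, y):
    accuracy = get_scorer("accuracy")
    return accuracy(fitted_classifier, X, y)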
|
||||
|
||||
|
||||
def _passthrough_scorer(estimator, *args, **kwargs):
|
||||
"""Function that wraps estimator.score"""
|
||||
return estimator.score(*args, **kwargs)
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def check_scoring(estimator, scoring=None, *, allow_none=False):
|
||||
"""Determine scorer from user options.
|
||||
|
||||
A TypeError will be thrown if the estimator cannot be scored.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
estimator : estimator object implementing 'fit'
|
||||
The object to use to fit the data.
|
||||
|
||||
scoring : string, callable or None, optional, default: None
|
||||
A string (see model evaluation documentation) or
|
||||
a scorer callable object / function with signature
|
||||
``scorer(estimator, X, y)``.
|
||||
|
||||
allow_none : boolean, optional, default: False
|
||||
If no scoring is specified and the estimator has no score function, we
|
||||
can either return None or raise an exception.
|
||||
|
||||
Returns
|
||||
-------
|
||||
scoring : callable
|
||||
A scorer callable object / function with signature
|
||||
``scorer(estimator, X, y)``.
|
||||
"""
|
||||
if not hasattr(estimator, 'fit'):
|
||||
raise TypeError("estimator should be an estimator implementing "
|
||||
"'fit' method, %r was passed" % estimator)
|
||||
if isinstance(scoring, str):
|
||||
return get_scorer(scoring)
|
||||
elif callable(scoring):
|
||||
# Heuristic to ensure user has not passed a metric
|
||||
module = getattr(scoring, '__module__', None)
|
||||
if hasattr(module, 'startswith') and \
|
||||
module.startswith('sklearn.metrics.') and \
|
||||
not module.startswith('sklearn.metrics._scorer') and \
|
||||
not module.startswith('sklearn.metrics.tests.'):
|
||||
raise ValueError('scoring value %r looks like it is a metric '
|
||||
'function rather than a scorer. A scorer should '
|
||||
'require an estimator as its first parameter. '
|
||||
'Please use `make_scorer` to convert a metric '
|
||||
'to a scorer.' % scoring)
|
||||
return get_scorer(scoring)
|
||||
elif scoring is None:
|
||||
if hasattr(estimator, 'score'):
|
||||
return _passthrough_scorer
|
||||
elif allow_none:
|
||||
return None
|
||||
else:
|
||||
raise TypeError(
|
||||
"If no scoring is specified, the estimator passed should "
|
||||
"have a 'score' method. The estimator %r does not."
|
||||
% estimator)
|
||||
elif isinstance(scoring, Iterable):
|
||||
raise ValueError("For evaluating multiple scores, use "
|
||||
"sklearn.model_selection.cross_validate instead. "
|
||||
"{0} was passed.".format(scoring))
|
||||
else:
|
||||
raise ValueError("scoring value should either be a callable, string or"
|
||||
" None. %r was passed" % scoring)
|
||||
|
||||
|
||||
def _check_multimetric_scoring(estimator, scoring=None):
|
||||
"""Check the scoring parameter in cases when multiple metrics are allowed
|
||||
|
||||
Parameters
|
||||
----------
|
||||
estimator : sklearn estimator instance
|
||||
The estimator for which the scoring will be applied.
|
||||
|
||||
scoring : string, callable, list/tuple, dict or None, default: None
|
||||
A single string (see :ref:`scoring_parameter`) or a callable
|
||||
(see :ref:`scoring`) to evaluate the predictions on the test set.
|
||||
|
||||
For evaluating multiple metrics, either give a list of (unique) strings
|
||||
or a dict with names as keys and callables as values.
|
||||
|
||||
NOTE that when using custom scorers, each scorer should return a single
|
||||
value. Metric functions returning a list/array of values can be wrapped
|
||||
into multiple scorers that return one value each.
|
||||
|
||||
See :ref:`multimetric_grid_search` for an example.
|
||||
|
||||
If None the estimator's score method is used.
|
||||
The return value in that case will be ``{'score': <default_scorer>}``.
|
||||
If the estimator's score method is not available, a ``TypeError``
|
||||
is raised.
|
||||
|
||||
Returns
|
||||
-------
|
||||
scorers_dict : dict
|
||||
A dict mapping each scorer name to its validated scorer.
|
||||
|
||||
is_multimetric : bool
|
||||
True if scorer is a list/tuple or dict of callables
|
||||
False if scorer is None/str/callable
|
||||
"""
|
||||
if callable(scoring) or scoring is None or isinstance(scoring,
|
||||
str):
|
||||
scorers = {"score": check_scoring(estimator, scoring=scoring)}
|
||||
return scorers, False
|
||||
else:
|
||||
err_msg_generic = ("scoring should either be a single string or "
|
||||
"callable for single metric evaluation or a "
|
||||
"list/tuple of strings or a dict of scorer name "
|
||||
"mapped to the callable for multiple metric "
|
||||
"evaluation. Got %s of type %s"
|
||||
% (repr(scoring), type(scoring)))
|
||||
|
||||
if isinstance(scoring, (list, tuple, set)):
|
||||
err_msg = ("The list/tuple elements must be unique "
|
||||
"strings of predefined scorers. ")
|
||||
invalid = False
|
||||
try:
|
||||
keys = set(scoring)
|
||||
except TypeError:
|
||||
invalid = True
|
||||
if invalid:
|
||||
raise ValueError(err_msg)
|
||||
|
||||
if len(keys) != len(scoring):
|
||||
raise ValueError(err_msg + "Duplicate elements were found in"
|
||||
" the given list. %r" % repr(scoring))
|
||||
elif len(keys) > 0:
|
||||
if not all(isinstance(k, str) for k in keys):
|
||||
if any(callable(k) for k in keys):
|
||||
raise ValueError(err_msg +
|
||||
"One or more of the elements were "
|
||||
"callables. Use a dict of score name "
|
||||
"mapped to the scorer callable. "
|
||||
"Got %r" % repr(scoring))
|
||||
else:
|
||||
raise ValueError(err_msg +
|
||||
"Non-string types were found in "
|
||||
"the given list. Got %r"
|
||||
% repr(scoring))
|
||||
scorers = {scorer: check_scoring(estimator, scoring=scorer)
|
||||
for scorer in scoring}
|
||||
else:
|
||||
raise ValueError(err_msg +
|
||||
"Empty list was given. %r" % repr(scoring))
|
||||
|
||||
elif isinstance(scoring, dict):
|
||||
keys = set(scoring)
|
||||
if not all(isinstance(k, str) for k in keys):
|
||||
raise ValueError("Non-string types were found in the keys of "
|
||||
"the given dict. scoring=%r" % repr(scoring))
|
||||
if len(keys) == 0:
|
||||
raise ValueError("An empty dict was passed. %r"
|
||||
% repr(scoring))
|
||||
scorers = {key: check_scoring(estimator, scoring=scorer)
|
||||
for key, scorer in scoring.items()}
|
||||
else:
|
||||
raise ValueError(err_msg_generic)
|
||||
return scorers, True
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def make_scorer(score_func, *, greater_is_better=True, needs_proba=False,
|
||||
needs_threshold=False, **kwargs):
|
||||
"""Make a scorer from a performance metric or loss function.
|
||||
|
||||
This factory function wraps scoring functions for use in GridSearchCV
|
||||
and cross_val_score. It takes a score function, such as ``accuracy_score``,
|
||||
``mean_squared_error``, ``adjusted_rand_score`` or ``average_precision_score``
|
||||
and returns a callable that scores an estimator's output.
|
||||
|
||||
Read more in the :ref:`User Guide <scoring>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
score_func : callable,
|
||||
Score function (or loss function) with signature
|
||||
``score_func(y, y_pred, **kwargs)``.
|
||||
|
||||
greater_is_better : boolean, default=True
|
||||
Whether score_func is a score function (default), meaning high is good,
|
||||
or a loss function, meaning low is good. In the latter case, the
|
||||
scorer object will sign-flip the outcome of the score_func.
|
||||
|
||||
needs_proba : boolean, default=False
|
||||
Whether score_func requires predict_proba to get probability estimates
|
||||
out of a classifier.
|
||||
|
||||
If True, for binary `y_true`, the score function is supposed to accept
|
||||
a 1D `y_pred` (i.e., probability of the positive class, shape
|
||||
`(n_samples,)`).
|
||||
|
||||
needs_threshold : boolean, default=False
|
||||
Whether score_func takes a continuous decision certainty.
|
||||
This only works for binary classification using estimators that
|
||||
have either a decision_function or predict_proba method.
|
||||
|
||||
If True, for binary `y_true`, the score function is supposed to accept
|
||||
a 1D `y_pred` (i.e., probability of the positive class or the decision
|
||||
function, shape `(n_samples,)`).
|
||||
|
||||
For example ``average_precision`` or the area under the roc curve
|
||||
can not be computed using discrete predictions alone.
|
||||
|
||||
**kwargs : additional arguments
|
||||
Additional parameters to be passed to score_func.
|
||||
|
||||
Returns
|
||||
-------
|
||||
scorer : callable
|
||||
Callable object that returns a scalar score; greater is better.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.metrics import fbeta_score, make_scorer
|
||||
>>> ftwo_scorer = make_scorer(fbeta_score, beta=2)
|
||||
>>> ftwo_scorer
|
||||
make_scorer(fbeta_score, beta=2)
|
||||
>>> from sklearn.model_selection import GridSearchCV
|
||||
>>> from sklearn.svm import LinearSVC
|
||||
>>> grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]},
|
||||
... scoring=ftwo_scorer)
|
||||
|
||||
Notes
|
||||
-----
|
||||
If `needs_proba=False` and `needs_threshold=False`, the score
|
||||
function is supposed to accept the output of :term:`predict`. If
|
||||
`needs_proba=True`, the score function is supposed to accept the
|
||||
output of :term:`predict_proba` (For binary `y_true`, the score function is
|
||||
supposed to accept probability of the positive class). If
|
||||
`needs_threshold=True`, the score function is supposed to accept the
|
||||
output of :term:`decision_function`.
|
||||
"""
|
||||
sign = 1 if greater_is_better else -1
|
||||
if needs_proba and needs_threshold:
|
||||
raise ValueError("Set either needs_proba or needs_threshold to True,"
|
||||
" but not both.")
|
||||
if needs_proba:
|
||||
cls = _ProbaScorer
|
||||
elif needs_threshold:
|
||||
cls = _ThresholdScorer
|
||||
else:
|
||||
cls = _PredictScorer
|
||||
return cls(score_func, sign, kwargs)
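# Usage sketch (illustrative, not part of the upstream file): because
# ``greater_is_better=False`` makes the scorer sign-flip the metric, a scorer
# built from a loss such as ``mean_squared_error`` always reports values <= 0.
#
#     >>> from sklearn.datasets import make_regression
#     >>> from sklearn.linear_model import LinearRegression
#     >>> from sklearn.metrics import mean_squared_error
#     >>> X, y = make_regression(random_state=0)
#     >>> reg = LinearRegression().fit(X, y)
#     >>> neg_mse = make_scorer(mean_squared_error, greater_is_better=False)
#     >>> neg_mse(reg, X, y) <= 0
#     True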
|
||||
|
||||
|
||||
# Standard regression scores
|
||||
explained_variance_scorer = make_scorer(explained_variance_score)
|
||||
r2_scorer = make_scorer(r2_score)
|
||||
max_error_scorer = make_scorer(max_error,
|
||||
greater_is_better=False)
|
||||
neg_mean_squared_error_scorer = make_scorer(mean_squared_error,
|
||||
greater_is_better=False)
|
||||
neg_mean_squared_log_error_scorer = make_scorer(mean_squared_log_error,
|
||||
greater_is_better=False)
|
||||
neg_mean_absolute_error_scorer = make_scorer(mean_absolute_error,
|
||||
greater_is_better=False)
|
||||
neg_median_absolute_error_scorer = make_scorer(median_absolute_error,
|
||||
greater_is_better=False)
|
||||
neg_root_mean_squared_error_scorer = make_scorer(mean_squared_error,
|
||||
greater_is_better=False,
|
||||
squared=False)
|
||||
neg_mean_poisson_deviance_scorer = make_scorer(
|
||||
mean_poisson_deviance, greater_is_better=False
|
||||
)
|
||||
|
||||
neg_mean_gamma_deviance_scorer = make_scorer(
|
||||
mean_gamma_deviance, greater_is_better=False
|
||||
)
|
||||
|
||||
# Standard Classification Scores
|
||||
accuracy_scorer = make_scorer(accuracy_score)
|
||||
balanced_accuracy_scorer = make_scorer(balanced_accuracy_score)
|
||||
|
||||
# Score functions that need decision values
|
||||
roc_auc_scorer = make_scorer(roc_auc_score, greater_is_better=True,
|
||||
needs_threshold=True)
|
||||
average_precision_scorer = make_scorer(average_precision_score,
|
||||
needs_threshold=True)
|
||||
roc_auc_ovo_scorer = make_scorer(roc_auc_score, needs_proba=True,
|
||||
multi_class='ovo')
|
||||
roc_auc_ovo_weighted_scorer = make_scorer(roc_auc_score, needs_proba=True,
|
||||
multi_class='ovo',
|
||||
average='weighted')
|
||||
roc_auc_ovr_scorer = make_scorer(roc_auc_score, needs_proba=True,
|
||||
multi_class='ovr')
|
||||
roc_auc_ovr_weighted_scorer = make_scorer(roc_auc_score, needs_proba=True,
|
||||
multi_class='ovr',
|
||||
average='weighted')
|
||||
|
||||
# Score function for probabilistic classification
|
||||
neg_log_loss_scorer = make_scorer(log_loss, greater_is_better=False,
|
||||
needs_proba=True)
|
||||
neg_brier_score_scorer = make_scorer(brier_score_loss,
|
||||
greater_is_better=False,
|
||||
needs_proba=True)
|
||||
brier_score_loss_scorer = make_scorer(brier_score_loss,
|
||||
greater_is_better=False,
|
||||
needs_proba=True)
|
||||
deprecation_msg = ('Scoring method brier_score_loss was renamed to '
|
||||
'neg_brier_score in version 0.22 and will '
|
||||
'be removed in 0.24.')
|
||||
brier_score_loss_scorer._deprecation_msg = deprecation_msg
|
||||
|
||||
|
||||
# Clustering scores
|
||||
adjusted_rand_scorer = make_scorer(adjusted_rand_score)
|
||||
homogeneity_scorer = make_scorer(homogeneity_score)
|
||||
completeness_scorer = make_scorer(completeness_score)
|
||||
v_measure_scorer = make_scorer(v_measure_score)
|
||||
mutual_info_scorer = make_scorer(mutual_info_score)
|
||||
adjusted_mutual_info_scorer = make_scorer(adjusted_mutual_info_score)
|
||||
normalized_mutual_info_scorer = make_scorer(normalized_mutual_info_score)
|
||||
fowlkes_mallows_scorer = make_scorer(fowlkes_mallows_score)
|
||||
|
||||
|
||||
SCORERS = dict(explained_variance=explained_variance_scorer,
|
||||
r2=r2_scorer,
|
||||
max_error=max_error_scorer,
|
||||
neg_median_absolute_error=neg_median_absolute_error_scorer,
|
||||
neg_mean_absolute_error=neg_mean_absolute_error_scorer,
|
||||
neg_mean_squared_error=neg_mean_squared_error_scorer,
|
||||
neg_mean_squared_log_error=neg_mean_squared_log_error_scorer,
|
||||
neg_root_mean_squared_error=neg_root_mean_squared_error_scorer,
|
||||
neg_mean_poisson_deviance=neg_mean_poisson_deviance_scorer,
|
||||
neg_mean_gamma_deviance=neg_mean_gamma_deviance_scorer,
|
||||
accuracy=accuracy_scorer, roc_auc=roc_auc_scorer,
|
||||
roc_auc_ovr=roc_auc_ovr_scorer,
|
||||
roc_auc_ovo=roc_auc_ovo_scorer,
|
||||
roc_auc_ovr_weighted=roc_auc_ovr_weighted_scorer,
|
||||
roc_auc_ovo_weighted=roc_auc_ovo_weighted_scorer,
|
||||
balanced_accuracy=balanced_accuracy_scorer,
|
||||
average_precision=average_precision_scorer,
|
||||
neg_log_loss=neg_log_loss_scorer,
|
||||
neg_brier_score=neg_brier_score_scorer,
|
||||
# Cluster metrics that use supervised evaluation
|
||||
adjusted_rand_score=adjusted_rand_scorer,
|
||||
homogeneity_score=homogeneity_scorer,
|
||||
completeness_score=completeness_scorer,
|
||||
v_measure_score=v_measure_scorer,
|
||||
mutual_info_score=mutual_info_scorer,
|
||||
adjusted_mutual_info_score=adjusted_mutual_info_scorer,
|
||||
normalized_mutual_info_score=normalized_mutual_info_scorer,
|
||||
fowlkes_mallows_score=fowlkes_mallows_scorer)
|
||||
|
||||
|
||||
for name, metric in [('precision', precision_score),
|
||||
('recall', recall_score), ('f1', f1_score),
|
||||
('jaccard', jaccard_score)]:
|
||||
SCORERS[name] = make_scorer(metric, average='binary')
|
||||
for average in ['macro', 'micro', 'samples', 'weighted']:
|
||||
qualified_name = '{0}_{1}'.format(name, average)
|
||||
SCORERS[qualified_name] = make_scorer(metric, pos_label=None,
|
||||
average=average)
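# Illustrative note, not part of the upstream file: the loop above registers
# one scorer per metric/average combination, so names such as 'f1_macro' or
# 'recall_weighted' become valid ``scoring`` strings.
#
#     >>> sorted(name for name in SCORERS if name.startswith('f1'))
#     ['f1', 'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted']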
|
18
venv/Lib/site-packages/sklearn/metrics/base.py
Normal file
|
@@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _base # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.metrics.base'
|
||||
correct_import_path = 'sklearn.metrics'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_base, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
18
venv/Lib/site-packages/sklearn/metrics/classification.py
Normal file
|
@@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _classification # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.metrics.classification'
|
||||
correct_import_path = 'sklearn.metrics'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_classification, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
32
venv/Lib/site-packages/sklearn/metrics/cluster/__init__.py
Normal file
|
@@ -0,0 +1,32 @@
|
|||
"""
|
||||
The :mod:`sklearn.metrics.cluster` submodule contains evaluation metrics for
|
||||
cluster analysis results. There are two forms of evaluation:
|
||||
|
||||
- supervised, which uses ground truth class values for each sample.
|
||||
- unsupervised, which does not and measures the 'quality' of the model itself.
|
||||
"""
|
||||
from ._supervised import adjusted_mutual_info_score
|
||||
from ._supervised import normalized_mutual_info_score
|
||||
from ._supervised import adjusted_rand_score
|
||||
from ._supervised import completeness_score
|
||||
from ._supervised import contingency_matrix
|
||||
from ._supervised import expected_mutual_information
|
||||
from ._supervised import homogeneity_completeness_v_measure
|
||||
from ._supervised import homogeneity_score
|
||||
from ._supervised import mutual_info_score
|
||||
from ._supervised import v_measure_score
|
||||
from ._supervised import fowlkes_mallows_score
|
||||
from ._supervised import entropy
|
||||
from ._unsupervised import silhouette_samples
|
||||
from ._unsupervised import silhouette_score
|
||||
from ._unsupervised import calinski_harabasz_score
|
||||
from ._unsupervised import davies_bouldin_score
|
||||
from ._bicluster import consensus_score
|
||||
|
||||
__all__ = ["adjusted_mutual_info_score", "normalized_mutual_info_score",
|
||||
"adjusted_rand_score", "completeness_score", "contingency_matrix",
|
||||
"expected_mutual_information", "homogeneity_completeness_v_measure",
|
||||
"homogeneity_score", "mutual_info_score", "v_measure_score",
|
||||
"fowlkes_mallows_score", "entropy", "silhouette_samples",
|
||||
"silhouette_score", "calinski_harabasz_score",
|
||||
"davies_bouldin_score", "consensus_score"]
|
86
venv/Lib/site-packages/sklearn/metrics/cluster/_bicluster.py
Normal file
|
@@ -0,0 +1,86 @@
|
|||
import numpy as np
|
||||
from scipy.optimize import linear_sum_assignment
|
||||
|
||||
from ...utils.validation import check_consistent_length, check_array
|
||||
from ...utils.validation import _deprecate_positional_args
|
||||
|
||||
__all__ = ["consensus_score"]
|
||||
|
||||
|
||||
def _check_rows_and_columns(a, b):
|
||||
"""Unpacks the row and column arrays and checks their shape."""
|
||||
check_consistent_length(*a)
|
||||
check_consistent_length(*b)
|
||||
checks = lambda x: check_array(x, ensure_2d=False)
|
||||
a_rows, a_cols = map(checks, a)
|
||||
b_rows, b_cols = map(checks, b)
|
||||
return a_rows, a_cols, b_rows, b_cols
|
||||
|
||||
|
||||
def _jaccard(a_rows, a_cols, b_rows, b_cols):
|
||||
"""Jaccard coefficient on the elements of the two biclusters."""
|
||||
intersection = ((a_rows * b_rows).sum() *
|
||||
(a_cols * b_cols).sum())
|
||||
|
||||
a_size = a_rows.sum() * a_cols.sum()
|
||||
b_size = b_rows.sum() * b_cols.sum()
|
||||
|
||||
return intersection / (a_size + b_size - intersection)
|
||||
|
||||
|
||||
def _pairwise_similarity(a, b, similarity):
|
||||
"""Computes pairwise similarity matrix.
|
||||
|
||||
result[i, j] is the Jaccard coefficient of a's bicluster i and b's
|
||||
bicluster j.
|
||||
|
||||
"""
|
||||
a_rows, a_cols, b_rows, b_cols = _check_rows_and_columns(a, b)
|
||||
n_a = a_rows.shape[0]
|
||||
n_b = b_rows.shape[0]
|
||||
result = np.array(list(list(similarity(a_rows[i], a_cols[i],
|
||||
b_rows[j], b_cols[j])
|
||||
for j in range(n_b))
|
||||
for i in range(n_a)))
|
||||
return result
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def consensus_score(a, b, *, similarity="jaccard"):
|
||||
"""The similarity of two sets of biclusters.
|
||||
|
||||
Similarity between individual biclusters is computed. Then the
|
||||
best matching between sets is found using the Hungarian algorithm.
|
||||
The final score is the sum of similarities divided by the size of
|
||||
the larger set.
|
||||
|
||||
Read more in the :ref:`User Guide <biclustering>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
a : (rows, columns)
|
||||
Tuple of row and column indicators for a set of biclusters.
|
||||
|
||||
b : (rows, columns)
|
||||
Another set of biclusters like ``a``.
|
||||
|
||||
similarity : string or function, optional, default: "jaccard"
|
||||
May be the string "jaccard" to use the Jaccard coefficient, or
|
||||
any function that takes four arguments, each of which is a 1d
|
||||
indicator vector: (a_rows, a_columns, b_rows, b_columns).
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
* Hochreiter, Bodenhofer, et. al., 2010. `FABIA: factor analysis
|
||||
for bicluster acquisition
|
||||
<https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2881408/>`__.
|
||||
|
||||
"""
|
||||
if similarity == "jaccard":
|
||||
similarity = _jaccard
|
||||
matrix = _pairwise_similarity(a, b, similarity)
|
||||
row_indices, col_indices = linear_sum_assignment(1. - matrix)
|
||||
n_a = len(a[0])
|
||||
n_b = len(b[0])
|
||||
return matrix[row_indices, col_indices].sum() / max(n_a, n_b)
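# Usage sketch (illustrative, not part of the upstream file): comparing a set
# of biclusters with itself yields a consensus score of 1.0.  Each set is a
# (rows, columns) pair of boolean indicator arrays, one row per bicluster.
#
#     >>> import numpy as np
#     >>> rows = np.array([[True, True, False]])
#     >>> cols = np.array([[True, False, True]])
#     >>> consensus_score((rows, cols), (rows, cols))
#     1.0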
|
980
venv/Lib/site-packages/sklearn/metrics/cluster/_supervised.py
Normal file
|
@@ -0,0 +1,980 @@
|
|||
"""Utilities to evaluate the clustering performance of models.
|
||||
|
||||
Functions named as *_score return a scalar value to maximize: the higher the
|
||||
better.
|
||||
"""
|
||||
|
||||
# Authors: Olivier Grisel <olivier.grisel@ensta.org>
|
||||
# Wei LI <kuantkid@gmail.com>
|
||||
# Diego Molla <dmolla-aliod@gmail.com>
|
||||
# Arnaud Fouchet <foucheta@gmail.com>
|
||||
# Thierry Guillemot <thierry.guillemot.work@gmail.com>
|
||||
# Gregory Stupp <stuppie@gmail.com>
|
||||
# Joel Nothman <joel.nothman@gmail.com>
|
||||
# Arya McCarthy <arya@jhu.edu>
|
||||
# License: BSD 3 clause
|
||||
|
||||
|
||||
from math import log
|
||||
|
||||
import numpy as np
|
||||
from scipy import sparse as sp
|
||||
from scipy.special import comb
|
||||
|
||||
from ._expected_mutual_info_fast import expected_mutual_information
|
||||
from ...utils.validation import check_array, check_consistent_length
|
||||
from ...utils.validation import _deprecate_positional_args
|
||||
from ...utils.fixes import _astype_copy_false
|
||||
|
||||
|
||||
def _comb2(n):
|
||||
# the exact version is faster for k == 2: use it by default globally in
|
||||
# this module instead of the float approximate variant
|
||||
return comb(n, 2, exact=1)
|
||||
|
||||
|
||||
def check_clusterings(labels_true, labels_pred):
|
||||
"""Check that the labels arrays are 1D and of same dimension.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
labels_true : array-like of shape (n_samples,)
|
||||
The true labels.
|
||||
|
||||
labels_pred : array-like of shape (n_samples,)
|
||||
The predicted labels.
|
||||
"""
|
||||
labels_true = check_array(
|
||||
labels_true, ensure_2d=False, ensure_min_samples=0, dtype=None,
|
||||
)
|
||||
labels_pred = check_array(
|
||||
labels_pred, ensure_2d=False, ensure_min_samples=0, dtype=None,
|
||||
)
|
||||
|
||||
# input checks
|
||||
if labels_true.ndim != 1:
|
||||
raise ValueError(
|
||||
"labels_true must be 1D: shape is %r" % (labels_true.shape,))
|
||||
if labels_pred.ndim != 1:
|
||||
raise ValueError(
|
||||
"labels_pred must be 1D: shape is %r" % (labels_pred.shape,))
|
||||
check_consistent_length(labels_true, labels_pred)
|
||||
|
||||
return labels_true, labels_pred
|
||||
|
||||
|
||||
def _generalized_average(U, V, average_method):
|
||||
"""Return a particular mean of two numbers."""
|
||||
if average_method == "min":
|
||||
return min(U, V)
|
||||
elif average_method == "geometric":
|
||||
return np.sqrt(U * V)
|
||||
elif average_method == "arithmetic":
|
||||
return np.mean([U, V])
|
||||
elif average_method == "max":
|
||||
return max(U, V)
|
||||
else:
|
||||
raise ValueError("'average_method' must be 'min', 'geometric', "
|
||||
"'arithmetic', or 'max'")
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def contingency_matrix(labels_true, labels_pred, *, eps=None, sparse=False):
|
||||
"""Build a contingency matrix describing the relationship between labels.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
labels_true : int array, shape = [n_samples]
|
||||
Ground truth class labels to be used as a reference
|
||||
|
||||
labels_pred : array-like of shape (n_samples,)
|
||||
Cluster labels to evaluate
|
||||
|
||||
eps : None or float, optional.
|
||||
If a float, that value is added to all values in the contingency
|
||||
matrix. This helps to stop NaN propagation.
|
||||
If ``None``, nothing is adjusted.
|
||||
|
||||
sparse : boolean, optional.
|
||||
If True, return a sparse CSR contingency matrix. If ``eps is not None``,
|
||||
and ``sparse is True``, will throw ValueError.
|
||||
|
||||
.. versionadded:: 0.18
|
||||
|
||||
Returns
|
||||
-------
|
||||
contingency : {array-like, sparse}, shape=[n_classes_true, n_classes_pred]
|
||||
Matrix :math:`C` such that :math:`C_{i, j}` is the number of samples in
|
||||
true class :math:`i` and in predicted class :math:`j`. If
|
||||
``eps is None``, the dtype of this array will be integer. If ``eps`` is
|
||||
given, the dtype will be float.
|
||||
Will be a ``scipy.sparse.csr_matrix`` if ``sparse=True``.
|
||||
"""
|
||||
|
||||
if eps is not None and sparse:
|
||||
raise ValueError("Cannot set 'eps' when sparse=True")
|
||||
|
||||
classes, class_idx = np.unique(labels_true, return_inverse=True)
|
||||
clusters, cluster_idx = np.unique(labels_pred, return_inverse=True)
|
||||
n_classes = classes.shape[0]
|
||||
n_clusters = clusters.shape[0]
|
||||
# Using coo_matrix to accelerate simple histogram calculation,
|
||||
# i.e. bins are consecutive integers
|
||||
# Currently, coo_matrix is faster than histogram2d for simple cases
|
||||
contingency = sp.coo_matrix((np.ones(class_idx.shape[0]),
|
||||
(class_idx, cluster_idx)),
|
||||
shape=(n_classes, n_clusters),
|
||||
dtype=np.int)
|
||||
if sparse:
|
||||
contingency = contingency.tocsr()
|
||||
contingency.sum_duplicates()
|
||||
else:
|
||||
contingency = contingency.toarray()
|
||||
if eps is not None:
|
||||
# don't use += as contingency is integer
|
||||
contingency = contingency + eps
|
||||
return contingency
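# Illustrative example, not part of the upstream file: for two small label
# vectors the contingency matrix simply counts co-occurrences of
# (true class, predicted cluster) pairs.
#
#     >>> contingency_matrix([0, 0, 1, 1], [0, 0, 1, 2])
#     array([[2, 0, 0],
#            [0, 1, 1]])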
|
||||
|
||||
|
||||
# clustering measures
|
||||
|
||||
def adjusted_rand_score(labels_true, labels_pred):
|
||||
"""Rand index adjusted for chance.
|
||||
|
||||
The Rand Index computes a similarity measure between two clusterings
|
||||
by considering all pairs of samples and counting pairs that are
|
||||
assigned in the same or different clusters in the predicted and
|
||||
true clusterings.
|
||||
|
||||
The raw RI score is then "adjusted for chance" into the ARI score
|
||||
using the following scheme::
|
||||
|
||||
ARI = (RI - Expected_RI) / (max(RI) - Expected_RI)
|
||||
|
||||
The adjusted Rand index is thus ensured to have a value close to
|
||||
0.0 for random labeling independently of the number of clusters and
|
||||
samples and exactly 1.0 when the clusterings are identical (up to
|
||||
a permutation).
|
||||
|
||||
ARI is a symmetric measure::
|
||||
|
||||
adjusted_rand_score(a, b) == adjusted_rand_score(b, a)
|
||||
|
||||
Read more in the :ref:`User Guide <adjusted_rand_score>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
labels_true : int array, shape = [n_samples]
|
||||
Ground truth class labels to be used as a reference
|
||||
|
||||
labels_pred : array-like of shape (n_samples,)
|
||||
Cluster labels to evaluate
|
||||
|
||||
Returns
|
||||
-------
|
||||
ari : float
|
||||
Similarity score between -1.0 and 1.0. Random labelings have an ARI
|
||||
close to 0.0. 1.0 stands for perfect match.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
Perfectly matching labelings have a score of 1 even
|
||||
|
||||
>>> from sklearn.metrics.cluster import adjusted_rand_score
|
||||
>>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 1])
|
||||
1.0
|
||||
>>> adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0])
|
||||
1.0
|
||||
|
||||
Labelings that assign all classes members to the same clusters
|
||||
are complete but not always pure, hence penalized::
|
||||
|
||||
>>> adjusted_rand_score([0, 0, 1, 2], [0, 0, 1, 1])
|
||||
0.57...
|
||||
|
||||
ARI is symmetric, so labelings that have pure clusters with members
|
||||
coming from the same classes but unnecessary splits are penalized::
|
||||
|
||||
>>> adjusted_rand_score([0, 0, 1, 1], [0, 0, 1, 2])
|
||||
0.57...
|
||||
|
||||
If classes members are completely split across different clusters, the
|
||||
assignment is totally incomplete, hence the ARI is very low::
|
||||
|
||||
>>> adjusted_rand_score([0, 0, 0, 0], [0, 1, 2, 3])
|
||||
0.0
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
.. [Hubert1985] L. Hubert and P. Arabie, Comparing Partitions,
|
||||
Journal of Classification 1985
|
||||
https://link.springer.com/article/10.1007%2FBF01908075
|
||||
|
||||
.. [wk] https://en.wikipedia.org/wiki/Rand_index#Adjusted_Rand_index
|
||||
|
||||
See also
|
||||
--------
|
||||
adjusted_mutual_info_score: Adjusted Mutual Information
|
||||
|
||||
"""
|
||||
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
|
||||
n_samples = labels_true.shape[0]
|
||||
n_classes = np.unique(labels_true).shape[0]
|
||||
n_clusters = np.unique(labels_pred).shape[0]
|
||||
|
||||
# Special limit cases: no clustering since the data is not split;
|
||||
# or trivial clustering where each document is assigned a unique cluster.
|
||||
# These are perfect matches hence return 1.0.
|
||||
if (n_classes == n_clusters == 1 or
|
||||
n_classes == n_clusters == 0 or
|
||||
n_classes == n_clusters == n_samples):
|
||||
return 1.0
|
||||
|
||||
# Compute the ARI using the contingency data
|
||||
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
|
||||
sum_comb_c = sum(_comb2(n_c) for n_c in np.ravel(contingency.sum(axis=1)))
|
||||
sum_comb_k = sum(_comb2(n_k) for n_k in np.ravel(contingency.sum(axis=0)))
|
||||
sum_comb = sum(_comb2(n_ij) for n_ij in contingency.data)
|
||||
|
||||
prod_comb = (sum_comb_c * sum_comb_k) / _comb2(n_samples)
|
||||
mean_comb = (sum_comb_k + sum_comb_c) / 2.
|
||||
return (sum_comb - prod_comb) / (mean_comb - prod_comb)
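# Worked illustration, not part of the upstream file, of the contingency-based
# computation above for labels_true = [0, 0, 1, 1], labels_pred = [0, 0, 1, 2]
# (C(n, 2) denotes "n choose 2"):
#   contingency = [[2, 0, 0], [0, 1, 1]], n_samples = 4
#   sum_comb   = C(2, 2)           = 1     (non-zero cells 2, 1, 1)
#   sum_comb_c = C(2, 2) + C(2, 2) = 2     (row sums 2, 2)
#   sum_comb_k = C(2, 2)           = 1     (column sums 2, 1, 1)
#   prod_comb  = 2 * 1 / C(4, 2)   = 1/3
#   mean_comb  = (2 + 1) / 2       = 1.5
#   ARI = (1 - 1/3) / (1.5 - 1/3) = 4/7 ~= 0.57, matching the docstring above.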
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def homogeneity_completeness_v_measure(labels_true, labels_pred, *, beta=1.0):
|
||||
"""Compute the homogeneity and completeness and V-Measure scores at once.
|
||||
|
||||
Those metrics are based on normalized conditional entropy measures of
|
||||
the clustering labeling to evaluate given the knowledge of a Ground
|
||||
Truth class labels of the same samples.
|
||||
|
||||
A clustering result satisfies homogeneity if all of its clusters
|
||||
contain only data points which are members of a single class.
|
||||
|
||||
A clustering result satisfies completeness if all the data points
|
||||
that are members of a given class are elements of the same cluster.
|
||||
|
||||
Both scores have positive values between 0.0 and 1.0, larger values
|
||||
being desirable.
|
||||
|
||||
Those 3 metrics are independent of the absolute values of the labels:
|
||||
a permutation of the class or cluster label values won't change the
|
||||
score values in any way.
|
||||
|
||||
V-Measure is furthermore symmetric: swapping ``labels_true`` and
|
||||
``label_pred`` will give the same score. This does not hold for
|
||||
homogeneity and completeness. V-Measure is identical to
|
||||
:func:`normalized_mutual_info_score` with the arithmetic averaging
|
||||
method.
|
||||
|
||||
Read more in the :ref:`User Guide <homogeneity_completeness>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
labels_true : int array, shape = [n_samples]
|
||||
ground truth class labels to be used as a reference
|
||||
|
||||
labels_pred : array-like of shape (n_samples,)
|
||||
cluster labels to evaluate
|
||||
|
||||
beta : float
|
||||
Ratio of weight attributed to ``homogeneity`` vs ``completeness``.
|
||||
If ``beta`` is greater than 1, ``completeness`` is weighted more
|
||||
strongly in the calculation. If ``beta`` is less than 1,
|
||||
``homogeneity`` is weighted more strongly.
|
||||
|
||||
Returns
|
||||
-------
|
||||
homogeneity : float
|
||||
score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling
|
||||
|
||||
completeness : float
|
||||
score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
|
||||
|
||||
v_measure : float
|
||||
harmonic mean of the first two
|
||||
|
||||
See also
|
||||
--------
|
||||
homogeneity_score
|
||||
completeness_score
|
||||
v_measure_score
|
||||
"""
|
||||
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
|
||||
|
||||
if len(labels_true) == 0:
|
||||
return 1.0, 1.0, 1.0
|
||||
|
||||
entropy_C = entropy(labels_true)
|
||||
entropy_K = entropy(labels_pred)
|
||||
|
||||
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
|
||||
MI = mutual_info_score(None, None, contingency=contingency)
|
||||
|
||||
homogeneity = MI / (entropy_C) if entropy_C else 1.0
|
||||
completeness = MI / (entropy_K) if entropy_K else 1.0
|
||||
|
||||
if homogeneity + completeness == 0.0:
|
||||
v_measure_score = 0.0
|
||||
else:
|
||||
v_measure_score = ((1 + beta) * homogeneity * completeness
|
||||
/ (beta * homogeneity + completeness))
|
||||
|
||||
return homogeneity, completeness, v_measure_score
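# Illustrative example, not part of the upstream file: merging two true
# classes into one predicted cluster keeps completeness at 1.0 but lowers
# homogeneity, and the V-measure is their harmonic mean (beta=1).
#
#     >>> h, c, v = homogeneity_completeness_v_measure([0, 0, 1, 2],
#     ...                                              [0, 0, 1, 1])
#     >>> (round(h, 2), round(c, 2), round(v, 2))
#     (0.67, 1.0, 0.8)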
|
||||
|
||||
|
||||
def homogeneity_score(labels_true, labels_pred):
|
||||
"""Homogeneity metric of a cluster labeling given a ground truth.
|
||||
|
||||
A clustering result satisfies homogeneity if all of its clusters
|
||||
contain only data points which are members of a single class.
|
||||
|
||||
This metric is independent of the absolute values of the labels:
|
||||
a permutation of the class or cluster label values won't change the
|
||||
score value in any way.
|
||||
|
||||
This metric is not symmetric: switching ``label_true`` with ``label_pred``
|
||||
will return the :func:`completeness_score` which will be different in
|
||||
general.
|
||||
|
||||
Read more in the :ref:`User Guide <homogeneity_completeness>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
labels_true : int array, shape = [n_samples]
|
||||
ground truth class labels to be used as a reference
|
||||
|
||||
labels_pred : array-like of shape (n_samples,)
|
||||
cluster labels to evaluate
|
||||
|
||||
Returns
|
||||
-------
|
||||
homogeneity : float
|
||||
score between 0.0 and 1.0. 1.0 stands for perfectly homogeneous labeling
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A
|
||||
conditional entropy-based external cluster evaluation measure
|
||||
<https://aclweb.org/anthology/D/D07/D07-1043.pdf>`_
|
||||
|
||||
See also
|
||||
--------
|
||||
completeness_score
|
||||
v_measure_score
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
Perfect labelings are homogeneous::
|
||||
|
||||
>>> from sklearn.metrics.cluster import homogeneity_score
|
||||
>>> homogeneity_score([0, 0, 1, 1], [1, 1, 0, 0])
|
||||
1.0
|
||||
|
||||
Non-perfect labelings that further split classes into more clusters can be
|
||||
perfectly homogeneous::
|
||||
|
||||
>>> print("%.6f" % homogeneity_score([0, 0, 1, 1], [0, 0, 1, 2]))
|
||||
1.000000
|
||||
>>> print("%.6f" % homogeneity_score([0, 0, 1, 1], [0, 1, 2, 3]))
|
||||
1.000000
|
||||
|
||||
Clusters that include samples from different classes do not make for an
|
||||
homogeneous labeling::
|
||||
|
||||
>>> print("%.6f" % homogeneity_score([0, 0, 1, 1], [0, 1, 0, 1]))
|
||||
0.0...
|
||||
>>> print("%.6f" % homogeneity_score([0, 0, 1, 1], [0, 0, 0, 0]))
|
||||
0.0...
|
||||
|
||||
"""
|
||||
return homogeneity_completeness_v_measure(labels_true, labels_pred)[0]
|
||||
|
||||
|
||||
def completeness_score(labels_true, labels_pred):
|
||||
"""Completeness metric of a cluster labeling given a ground truth.
|
||||
|
||||
A clustering result satisfies completeness if all the data points
|
||||
that are members of a given class are elements of the same cluster.
|
||||
|
||||
This metric is independent of the absolute values of the labels:
|
||||
a permutation of the class or cluster label values won't change the
|
||||
score value in any way.
|
||||
|
||||
This metric is not symmetric: switching ``label_true`` with ``label_pred``
|
||||
will return the :func:`homogeneity_score` which will be different in
|
||||
general.
|
||||
|
||||
Read more in the :ref:`User Guide <homogeneity_completeness>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
labels_true : int array, shape = [n_samples]
|
||||
ground truth class labels to be used as a reference
|
||||
|
||||
labels_pred : array-like of shape (n_samples,)
|
||||
cluster labels to evaluate
|
||||
|
||||
Returns
|
||||
-------
|
||||
completeness : float
|
||||
score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A
|
||||
conditional entropy-based external cluster evaluation measure
|
||||
<https://aclweb.org/anthology/D/D07/D07-1043.pdf>`_
|
||||
|
||||
See also
|
||||
--------
|
||||
homogeneity_score
|
||||
v_measure_score
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
Perfect labelings are complete::
|
||||
|
||||
>>> from sklearn.metrics.cluster import completeness_score
|
||||
>>> completeness_score([0, 0, 1, 1], [1, 1, 0, 0])
|
||||
1.0
|
||||
|
||||
Non-perfect labelings that assign all classes members to the same clusters
|
||||
are still complete::
|
||||
|
||||
>>> print(completeness_score([0, 0, 1, 1], [0, 0, 0, 0]))
|
||||
1.0
|
||||
>>> print(completeness_score([0, 1, 2, 3], [0, 0, 1, 1]))
|
||||
0.999...
|
||||
|
||||
If classes members are split across different clusters, the
|
||||
assignment cannot be complete::
|
||||
|
||||
>>> print(completeness_score([0, 0, 1, 1], [0, 1, 0, 1]))
|
||||
0.0
|
||||
>>> print(completeness_score([0, 0, 0, 0], [0, 1, 2, 3]))
|
||||
0.0
|
||||
|
||||
"""
|
||||
return homogeneity_completeness_v_measure(labels_true, labels_pred)[1]
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def v_measure_score(labels_true, labels_pred, *, beta=1.0):
|
||||
"""V-measure cluster labeling given a ground truth.
|
||||
|
||||
This score is identical to :func:`normalized_mutual_info_score` with
|
||||
the ``'arithmetic'`` option for averaging.
|
||||
|
||||
The V-measure is the harmonic mean between homogeneity and completeness::
|
||||
|
||||
v = (1 + beta) * homogeneity * completeness
|
||||
/ (beta * homogeneity + completeness)
|
||||
|
||||
This metric is independent of the absolute values of the labels:
|
||||
a permutation of the class or cluster label values won't change the
|
||||
score value in any way.
|
||||
|
||||
This metric is furthermore symmetric: switching ``label_true`` with
|
||||
``label_pred`` will return the same score value. This can be useful to
|
||||
measure the agreement of two independent label assignments strategies
|
||||
on the same dataset when the real ground truth is not known.
|
||||
|
||||
|
||||
Read more in the :ref:`User Guide <homogeneity_completeness>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
labels_true : int array, shape = [n_samples]
|
||||
ground truth class labels to be used as a reference
|
||||
|
||||
labels_pred : array-like of shape (n_samples,)
|
||||
cluster labels to evaluate
|
||||
|
||||
beta : float
|
||||
Ratio of weight attributed to ``homogeneity`` vs ``completeness``.
|
||||
If ``beta`` is greater than 1, ``completeness`` is weighted more
|
||||
strongly in the calculation. If ``beta`` is less than 1,
|
||||
``homogeneity`` is weighted more strongly.
|
||||
|
||||
Returns
|
||||
-------
|
||||
v_measure : float
|
||||
score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
.. [1] `Andrew Rosenberg and Julia Hirschberg, 2007. V-Measure: A
|
||||
conditional entropy-based external cluster evaluation measure
|
||||
<https://aclweb.org/anthology/D/D07/D07-1043.pdf>`_
|
||||
|
||||
See also
|
||||
--------
|
||||
homogeneity_score
|
||||
completeness_score
|
||||
normalized_mutual_info_score
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
Perfect labelings are both homogeneous and complete, hence have score 1.0::
|
||||
|
||||
>>> from sklearn.metrics.cluster import v_measure_score
|
||||
>>> v_measure_score([0, 0, 1, 1], [0, 0, 1, 1])
|
||||
1.0
|
||||
>>> v_measure_score([0, 0, 1, 1], [1, 1, 0, 0])
|
||||
1.0
|
||||
|
||||
Labelings that assign all classes members to the same clusters
|
||||
are complete but not homogeneous, hence penalized::
|
||||
|
||||
>>> print("%.6f" % v_measure_score([0, 0, 1, 2], [0, 0, 1, 1]))
|
||||
0.8...
|
||||
>>> print("%.6f" % v_measure_score([0, 1, 2, 3], [0, 0, 1, 1]))
|
||||
0.66...
|
||||
|
||||
Labelings that have pure clusters with members coming from the same
|
||||
classes are homogeneous, but unnecessary splits harm completeness
|
||||
and thus penalize V-measure as well::
|
||||
|
||||
>>> print("%.6f" % v_measure_score([0, 0, 1, 1], [0, 0, 1, 2]))
|
||||
0.8...
|
||||
>>> print("%.6f" % v_measure_score([0, 0, 1, 1], [0, 1, 2, 3]))
|
||||
0.66...
|
||||
|
||||
If classes members are completely split across different clusters,
|
||||
the assignment is totally incomplete, hence the V-Measure is null::
|
||||
|
||||
>>> print("%.6f" % v_measure_score([0, 0, 0, 0], [0, 1, 2, 3]))
|
||||
0.0...
|
||||
|
||||
Clusters that include samples from totally different classes totally
|
||||
destroy the homogeneity of the labeling, hence::
|
||||
|
||||
>>> print("%.6f" % v_measure_score([0, 0, 1, 1], [0, 0, 0, 0]))
|
||||
0.0...
|
||||
|
||||
"""
|
||||
return homogeneity_completeness_v_measure(labels_true, labels_pred,
|
||||
beta=beta)[2]
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def mutual_info_score(labels_true, labels_pred, *, contingency=None):
|
||||
"""Mutual Information between two clusterings.
|
||||
|
||||
The Mutual Information is a measure of the similarity between two labels of
|
||||
the same data. Where :math:`|U_i|` is the number of the samples
|
||||
in cluster :math:`U_i` and :math:`|V_j|` is the number of the
|
||||
samples in cluster :math:`V_j`, the Mutual Information
|
||||
between clusterings :math:`U` and :math:`V` is given as:
|
||||
|
||||
.. math::
|
||||
|
||||
MI(U,V)=\\sum_{i=1}^{|U|} \\sum_{j=1}^{|V|} \\frac{|U_i\\cap V_j|}{N}
|
||||
\\log\\frac{N|U_i \\cap V_j|}{|U_i||V_j|}
|
||||
|
||||
This metric is independent of the absolute values of the labels:
|
||||
a permutation of the class or cluster label values won't change the
|
||||
score value in any way.
|
||||
|
||||
This metric is furthermore symmetric: switching ``label_true`` with
|
||||
``label_pred`` will return the same score value. This can be useful to
|
||||
measure the agreement of two independent label assignments strategies
|
||||
on the same dataset when the real ground truth is not known.
|
||||
|
||||
Read more in the :ref:`User Guide <mutual_info_score>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
labels_true : int array, shape = [n_samples]
|
||||
A clustering of the data into disjoint subsets.
|
||||
|
||||
labels_pred : int array-like of shape (n_samples,)
|
||||
A clustering of the data into disjoint subsets.
|
||||
|
||||
contingency : {None, array, sparse matrix}, \
|
||||
shape = [n_classes_true, n_classes_pred]
|
||||
A contingency matrix given by the :func:`contingency_matrix` function.
|
||||
If value is ``None``, it will be computed, otherwise the given value is
|
||||
used, with ``labels_true`` and ``labels_pred`` ignored.
|
||||
|
||||
Returns
|
||||
-------
|
||||
mi : float
|
||||
Mutual information, a non-negative value
|
||||
|
||||
Notes
|
||||
-----
|
||||
The logarithm used is the natural logarithm (base-e).
|
||||
|
||||
See also
|
||||
--------
|
||||
adjusted_mutual_info_score: Adjusted against chance Mutual Information
|
||||
normalized_mutual_info_score: Normalized Mutual Information
|
||||
"""
|
||||
if contingency is None:
|
||||
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
|
||||
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
|
||||
else:
|
||||
contingency = check_array(contingency,
|
||||
accept_sparse=['csr', 'csc', 'coo'],
|
||||
dtype=[int, np.int32, np.int64])
|
||||
|
||||
if isinstance(contingency, np.ndarray):
|
||||
# For an array
|
||||
nzx, nzy = np.nonzero(contingency)
|
||||
nz_val = contingency[nzx, nzy]
|
||||
elif sp.issparse(contingency):
|
||||
# For a sparse matrix
|
||||
nzx, nzy, nz_val = sp.find(contingency)
|
||||
else:
|
||||
raise ValueError("Unsupported type for 'contingency': %s" %
|
||||
type(contingency))
|
||||
|
||||
contingency_sum = contingency.sum()
|
||||
pi = np.ravel(contingency.sum(axis=1))
|
||||
pj = np.ravel(contingency.sum(axis=0))
|
||||
log_contingency_nm = np.log(nz_val)
|
||||
contingency_nm = nz_val / contingency_sum
|
||||
# Don't need to calculate the full outer product, just for non-zeroes
|
||||
outer = (pi.take(nzx).astype(np.int64, copy=False)
|
||||
* pj.take(nzy).astype(np.int64, copy=False))
|
||||
log_outer = -np.log(outer) + log(pi.sum()) + log(pj.sum())
|
||||
mi = (contingency_nm * (log_contingency_nm - log(contingency_sum)) +
|
||||
contingency_nm * log_outer)
|
||||
return np.clip(mi.sum(), 0.0, None)
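# Illustrative example, not part of the upstream file: passing a precomputed
# contingency matrix gives the same result as passing the raw labels, since
# the labels are ignored when ``contingency`` is provided.
#
#     >>> import numpy as np
#     >>> a, b = [0, 0, 1, 1], [0, 0, 1, 2]
#     >>> cont = contingency_matrix(a, b, sparse=True)
#     >>> bool(np.isclose(mutual_info_score(a, b),
#     ...                 mutual_info_score(None, None, contingency=cont)))
#     True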
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def adjusted_mutual_info_score(labels_true, labels_pred, *,
|
||||
average_method='arithmetic'):
|
||||
"""Adjusted Mutual Information between two clusterings.
|
||||
|
||||
Adjusted Mutual Information (AMI) is an adjustment of the Mutual
|
||||
Information (MI) score to account for chance. It accounts for the fact that
|
||||
the MI is generally higher for two clusterings with a larger number of
|
||||
clusters, regardless of whether there is actually more information shared.
|
||||
For two clusterings :math:`U` and :math:`V`, the AMI is given as::
|
||||
|
||||
AMI(U, V) = [MI(U, V) - E(MI(U, V))] / [avg(H(U), H(V)) - E(MI(U, V))]
|
||||
|
||||
This metric is independent of the absolute values of the labels:
|
||||
a permutation of the class or cluster label values won't change the
|
||||
score value in any way.
|
||||
|
||||
This metric is furthermore symmetric: switching ``label_true`` with
|
||||
``label_pred`` will return the same score value. This can be useful to
|
||||
measure the agreement of two independent label assignments strategies
|
||||
on the same dataset when the real ground truth is not known.
|
||||
|
||||
Be mindful that this function is an order of magnitude slower than other
|
||||
metrics, such as the Adjusted Rand Index.
|
||||
|
||||
Read more in the :ref:`User Guide <mutual_info_score>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
labels_true : int array, shape = [n_samples]
|
||||
A clustering of the data into disjoint subsets.
|
||||
|
||||
labels_pred : int array-like of shape (n_samples,)
|
||||
A clustering of the data into disjoint subsets.
|
||||
|
||||
average_method : string, optional (default: 'arithmetic')
|
||||
How to compute the normalizer in the denominator. Possible options
|
||||
are 'min', 'geometric', 'arithmetic', and 'max'.
|
||||
|
||||
.. versionadded:: 0.20
|
||||
|
||||
.. versionchanged:: 0.22
|
||||
The default value of ``average_method`` changed from 'max' to
|
||||
'arithmetic'.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ami : float (upper bounded by 1.0)
|
||||
The AMI returns a value of 1 when the two partitions are identical
|
||||
(i.e. perfectly matched). Random partitions (independent labellings) have
|
||||
an expected AMI around 0 on average hence can be negative.
|
||||
|
||||
See also
|
||||
--------
|
||||
adjusted_rand_score: Adjusted Rand Index
|
||||
mutual_info_score: Mutual Information (not adjusted for chance)
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
Perfect labelings are both homogeneous and complete, hence have
|
||||
score 1.0::
|
||||
|
||||
>>> from sklearn.metrics.cluster import adjusted_mutual_info_score
|
||||
>>> adjusted_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])
|
||||
... # doctest: +SKIP
|
||||
1.0
|
||||
>>> adjusted_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])
|
||||
... # doctest: +SKIP
|
||||
1.0
|
||||
|
||||
If classes members are completely split across different clusters,
|
||||
the assignment is totally incomplete, hence the AMI is null::
|
||||
|
||||
>>> adjusted_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])
|
||||
... # doctest: +SKIP
|
||||
0.0
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] `Vinh, Epps, and Bailey, (2010). Information Theoretic Measures for
|
||||
Clusterings Comparison: Variants, Properties, Normalization and
|
||||
Correction for Chance, JMLR
|
||||
<http://jmlr.csail.mit.edu/papers/volume11/vinh10a/vinh10a.pdf>`_
|
||||
|
||||
.. [2] `Wikipedia entry for the Adjusted Mutual Information
|
||||
<https://en.wikipedia.org/wiki/Adjusted_Mutual_Information>`_
|
||||
|
||||
"""
|
||||
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
|
||||
n_samples = labels_true.shape[0]
|
||||
classes = np.unique(labels_true)
|
||||
clusters = np.unique(labels_pred)
|
||||
# Special limit cases: no clustering since the data is not split.
|
||||
# This is a perfect match hence return 1.0.
|
||||
if (classes.shape[0] == clusters.shape[0] == 1 or
|
||||
classes.shape[0] == clusters.shape[0] == 0):
|
||||
return 1.0
|
||||
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
|
||||
contingency = contingency.astype(np.float64,
|
||||
**_astype_copy_false(contingency))
|
||||
# Calculate the MI for the two clusterings
|
||||
mi = mutual_info_score(labels_true, labels_pred,
|
||||
contingency=contingency)
|
||||
# Calculate the expected value for the mutual information
|
||||
emi = expected_mutual_information(contingency, n_samples)
|
||||
# Calculate entropy for each labeling
|
||||
h_true, h_pred = entropy(labels_true), entropy(labels_pred)
|
||||
normalizer = _generalized_average(h_true, h_pred, average_method)
|
||||
denominator = normalizer - emi
|
||||
# Avoid 0.0 / 0.0 when expectation equals maximum, i.e a perfect match.
|
||||
# normalizer should always be >= emi, but because of floating-point
|
||||
# representation, sometimes emi is slightly larger. Correct this
|
||||
# by preserving the sign.
|
||||
if denominator < 0:
|
||||
denominator = min(denominator, -np.finfo('float64').eps)
|
||||
else:
|
||||
denominator = max(denominator, np.finfo('float64').eps)
|
||||
ami = (mi - emi) / denominator
|
||||
return ami
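# Worked illustration, not part of the upstream file: with
# labels_true = [0, 0, 1, 1] and labels_pred = [0, 1, 2, 3] (every sample in
# its own cluster) the plain MI equals ln 2 ~= 0.69, but the expected MI for
# random labellings with those cluster sizes is also ln 2, so the numerator
# (mi - emi) vanishes and the AMI comes out near 0, its chance level.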
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def normalized_mutual_info_score(labels_true, labels_pred, *,
|
||||
average_method='arithmetic'):
|
||||
"""Normalized Mutual Information between two clusterings.
|
||||
|
||||
Normalized Mutual Information (NMI) is a normalization of the Mutual
|
||||
Information (MI) score to scale the results between 0 (no mutual
|
||||
information) and 1 (perfect correlation). In this function, mutual
|
||||
information is normalized by some generalized mean of ``H(labels_true)``
|
||||
and ``H(labels_pred)``, defined by the `average_method`.
|
||||
|
||||
This measure is not adjusted for chance. Therefore
|
||||
:func:`adjusted_mutual_info_score` might be preferred.
|
||||
|
||||
This metric is independent of the absolute values of the labels:
|
||||
a permutation of the class or cluster label values won't change the
|
||||
score value in any way.
|
||||
|
||||
This metric is furthermore symmetric: switching ``label_true`` with
|
||||
``label_pred`` will return the same score value. This can be useful to
|
||||
measure the agreement of two independent label assignments strategies
|
||||
on the same dataset when the real ground truth is not known.
|
||||
|
||||
Read more in the :ref:`User Guide <mutual_info_score>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
labels_true : int array, shape = [n_samples]
|
||||
A clustering of the data into disjoint subsets.
|
||||
|
||||
labels_pred : int array-like of shape (n_samples,)
|
||||
A clustering of the data into disjoint subsets.
|
||||
|
||||
average_method : string, optional (default: 'arithmetic')
|
||||
How to compute the normalizer in the denominator. Possible options
|
||||
are 'min', 'geometric', 'arithmetic', and 'max'.
|
||||
|
||||
.. versionadded:: 0.20
|
||||
|
||||
.. versionchanged:: 0.22
|
||||
The default value of ``average_method`` changed from 'geometric' to
|
||||
'arithmetic'.
|
||||
|
||||
Returns
|
||||
-------
|
||||
nmi : float
|
||||
score between 0.0 and 1.0. 1.0 stands for perfectly complete labeling
|
||||
|
||||
See also
|
||||
--------
|
||||
v_measure_score: V-Measure (NMI with arithmetic mean option.)
|
||||
adjusted_rand_score: Adjusted Rand Index
|
||||
adjusted_mutual_info_score: Adjusted Mutual Information (adjusted
|
||||
against chance)
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
Perfect labelings are both homogeneous and complete, hence have
|
||||
score 1.0::
|
||||
|
||||
>>> from sklearn.metrics.cluster import normalized_mutual_info_score
|
||||
>>> normalized_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 1])
|
||||
... # doctest: +SKIP
|
||||
1.0
|
||||
>>> normalized_mutual_info_score([0, 0, 1, 1], [1, 1, 0, 0])
|
||||
... # doctest: +SKIP
|
||||
1.0
|
||||
|
||||
If classes members are completely split across different clusters,
|
||||
the assignment is totally incomplete, hence the NMI is null::
|
||||
|
||||
>>> normalized_mutual_info_score([0, 0, 0, 0], [0, 1, 2, 3])
|
||||
... # doctest: +SKIP
|
||||
0.0
|
||||
|
||||
"""
|
||||
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
|
||||
classes = np.unique(labels_true)
|
||||
clusters = np.unique(labels_pred)
|
||||
# Special limit cases: no clustering since the data is not split.
|
||||
# This is a perfect match hence return 1.0.
|
||||
if (classes.shape[0] == clusters.shape[0] == 1 or
|
||||
classes.shape[0] == clusters.shape[0] == 0):
|
||||
return 1.0
|
||||
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
|
||||
contingency = contingency.astype(np.float64,
|
||||
**_astype_copy_false(contingency))
|
||||
# Calculate the MI for the two clusterings
|
||||
mi = mutual_info_score(labels_true, labels_pred,
|
||||
contingency=contingency)
|
||||
# Calculate the expected value for the mutual information
|
||||
# Calculate entropy for each labeling
|
||||
h_true, h_pred = entropy(labels_true), entropy(labels_pred)
|
||||
normalizer = _generalized_average(h_true, h_pred, average_method)
|
||||
# Avoid 0.0 / 0.0 when either entropy is zero.
|
||||
normalizer = max(normalizer, np.finfo('float64').eps)
|
||||
nmi = mi / normalizer
|
||||
return nmi
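# Illustrative check, not part of the upstream file: with the default
# ``average_method='arithmetic'`` the NMI coincides with the V-measure
# (here both come out at about 0.8 for this labeling).
#
#     >>> import numpy as np
#     >>> nmi = normalized_mutual_info_score([0, 0, 1, 1], [0, 0, 1, 2])
#     >>> vm = v_measure_score([0, 0, 1, 1], [0, 0, 1, 2])
#     >>> bool(np.isclose(nmi, vm))
#     True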
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def fowlkes_mallows_score(labels_true, labels_pred, *, sparse=False):
|
||||
"""Measure the similarity of two clusterings of a set of points.
|
||||
|
||||
.. versionadded:: 0.18
|
||||
|
||||
The Fowlkes-Mallows index (FMI) is defined as the geometric mean of
|
||||
the precision and recall::
|
||||
|
||||
FMI = TP / sqrt((TP + FP) * (TP + FN))
|
||||
|
||||
Where ``TP`` is the number of **True Positives** (i.e. the number of pairs of
|
||||
points that belong to the same cluster in both ``labels_true`` and
|
||||
``labels_pred``), ``FP`` is the number of **False Positives** (i.e. the
|
||||
number of pairs of points that belong to the same cluster in
|
||||
``labels_true`` and not in ``labels_pred``) and ``FN`` is the number of
|
||||
**False Negatives** (i.e. the number of pairs of points that belong to the
|
||||
same cluster in ``labels_pred`` and not in ``labels_true``).
|
||||
|
||||
The score ranges from 0 to 1. A high value indicates a good similarity
|
||||
between two clusters.
|
||||
|
||||
Read more in the :ref:`User Guide <fowlkes_mallows_scores>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
labels_true : int array, shape = (``n_samples``,)
|
||||
A clustering of the data into disjoint subsets.
|
||||
|
||||
labels_pred : array, shape = (``n_samples``, )
|
||||
A clustering of the data into disjoint subsets.
|
||||
|
||||
sparse : bool
|
||||
Compute contingency matrix internally with sparse matrix.
|
||||
|
||||
Returns
|
||||
-------
|
||||
score : float
|
||||
The resulting Fowlkes-Mallows score.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
Perfect labelings are both homogeneous and complete, hence have
|
||||
score 1.0::
|
||||
|
||||
>>> from sklearn.metrics.cluster import fowlkes_mallows_score
|
||||
>>> fowlkes_mallows_score([0, 0, 1, 1], [0, 0, 1, 1])
|
||||
1.0
|
||||
>>> fowlkes_mallows_score([0, 0, 1, 1], [1, 1, 0, 0])
|
||||
1.0
|
||||
|
||||
If classes members are completely split across different clusters,
|
||||
the assignment is totally random, hence the FMI is null::
|
||||
|
||||
>>> fowlkes_mallows_score([0, 0, 0, 0], [0, 1, 2, 3])
|
||||
0.0
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] `E. B. Fowlkes and C. L. Mallows, 1983. "A method for comparing two
|
||||
hierarchical clusterings". Journal of the American Statistical
|
||||
Association
|
||||
<http://wildfire.stat.ucla.edu/pdflibrary/fowlkes.pdf>`_
|
||||
|
||||
.. [2] `Wikipedia entry for the Fowlkes-Mallows Index
|
||||
<https://en.wikipedia.org/wiki/Fowlkes-Mallows_index>`_
|
||||
"""
|
||||
labels_true, labels_pred = check_clusterings(labels_true, labels_pred)
|
||||
n_samples, = labels_true.shape
|
||||
|
||||
c = contingency_matrix(labels_true, labels_pred,
|
||||
sparse=True)
|
||||
c = c.astype(np.int64, **_astype_copy_false(c))
|
||||
tk = np.dot(c.data, c.data) - n_samples
|
||||
pk = np.sum(np.asarray(c.sum(axis=0)).ravel() ** 2) - n_samples
|
||||
qk = np.sum(np.asarray(c.sum(axis=1)).ravel() ** 2) - n_samples
|
||||
return np.sqrt(tk / pk) * np.sqrt(tk / qk) if tk != 0. else 0.
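# Worked illustration, not part of the upstream file, of the computation above
# for labels_true = [0, 0, 1, 1], labels_pred = [0, 0, 1, 2]:
#   contingency c = [[2, 0, 0], [0, 1, 1]], n_samples = 4
#   tk = sum(c_ij ** 2) - n      = (4 + 1 + 1) - 4 = 2
#   pk = sum(col_sums ** 2) - n  = (4 + 1 + 1) - 4 = 2
#   qk = sum(row_sums ** 2) - n  = (4 + 4) - 4     = 4
#   FMI = sqrt(2 / 2) * sqrt(2 / 4) = 1 / sqrt(2) ~= 0.71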
|
||||
|
||||
|
||||
def entropy(labels):
|
||||
"""Calculates the entropy for a labeling.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
labels : int array, shape = [n_samples]
|
||||
The labels
|
||||
|
||||
Notes
|
||||
-----
|
||||
The logarithm used is the natural logarithm (base-e).
|
||||
"""
|
||||
if len(labels) == 0:
|
||||
return 1.0
|
||||
label_idx = np.unique(labels, return_inverse=True)[1]
|
||||
pi = np.bincount(label_idx).astype(np.float64)
|
||||
pi = pi[pi > 0]
|
||||
pi_sum = np.sum(pi)
|
||||
# log(a / b) should be calculated as log(a) - log(b) to avoid
|
||||
# possible loss of precision
|
||||
return -np.sum((pi / pi_sum) * (np.log(pi) - log(pi_sum)))
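# Illustrative example, not part of the upstream file: a balanced binary
# labeling has natural-log entropy ln 2.
#
#     >>> round(entropy([0, 0, 1, 1]), 3)
#     0.693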
|
363
venv/Lib/site-packages/sklearn/metrics/cluster/_unsupervised.py
Normal file
|
@@ -0,0 +1,363 @@
|
|||
"""Unsupervised evaluation metrics."""
|
||||
|
||||
# Authors: Robert Layton <robertlayton@gmail.com>
|
||||
# Arnaud Fouchet <foucheta@gmail.com>
|
||||
# Thierry Guillemot <thierry.guillemot.work@gmail.com>
|
||||
# License: BSD 3 clause
|
||||
|
||||
|
||||
import functools
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ...utils import check_random_state
|
||||
from ...utils import check_X_y
|
||||
from ...utils import _safe_indexing
|
||||
from ..pairwise import pairwise_distances_chunked
|
||||
from ..pairwise import pairwise_distances
|
||||
from ...preprocessing import LabelEncoder
|
||||
from ...utils.validation import _deprecate_positional_args
|
||||
|
||||
|
||||
def check_number_of_labels(n_labels, n_samples):
|
||||
"""Check that number of labels are valid.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_labels : int
|
||||
Number of labels
|
||||
|
||||
n_samples : int
|
||||
Number of samples
|
||||
"""
|
||||
if not 1 < n_labels < n_samples:
|
||||
raise ValueError("Number of labels is %d. Valid values are 2 "
|
||||
"to n_samples - 1 (inclusive)" % n_labels)
|
||||
|
||||
|
||||
@_deprecate_positional_args
def silhouette_score(X, labels, *, metric='euclidean', sample_size=None,
                     random_state=None, **kwds):
    """Compute the mean Silhouette Coefficient of all samples.

    The Silhouette Coefficient is calculated using the mean intra-cluster
    distance (``a``) and the mean nearest-cluster distance (``b``) for each
    sample. The Silhouette Coefficient for a sample is ``(b - a) / max(a,
    b)``. To clarify, ``b`` is the distance between a sample and the nearest
    cluster that the sample is not a part of.
    Note that the Silhouette Coefficient is only defined if the number of
    labels satisfies 2 <= n_labels <= n_samples - 1.

    This function returns the mean Silhouette Coefficient over all samples.
    To obtain the values for each sample, use :func:`silhouette_samples`.

    The best value is 1 and the worst value is -1. Values near 0 indicate
    overlapping clusters. Negative values generally indicate that a sample has
    been assigned to the wrong cluster, as a different cluster is more similar.

    Read more in the :ref:`User Guide <silhouette_coefficient>`.

    Parameters
    ----------
    X : array [n_samples_a, n_samples_a] if metric == "precomputed", or, \
            [n_samples_a, n_features] otherwise
        Array of pairwise distances between samples, or a feature array.

    labels : array, shape = [n_samples]
        Predicted labels for each sample.

    metric : string, or callable
        The metric to use when calculating distance between instances in a
        feature array. If metric is a string, it must be one of the options
        allowed by :func:`metrics.pairwise.pairwise_distances
        <sklearn.metrics.pairwise.pairwise_distances>`. If X is the distance
        array itself, use ``metric="precomputed"``.

    sample_size : int or None
        The size of the sample to use when computing the Silhouette
        Coefficient on a random subset of the data.
        If ``sample_size is None``, no sampling is used.

    random_state : int, RandomState instance or None, optional (default=None)
        Determines random number generation for selecting a subset of samples.
        Used when ``sample_size is not None``.
        Pass an int for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    **kwds : optional keyword parameters
        Any further parameters are passed directly to the distance function.
        If using a scipy.spatial.distance metric, the parameters are still
        metric dependent. See the scipy docs for usage examples.

    Returns
    -------
    silhouette : float
        Mean Silhouette Coefficient for all samples.

    References
    ----------

    .. [1] `Peter J. Rousseeuw (1987). "Silhouettes: a Graphical Aid to the
       Interpretation and Validation of Cluster Analysis". Computational
       and Applied Mathematics 20: 53-65.
       <https://www.sciencedirect.com/science/article/pii/0377042787901257>`_

    .. [2] `Wikipedia entry on the Silhouette Coefficient
       <https://en.wikipedia.org/wiki/Silhouette_(clustering)>`_

    """
    if sample_size is not None:
        X, labels = check_X_y(X, labels, accept_sparse=['csc', 'csr'])
        random_state = check_random_state(random_state)
        indices = random_state.permutation(X.shape[0])[:sample_size]
        if metric == "precomputed":
            X, labels = X[indices].T[indices].T, labels[indices]
        else:
            X, labels = X[indices], labels[indices]
    return np.mean(silhouette_samples(X, labels, metric=metric, **kwds))
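
A minimal usage sketch of the function above (not part of the committed file; the toy data and KMeans settings are illustrative assumptions):

# Hedged usage sketch: toy data and parameter values are illustrative only.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

# Two well-separated blobs -> mean silhouette close to 1 for a 2-cluster fit.
X, _ = make_blobs(n_samples=200, centers=2, cluster_std=0.5, random_state=0)
labels = KMeans(n_clusters=2, random_state=0).fit_predict(X)
print(silhouette_score(X, labels))            # feature array, euclidean metric
print(silhouette_score(X, labels,
                       sample_size=100,       # score on a random subset
                       random_state=0))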


def _silhouette_reduce(D_chunk, start, labels, label_freqs):
    """Accumulate silhouette statistics for vertical chunk of X

    Parameters
    ----------
    D_chunk : shape (n_chunk_samples, n_samples)
        precomputed distances for a chunk
    start : int
        first index in chunk
    labels : array, shape (n_samples,)
        corresponding cluster labels, encoded as {0, ..., n_clusters-1}
    label_freqs : array
        distribution of cluster labels in ``labels``
    """
    # accumulate distances from each sample to each cluster
    clust_dists = np.zeros((len(D_chunk), len(label_freqs)),
                           dtype=D_chunk.dtype)
    for i in range(len(D_chunk)):
        clust_dists[i] += np.bincount(labels, weights=D_chunk[i],
                                      minlength=len(label_freqs))

    # intra_index selects intra-cluster distances within clust_dists
    intra_index = (np.arange(len(D_chunk)), labels[start:start + len(D_chunk)])
    # intra_clust_dists are averaged over cluster size outside this function
    intra_clust_dists = clust_dists[intra_index]
    # of the remaining distances we normalise and extract the minimum
    clust_dists[intra_index] = np.inf
    clust_dists /= label_freqs
    inter_clust_dists = clust_dists.min(axis=1)
    return intra_clust_dists, inter_clust_dists
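
The chunk reducer above leans on ``np.bincount`` with ``weights`` to sum, per cluster, the distances from one sample to every member of that cluster. A tiny stand-alone sketch of just that step (values are illustrative):

# Hedged sketch of the bincount accumulation used in _silhouette_reduce.
import numpy as np

labels = np.array([0, 0, 1, 1, 1])          # cluster label of each sample
row = np.array([0.0, 1.0, 4.0, 5.0, 6.0])   # distances from one sample to all samples
# Sum of distances from this sample to each cluster: [0+1, 4+5+6] -> [1., 15.]
per_cluster = np.bincount(labels, weights=row, minlength=2)
print(per_cluster)  # [ 1. 15.]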
@_deprecate_positional_args
def silhouette_samples(X, labels, *, metric='euclidean', **kwds):
    """Compute the Silhouette Coefficient for each sample.

    The Silhouette Coefficient is a measure of how well samples are clustered
    with samples that are similar to themselves. Clustering models with a high
    Silhouette Coefficient are said to be dense, where samples in the same
    cluster are similar to each other, and well separated, where samples in
    different clusters are not very similar to each other.

    The Silhouette Coefficient is calculated using the mean intra-cluster
    distance (``a``) and the mean nearest-cluster distance (``b``) for each
    sample. The Silhouette Coefficient for a sample is ``(b - a) / max(a,
    b)``.
    Note that the Silhouette Coefficient is only defined if the number of
    labels satisfies 2 <= n_labels <= n_samples - 1.

    This function returns the Silhouette Coefficient for each sample.

    The best value is 1 and the worst value is -1. Values near 0 indicate
    overlapping clusters.

    Read more in the :ref:`User Guide <silhouette_coefficient>`.

    Parameters
    ----------
    X : array [n_samples_a, n_samples_a] if metric == "precomputed", or, \
            [n_samples_a, n_features] otherwise
        Array of pairwise distances between samples, or a feature array.

    labels : array, shape = [n_samples]
        Label values for each sample.

    metric : string, or callable
        The metric to use when calculating distance between instances in a
        feature array. If metric is a string, it must be one of the options
        allowed by :func:`sklearn.metrics.pairwise.pairwise_distances`. If X is
        the distance array itself, use "precomputed" as the metric. Precomputed
        distance matrices must have 0 along the diagonal.

    `**kwds` : optional keyword parameters
        Any further parameters are passed directly to the distance function.
        If using a ``scipy.spatial.distance`` metric, the parameters are still
        metric dependent. See the scipy docs for usage examples.

    Returns
    -------
    silhouette : array, shape = [n_samples]
        Silhouette Coefficient for each sample.

    References
    ----------

    .. [1] `Peter J. Rousseeuw (1987). "Silhouettes: a Graphical Aid to the
       Interpretation and Validation of Cluster Analysis". Computational
       and Applied Mathematics 20: 53-65.
       <https://www.sciencedirect.com/science/article/pii/0377042787901257>`_

    .. [2] `Wikipedia entry on the Silhouette Coefficient
       <https://en.wikipedia.org/wiki/Silhouette_(clustering)>`_

    """
    X, labels = check_X_y(X, labels, accept_sparse=['csc', 'csr'])

    # Check for non-zero diagonal entries in precomputed distance matrix
    if metric == 'precomputed':
        atol = np.finfo(X.dtype).eps * 100
        if np.any(np.abs(np.diagonal(X)) > atol):
            raise ValueError(
                'The precomputed distance matrix contains non-zero '
                'elements on the diagonal. Use np.fill_diagonal(X, 0).'
            )

    le = LabelEncoder()
    labels = le.fit_transform(labels)
    n_samples = len(labels)
    label_freqs = np.bincount(labels)
    check_number_of_labels(len(le.classes_), n_samples)

    kwds['metric'] = metric
    reduce_func = functools.partial(_silhouette_reduce,
                                    labels=labels, label_freqs=label_freqs)
    results = zip(*pairwise_distances_chunked(X, reduce_func=reduce_func,
                                              **kwds))
    intra_clust_dists, inter_clust_dists = results
    intra_clust_dists = np.concatenate(intra_clust_dists)
    inter_clust_dists = np.concatenate(inter_clust_dists)

    denom = (label_freqs - 1).take(labels, mode='clip')
    with np.errstate(divide="ignore", invalid="ignore"):
        intra_clust_dists /= denom

    sil_samples = inter_clust_dists - intra_clust_dists
    with np.errstate(divide="ignore", invalid="ignore"):
        sil_samples /= np.maximum(intra_clust_dists, inter_clust_dists)
    # nan values are for clusters of size 1, and should be 0
    return np.nan_to_num(sil_samples)
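
A short sketch of the precomputed-distance path documented above (toy data assumed); the matrix must have an exactly zero diagonal or the check above raises ValueError:

# Hedged sketch: per-sample silhouettes from a precomputed distance matrix.
import numpy as np
from sklearn.metrics import pairwise_distances, silhouette_samples

X = np.array([[0.0], [0.1], [5.0], [5.2]])
labels = [0, 0, 1, 1]
D = pairwise_distances(X)          # zero diagonal by construction
print(silhouette_samples(D, labels, metric='precomputed'))

D_bad = D.copy()
D_bad[0, 0] = 1e-3                 # non-zero diagonal entry
# silhouette_samples(D_bad, labels, metric='precomputed')  # would raise ValueError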
def calinski_harabasz_score(X, labels):
    """Compute the Calinski and Harabasz score.

    It is also known as the Variance Ratio Criterion.

    The score is defined as the ratio of the between-cluster dispersion to
    the within-cluster dispersion; a higher score indicates better-defined
    clusters.

    Read more in the :ref:`User Guide <calinski_harabasz_index>`.

    Parameters
    ----------
    X : array-like, shape (``n_samples``, ``n_features``)
        List of ``n_features``-dimensional data points. Each row corresponds
        to a single data point.

    labels : array-like, shape (``n_samples``,)
        Predicted labels for each sample.

    Returns
    -------
    score : float
        The resulting Calinski-Harabasz score.

    References
    ----------
    .. [1] `T. Calinski and J. Harabasz, 1974. "A dendrite method for cluster
       analysis". Communications in Statistics
       <https://www.tandfonline.com/doi/abs/10.1080/03610927408827101>`_
    """
    X, labels = check_X_y(X, labels)
    le = LabelEncoder()
    labels = le.fit_transform(labels)

    n_samples, _ = X.shape
    n_labels = len(le.classes_)

    check_number_of_labels(n_labels, n_samples)

    extra_disp, intra_disp = 0., 0.
    mean = np.mean(X, axis=0)
    for k in range(n_labels):
        cluster_k = X[labels == k]
        mean_k = np.mean(cluster_k, axis=0)
        extra_disp += len(cluster_k) * np.sum((mean_k - mean) ** 2)
        intra_disp += np.sum((cluster_k - mean_k) ** 2)

    return (1. if intra_disp == 0. else
            extra_disp * (n_samples - n_labels) /
            (intra_disp * (n_labels - 1.)))

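A hand-checkable sketch of the ratio computed above, ``extra_disp * (n_samples - n_labels) / (intra_disp * (n_labels - 1))``, on tiny illustrative data:

# Hedged sketch: reproduce calinski_harabasz_score by hand on toy data.
import numpy as np
from sklearn.metrics import calinski_harabasz_score

X = np.array([[0.0], [1.0], [10.0], [11.0]])
labels = np.array([0, 0, 1, 1])

overall_mean = X.mean(axis=0)
extra_disp = sum(len(X[labels == k]) *
                 np.sum((X[labels == k].mean(axis=0) - overall_mean) ** 2)
                 for k in (0, 1))
intra_disp = sum(np.sum((X[labels == k] - X[labels == k].mean(axis=0)) ** 2)
                 for k in (0, 1))
by_hand = extra_disp * (len(X) - 2) / (intra_disp * (2 - 1))   # 200.0 here
assert np.isclose(by_hand, calinski_harabasz_score(X, labels))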
def davies_bouldin_score(X, labels):
    """Compute the Davies-Bouldin score.

    The score is defined as the average similarity measure of each cluster
    with its most similar cluster, where similarity is the ratio of
    within-cluster distances to between-cluster distances. Thus, clusters
    which are farther apart and less dispersed will result in a better score.

    The minimum score is zero, with lower values indicating better clustering.

    Read more in the :ref:`User Guide <davies-bouldin_index>`.

    .. versionadded:: 0.20

    Parameters
    ----------
    X : array-like, shape (``n_samples``, ``n_features``)
        List of ``n_features``-dimensional data points. Each row corresponds
        to a single data point.

    labels : array-like, shape (``n_samples``,)
        Predicted labels for each sample.

    Returns
    -------
    score : float
        The resulting Davies-Bouldin score.

    References
    ----------
    .. [1] Davies, David L.; Bouldin, Donald W. (1979).
       `"A Cluster Separation Measure"
       <https://ieeexplore.ieee.org/document/4766909>`__.
       IEEE Transactions on Pattern Analysis and Machine Intelligence.
       PAMI-1 (2): 224-227
    """
    X, labels = check_X_y(X, labels)
    le = LabelEncoder()
    labels = le.fit_transform(labels)
    n_samples, _ = X.shape
    n_labels = len(le.classes_)
    check_number_of_labels(n_labels, n_samples)

    intra_dists = np.zeros(n_labels)
    centroids = np.zeros((n_labels, len(X[0])), dtype=float)
    for k in range(n_labels):
        cluster_k = _safe_indexing(X, labels == k)
        centroid = cluster_k.mean(axis=0)
        centroids[k] = centroid
        intra_dists[k] = np.average(pairwise_distances(
            cluster_k, [centroid]))

    centroid_distances = pairwise_distances(centroids)

    if np.allclose(intra_dists, 0) or np.allclose(centroid_distances, 0):
        return 0.0

    centroid_distances[centroid_distances == 0] = np.inf
    combined_intra_dists = intra_dists[:, None] + intra_dists
    scores = np.max(combined_intra_dists / centroid_distances, axis=1)
    return np.mean(scores)

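A small sketch of the similarity ratio described in the docstring, computed by hand for two one-dimensional clusters (illustrative data):

# Hedged sketch: Davies-Bouldin by hand for two 1-D clusters.
import numpy as np
from sklearn.metrics import davies_bouldin_score

X = np.array([[0.0], [2.0], [10.0], [12.0]])
labels = np.array([0, 0, 1, 1])

# mean distance of each cluster's points to its centroid
s0 = np.mean(np.abs(X[labels == 0] - X[labels == 0].mean()))   # 1.0
s1 = np.mean(np.abs(X[labels == 1] - X[labels == 1].mean()))   # 1.0
d01 = abs(X[labels == 0].mean() - X[labels == 1].mean())       # 10.0
by_hand = np.mean([(s0 + s1) / d01, (s0 + s1) / d01])          # 0.2
assert np.isclose(by_hand, davies_bouldin_score(X, labels))
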
18
venv/Lib/site-packages/sklearn/metrics/cluster/bicluster.py
Normal file
18
venv/Lib/site-packages/sklearn/metrics/cluster/bicluster.py
Normal file
@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _bicluster  # type: ignore
from ...externals._pep562 import Pep562
from ...utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.metrics.cluster.bicluster'
correct_import_path = 'sklearn.metrics.cluster'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_bicluster, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
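
These generated shims keep the old import path working through a module-level ``__getattr__`` (PEP 562) while warning that the public path has moved. A sketch of the effect from user code (illustrative; the exact warning category and text come from scikit-learn):

# Hedged sketch: the deprecated module path still resolves via __getattr__,
# while the supported path is sklearn.metrics.cluster.
import warnings

warnings.simplefilter("ignore", FutureWarning)   # silence the shim's deprecation notice
from sklearn.metrics.cluster.bicluster import consensus_score     # old path (shim)
from sklearn.metrics.cluster import consensus_score as supported  # new path

assert consensus_score is supported   # both names resolve to the same function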
@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _expected_mutual_info_fast  # type: ignore
from ...externals._pep562 import Pep562
from ...utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.metrics.cluster.expected_mutual_info_fast'
correct_import_path = 'sklearn.metrics.cluster'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_expected_mutual_info_fast, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
24
venv/Lib/site-packages/sklearn/metrics/cluster/setup.py
Normal file
24
venv/Lib/site-packages/sklearn/metrics/cluster/setup.py
Normal file
@@ -0,0 +1,24 @@
import os

import numpy
from numpy.distutils.misc_util import Configuration


def configuration(parent_package="", top_path=None):
    config = Configuration("cluster", parent_package, top_path)
    libraries = []
    if os.name == 'posix':
        libraries.append('m')
    config.add_extension("_expected_mutual_info_fast",
                         sources=["_expected_mutual_info_fast.pyx"],
                         include_dirs=[numpy.get_include()],
                         libraries=libraries)

    config.add_subpackage("tests")

    return config


if __name__ == "__main__":
    from numpy.distutils.core import setup
    setup(**configuration().todict())
18
venv/Lib/site-packages/sklearn/metrics/cluster/supervised.py
Normal file
18
venv/Lib/site-packages/sklearn/metrics/cluster/supervised.py
Normal file
@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _supervised  # type: ignore
from ...externals._pep562 import Pep562
from ...utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.metrics.cluster.supervised'
correct_import_path = 'sklearn.metrics.cluster'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_supervised, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
Binary file not shown.
@@ -0,0 +1,50 @@
"""Testing for bicluster metrics module"""

import numpy as np

from sklearn.utils._testing import assert_almost_equal

from sklearn.metrics.cluster._bicluster import _jaccard
from sklearn.metrics import consensus_score


def test_jaccard():
    a1 = np.array([True, True, False, False])
    a2 = np.array([True, True, True, True])
    a3 = np.array([False, True, True, False])
    a4 = np.array([False, False, True, True])

    assert _jaccard(a1, a1, a1, a1) == 1
    assert _jaccard(a1, a1, a2, a2) == 0.25
    assert _jaccard(a1, a1, a3, a3) == 1.0 / 7
    assert _jaccard(a1, a1, a4, a4) == 0


def test_consensus_score():
    a = [[True, True, False, False],
         [False, False, True, True]]
    b = a[::-1]

    assert consensus_score((a, a), (a, a)) == 1
    assert consensus_score((a, a), (b, b)) == 1
    assert consensus_score((a, b), (a, b)) == 1
    assert consensus_score((a, b), (b, a)) == 1

    assert consensus_score((a, a), (b, a)) == 0
    assert consensus_score((a, a), (a, b)) == 0
    assert consensus_score((b, b), (a, b)) == 0
    assert consensus_score((b, b), (b, a)) == 0


def test_consensus_score_issue2445():
    """Different number of biclusters in A and B"""
    a_rows = np.array([[True, True, False, False],
                       [False, False, True, True],
                       [False, False, False, True]])
    a_cols = np.array([[True, True, False, False],
                       [False, False, True, True],
                       [False, False, False, True]])
    idx = [0, 2]
    s = consensus_score((a_rows, a_cols), (a_rows[idx], a_cols[idx]))
    # B contains 2 of the 3 biclusters in A, so score should be 2/3
    assert_almost_equal(s, 2.0/3.0)
|
@ -0,0 +1,211 @@
|
|||
from functools import partial
|
||||
|
||||
import pytest
|
||||
import numpy as np
|
||||
|
||||
from sklearn.metrics.cluster import adjusted_mutual_info_score
|
||||
from sklearn.metrics.cluster import adjusted_rand_score
|
||||
from sklearn.metrics.cluster import completeness_score
|
||||
from sklearn.metrics.cluster import fowlkes_mallows_score
|
||||
from sklearn.metrics.cluster import homogeneity_score
|
||||
from sklearn.metrics.cluster import mutual_info_score
|
||||
from sklearn.metrics.cluster import normalized_mutual_info_score
|
||||
from sklearn.metrics.cluster import v_measure_score
|
||||
from sklearn.metrics.cluster import silhouette_score
|
||||
from sklearn.metrics.cluster import calinski_harabasz_score
|
||||
from sklearn.metrics.cluster import davies_bouldin_score
|
||||
|
||||
from sklearn.utils._testing import assert_allclose
|
||||
|
||||
|
||||
# Dictionaries of metrics
|
||||
# ------------------------
|
||||
# The goal of having those dictionaries is to have an easy way to call a
|
||||
# particular metric and associate a name to each function:
|
||||
# - SUPERVISED_METRICS: all supervised cluster metrics - (when given a
|
||||
# ground truth value)
|
||||
# - UNSUPERVISED_METRICS: all unsupervised cluster metrics
|
||||
#
|
||||
# Those dictionaries will be used to test systematically some invariance
|
||||
# properties, e.g. invariance toward several input layout.
|
||||
#
|
||||
|
||||
SUPERVISED_METRICS = {
|
||||
"adjusted_mutual_info_score": adjusted_mutual_info_score,
|
||||
"adjusted_rand_score": adjusted_rand_score,
|
||||
"completeness_score": completeness_score,
|
||||
"homogeneity_score": homogeneity_score,
|
||||
"mutual_info_score": mutual_info_score,
|
||||
"normalized_mutual_info_score": normalized_mutual_info_score,
|
||||
"v_measure_score": v_measure_score,
|
||||
"fowlkes_mallows_score": fowlkes_mallows_score
|
||||
}
|
||||
|
||||
UNSUPERVISED_METRICS = {
|
||||
"silhouette_score": silhouette_score,
|
||||
"silhouette_manhattan": partial(silhouette_score, metric='manhattan'),
|
||||
"calinski_harabasz_score": calinski_harabasz_score,
|
||||
"davies_bouldin_score": davies_bouldin_score
|
||||
}
|
||||
|
||||
# Lists of metrics with common properties
|
||||
# ---------------------------------------
|
||||
# Lists of metrics with common properties are used to test systematically some
|
||||
# functionalities and invariance, e.g. SYMMETRIC_METRICS lists all metrics
|
||||
# that are symmetric with respect to their input argument y_true and y_pred.
|
||||
#
|
||||
# --------------------------------------------------------------------
|
||||
# Symmetric with respect to their input arguments y_true and y_pred.
|
||||
# Symmetric metrics only apply to supervised clusters.
|
||||
SYMMETRIC_METRICS = [
|
||||
"adjusted_rand_score", "v_measure_score",
|
||||
"mutual_info_score", "adjusted_mutual_info_score",
|
||||
"normalized_mutual_info_score", "fowlkes_mallows_score"
|
||||
]
|
||||
|
||||
NON_SYMMETRIC_METRICS = ["homogeneity_score", "completeness_score"]
|
||||
|
||||
# Metrics whose upper bound is 1
|
||||
NORMALIZED_METRICS = [
|
||||
"adjusted_rand_score", "homogeneity_score", "completeness_score",
|
||||
"v_measure_score", "adjusted_mutual_info_score", "fowlkes_mallows_score",
|
||||
"normalized_mutual_info_score"
|
||||
]
|
||||
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
y1 = rng.randint(3, size=30)
|
||||
y2 = rng.randint(3, size=30)
|
||||
|
||||
|
||||
def test_symmetric_non_symmetric_union():
|
||||
assert (sorted(SYMMETRIC_METRICS + NON_SYMMETRIC_METRICS) ==
|
||||
sorted(SUPERVISED_METRICS))
|
||||
|
||||
|
||||
# 0.22 AMI and NMI changes
|
||||
@pytest.mark.filterwarnings('ignore::FutureWarning')
|
||||
@pytest.mark.parametrize(
|
||||
'metric_name, y1, y2',
|
||||
[(name, y1, y2) for name in SYMMETRIC_METRICS]
|
||||
)
|
||||
def test_symmetry(metric_name, y1, y2):
|
||||
metric = SUPERVISED_METRICS[metric_name]
|
||||
assert metric(y1, y2) == pytest.approx(metric(y2, y1))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'metric_name, y1, y2',
|
||||
[(name, y1, y2) for name in NON_SYMMETRIC_METRICS]
|
||||
)
|
||||
def test_non_symmetry(metric_name, y1, y2):
|
||||
metric = SUPERVISED_METRICS[metric_name]
|
||||
assert metric(y1, y2) != pytest.approx(metric(y2, y1))
|
||||
|
||||
|
||||
# 0.22 AMI and NMI changes
|
||||
@pytest.mark.filterwarnings('ignore::FutureWarning')
|
||||
@pytest.mark.parametrize("metric_name", NORMALIZED_METRICS)
|
||||
def test_normalized_output(metric_name):
|
||||
upper_bound_1 = [0, 0, 0, 1, 1, 1]
|
||||
upper_bound_2 = [0, 0, 0, 1, 1, 1]
|
||||
metric = SUPERVISED_METRICS[metric_name]
|
||||
assert metric([0, 0, 0, 1, 1], [0, 0, 0, 1, 2]) > 0.0
|
||||
assert metric([0, 0, 1, 1, 2], [0, 0, 1, 1, 1]) > 0.0
|
||||
assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
|
||||
assert metric([0, 0, 0, 1, 2], [0, 1, 1, 1, 1]) < 1.0
|
||||
assert metric(upper_bound_1, upper_bound_2) == pytest.approx(1.0)
|
||||
|
||||
lower_bound_1 = [0, 0, 0, 0, 0, 0]
|
||||
lower_bound_2 = [0, 1, 2, 3, 4, 5]
|
||||
score = np.array([metric(lower_bound_1, lower_bound_2),
|
||||
metric(lower_bound_2, lower_bound_1)])
|
||||
assert not (score < 0).any()
|
||||
|
||||
|
||||
# 0.22 AMI and NMI changes
|
||||
@pytest.mark.filterwarnings('ignore::FutureWarning')
|
||||
@pytest.mark.parametrize(
|
||||
"metric_name", dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS)
|
||||
)
|
||||
def test_permute_labels(metric_name):
|
||||
# No clustering metric should change its score when the labels are permuted,
# e.g. when labels 0 and 1 are exchanged.
|
||||
y_label = np.array([0, 0, 0, 1, 1, 0, 1])
|
||||
y_pred = np.array([1, 0, 1, 0, 1, 1, 0])
|
||||
if metric_name in SUPERVISED_METRICS:
|
||||
metric = SUPERVISED_METRICS[metric_name]
|
||||
score_1 = metric(y_pred, y_label)
|
||||
assert_allclose(score_1, metric(1 - y_pred, y_label))
|
||||
assert_allclose(score_1, metric(1 - y_pred, 1 - y_label))
|
||||
assert_allclose(score_1, metric(y_pred, 1 - y_label))
|
||||
else:
|
||||
metric = UNSUPERVISED_METRICS[metric_name]
|
||||
X = np.random.randint(10, size=(7, 10))
|
||||
score_1 = metric(X, y_pred)
|
||||
assert_allclose(score_1, metric(X, 1 - y_pred))
|
||||
|
||||
|
||||
# 0.22 AMI and NMI changes
|
||||
@pytest.mark.filterwarnings('ignore::FutureWarning')
|
||||
@pytest.mark.parametrize(
|
||||
"metric_name", dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS)
|
||||
)
|
||||
# For all clustering metrics, the input labels can be given as arrays or
# lists, with positive, negative or string values.
|
||||
def test_format_invariance(metric_name):
|
||||
y_true = [0, 0, 0, 0, 1, 1, 1, 1]
|
||||
y_pred = [0, 1, 2, 3, 4, 5, 6, 7]
|
||||
|
||||
def generate_formats(y):
|
||||
y = np.array(y)
|
||||
yield y, 'array of ints'
|
||||
yield y.tolist(), 'list of ints'
|
||||
yield [str(x) + "-a" for x in y.tolist()], 'list of strs'
|
||||
yield (np.array([str(x) + "-a" for x in y.tolist()], dtype=object),
|
||||
'array of strs')
|
||||
yield y - 1, 'including negative ints'
|
||||
yield y + 1, 'strictly positive ints'
|
||||
|
||||
if metric_name in SUPERVISED_METRICS:
|
||||
metric = SUPERVISED_METRICS[metric_name]
|
||||
score_1 = metric(y_true, y_pred)
|
||||
y_true_gen = generate_formats(y_true)
|
||||
y_pred_gen = generate_formats(y_pred)
|
||||
for (y_true_fmt, fmt_name), (y_pred_fmt, _) in zip(y_true_gen,
|
||||
y_pred_gen):
|
||||
assert score_1 == metric(y_true_fmt, y_pred_fmt)
|
||||
else:
|
||||
metric = UNSUPERVISED_METRICS[metric_name]
|
||||
X = np.random.randint(10, size=(8, 10))
|
||||
score_1 = metric(X, y_true)
|
||||
assert score_1 == metric(X.astype(float), y_true)
|
||||
y_true_gen = generate_formats(y_true)
|
||||
for (y_true_fmt, fmt_name) in y_true_gen:
|
||||
assert score_1 == metric(X, y_true_fmt)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("metric", SUPERVISED_METRICS.values())
|
||||
def test_single_sample(metric):
|
||||
# only the supervised metrics support single sample
|
||||
for i, j in [(0, 0), (0, 1), (1, 0), (1, 1)]:
|
||||
metric([i], [j])
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"metric_name, metric_func",
|
||||
dict(SUPERVISED_METRICS, **UNSUPERVISED_METRICS).items()
|
||||
)
|
||||
def test_inf_nan_input(metric_name, metric_func):
|
||||
if metric_name in SUPERVISED_METRICS:
|
||||
invalids = [([0, 1], [np.inf, np.inf]),
|
||||
([0, 1], [np.nan, np.nan]),
|
||||
([0, 1], [np.nan, np.inf])]
|
||||
else:
|
||||
X = np.random.randint(10, size=(2, 10))
|
||||
invalids = [(X, [np.inf, np.inf]),
|
||||
(X, [np.nan, np.nan]),
|
||||
(X, [np.nan, np.inf])]
|
||||
with pytest.raises(ValueError, match='contains NaN, infinity'):
|
||||
for args in invalids:
|
||||
metric_func(*args)
|
|
@ -0,0 +1,358 @@
|
|||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from sklearn.metrics.cluster import adjusted_mutual_info_score
|
||||
from sklearn.metrics.cluster import adjusted_rand_score
|
||||
from sklearn.metrics.cluster import completeness_score
|
||||
from sklearn.metrics.cluster import contingency_matrix
|
||||
from sklearn.metrics.cluster import entropy
|
||||
from sklearn.metrics.cluster import expected_mutual_information
|
||||
from sklearn.metrics.cluster import fowlkes_mallows_score
|
||||
from sklearn.metrics.cluster import homogeneity_completeness_v_measure
|
||||
from sklearn.metrics.cluster import homogeneity_score
|
||||
from sklearn.metrics.cluster import mutual_info_score
|
||||
from sklearn.metrics.cluster import normalized_mutual_info_score
|
||||
from sklearn.metrics.cluster import v_measure_score
|
||||
from sklearn.metrics.cluster._supervised import _generalized_average
|
||||
|
||||
from sklearn.utils import assert_all_finite
|
||||
from sklearn.utils._testing import (
|
||||
assert_almost_equal, ignore_warnings)
|
||||
from numpy.testing import assert_array_almost_equal
|
||||
|
||||
|
||||
score_funcs = [
|
||||
adjusted_rand_score,
|
||||
homogeneity_score,
|
||||
completeness_score,
|
||||
v_measure_score,
|
||||
adjusted_mutual_info_score,
|
||||
normalized_mutual_info_score,
|
||||
]
|
||||
|
||||
|
||||
@ignore_warnings(category=FutureWarning)
|
||||
def test_error_messages_on_wrong_input():
|
||||
for score_func in score_funcs:
|
||||
expected = (r'Found input variables with inconsistent numbers '
|
||||
r'of samples: \[2, 3\]')
|
||||
with pytest.raises(ValueError, match=expected):
|
||||
score_func([0, 1], [1, 1, 1])
|
||||
|
||||
expected = r"labels_true must be 1D: shape is \(2"
|
||||
with pytest.raises(ValueError, match=expected):
|
||||
score_func([[0, 1], [1, 0]], [1, 1, 1])
|
||||
|
||||
expected = r"labels_pred must be 1D: shape is \(2"
|
||||
with pytest.raises(ValueError, match=expected):
|
||||
score_func([0, 1, 0], [[1, 1], [0, 0]])
|
||||
|
||||
|
||||
def test_generalized_average():
|
||||
a, b = 1, 2
|
||||
methods = ["min", "geometric", "arithmetic", "max"]
|
||||
means = [_generalized_average(a, b, method) for method in methods]
|
||||
assert means[0] <= means[1] <= means[2] <= means[3]
|
||||
c, d = 12, 12
|
||||
means = [_generalized_average(c, d, method) for method in methods]
|
||||
assert means[0] == means[1] == means[2] == means[3]
|
||||
|
||||
|
||||
@ignore_warnings(category=FutureWarning)
|
||||
def test_perfect_matches():
|
||||
for score_func in score_funcs:
|
||||
assert score_func([], []) == pytest.approx(1.0)
|
||||
assert score_func([0], [1]) == pytest.approx(1.0)
|
||||
assert score_func([0, 0, 0], [0, 0, 0]) == pytest.approx(1.0)
|
||||
assert score_func([0, 1, 0], [42, 7, 42]) == pytest.approx(1.0)
|
||||
assert score_func([0., 1., 0.], [42., 7., 42.]) == pytest.approx(1.0)
|
||||
assert score_func([0., 1., 2.], [42., 7., 2.]) == pytest.approx(1.0)
|
||||
assert score_func([0, 1, 2], [42, 7, 2]) == pytest.approx(1.0)
|
||||
score_funcs_with_changing_means = [
|
||||
normalized_mutual_info_score,
|
||||
adjusted_mutual_info_score,
|
||||
]
|
||||
means = {"min", "geometric", "arithmetic", "max"}
|
||||
for score_func in score_funcs_with_changing_means:
|
||||
for mean in means:
|
||||
assert score_func([], [], mean) == pytest.approx(1.0)
|
||||
assert score_func([0], [1], mean) == pytest.approx(1.0)
|
||||
assert score_func([0, 0, 0], [0, 0, 0], mean) == pytest.approx(1.0)
|
||||
assert score_func(
|
||||
[0, 1, 0], [42, 7, 42], mean) == pytest.approx(1.0)
|
||||
assert score_func(
|
||||
[0., 1., 0.], [42., 7., 42.], mean) == pytest.approx(1.0)
|
||||
assert score_func(
|
||||
[0., 1., 2.], [42., 7., 2.], mean) == pytest.approx(1.0)
|
||||
assert score_func(
|
||||
[0, 1, 2], [42, 7, 2], mean) == pytest.approx(1.0)
|
||||
|
||||
|
||||
def test_homogeneous_but_not_complete_labeling():
|
||||
# homogeneous but not complete clustering
|
||||
h, c, v = homogeneity_completeness_v_measure(
|
||||
[0, 0, 0, 1, 1, 1],
|
||||
[0, 0, 0, 1, 2, 2])
|
||||
assert_almost_equal(h, 1.00, 2)
|
||||
assert_almost_equal(c, 0.69, 2)
|
||||
assert_almost_equal(v, 0.81, 2)
|
||||
|
||||
|
||||
def test_complete_but_not_homogeneous_labeling():
|
||||
# complete but not homogeneous clustering
|
||||
h, c, v = homogeneity_completeness_v_measure(
|
||||
[0, 0, 1, 1, 2, 2],
|
||||
[0, 0, 1, 1, 1, 1])
|
||||
assert_almost_equal(h, 0.58, 2)
|
||||
assert_almost_equal(c, 1.00, 2)
|
||||
assert_almost_equal(v, 0.73, 2)
|
||||
|
||||
|
||||
def test_not_complete_and_not_homogeneous_labeling():
|
||||
# neither complete nor homogeneous but not so bad either
|
||||
h, c, v = homogeneity_completeness_v_measure(
|
||||
[0, 0, 0, 1, 1, 1],
|
||||
[0, 1, 0, 1, 2, 2])
|
||||
assert_almost_equal(h, 0.67, 2)
|
||||
assert_almost_equal(c, 0.42, 2)
|
||||
assert_almost_equal(v, 0.52, 2)
|
||||
|
||||
|
||||
def test_beta_parameter():
|
||||
# test for when beta passed to
|
||||
# homogeneity_completeness_v_measure
|
||||
# and v_measure_score
|
||||
beta_test = 0.2
|
||||
h_test = 0.67
|
||||
c_test = 0.42
|
||||
v_test = ((1 + beta_test) * h_test * c_test
|
||||
/ (beta_test * h_test + c_test))
|
||||
|
||||
h, c, v = homogeneity_completeness_v_measure(
|
||||
[0, 0, 0, 1, 1, 1],
|
||||
[0, 1, 0, 1, 2, 2],
|
||||
beta=beta_test)
|
||||
assert_almost_equal(h, h_test, 2)
|
||||
assert_almost_equal(c, c_test, 2)
|
||||
assert_almost_equal(v, v_test, 2)
|
||||
|
||||
v = v_measure_score(
|
||||
[0, 0, 0, 1, 1, 1],
|
||||
[0, 1, 0, 1, 2, 2],
|
||||
beta=beta_test)
|
||||
assert_almost_equal(v, v_test, 2)
|
||||
|
||||
|
||||
def test_non_consecutive_labels():
|
||||
# regression tests for labels with gaps
|
||||
h, c, v = homogeneity_completeness_v_measure(
|
||||
[0, 0, 0, 2, 2, 2],
|
||||
[0, 1, 0, 1, 2, 2])
|
||||
assert_almost_equal(h, 0.67, 2)
|
||||
assert_almost_equal(c, 0.42, 2)
|
||||
assert_almost_equal(v, 0.52, 2)
|
||||
|
||||
h, c, v = homogeneity_completeness_v_measure(
|
||||
[0, 0, 0, 1, 1, 1],
|
||||
[0, 4, 0, 4, 2, 2])
|
||||
assert_almost_equal(h, 0.67, 2)
|
||||
assert_almost_equal(c, 0.42, 2)
|
||||
assert_almost_equal(v, 0.52, 2)
|
||||
|
||||
ari_1 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2])
|
||||
ari_2 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2])
|
||||
assert_almost_equal(ari_1, 0.24, 2)
|
||||
assert_almost_equal(ari_2, 0.24, 2)
|
||||
|
||||
|
||||
@ignore_warnings(category=FutureWarning)
|
||||
def uniform_labelings_scores(score_func, n_samples, k_range, n_runs=10,
|
||||
seed=42):
|
||||
# Compute score for random uniform cluster labelings
|
||||
random_labels = np.random.RandomState(seed).randint
|
||||
scores = np.zeros((len(k_range), n_runs))
|
||||
for i, k in enumerate(k_range):
|
||||
for j in range(n_runs):
|
||||
labels_a = random_labels(low=0, high=k, size=n_samples)
|
||||
labels_b = random_labels(low=0, high=k, size=n_samples)
|
||||
scores[i, j] = score_func(labels_a, labels_b)
|
||||
return scores
|
||||
|
||||
|
||||
@ignore_warnings(category=FutureWarning)
|
||||
def test_adjustment_for_chance():
|
||||
# Check that adjusted scores are almost zero on random labels
|
||||
n_clusters_range = [2, 10, 50, 90]
|
||||
n_samples = 100
|
||||
n_runs = 10
|
||||
|
||||
scores = uniform_labelings_scores(
|
||||
adjusted_rand_score, n_samples, n_clusters_range, n_runs)
|
||||
|
||||
max_abs_scores = np.abs(scores).max(axis=1)
|
||||
assert_array_almost_equal(max_abs_scores, [0.02, 0.03, 0.03, 0.02], 2)
|
||||
|
||||
|
||||
def test_adjusted_mutual_info_score():
|
||||
# Compute the Adjusted Mutual Information and test against known values
|
||||
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
|
||||
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
|
||||
# Mutual information
|
||||
mi = mutual_info_score(labels_a, labels_b)
|
||||
assert_almost_equal(mi, 0.41022, 5)
|
||||
# with provided sparse contingency
|
||||
C = contingency_matrix(labels_a, labels_b, sparse=True)
|
||||
mi = mutual_info_score(labels_a, labels_b, contingency=C)
|
||||
assert_almost_equal(mi, 0.41022, 5)
|
||||
# with provided dense contingency
|
||||
C = contingency_matrix(labels_a, labels_b)
|
||||
mi = mutual_info_score(labels_a, labels_b, contingency=C)
|
||||
assert_almost_equal(mi, 0.41022, 5)
|
||||
# Expected mutual information
|
||||
n_samples = C.sum()
|
||||
emi = expected_mutual_information(C, n_samples)
|
||||
assert_almost_equal(emi, 0.15042, 5)
|
||||
# Adjusted mutual information
|
||||
ami = adjusted_mutual_info_score(labels_a, labels_b)
|
||||
assert_almost_equal(ami, 0.27821, 5)
|
||||
ami = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3])
|
||||
assert ami == pytest.approx(1.0)
|
||||
# Test with a very large array
|
||||
a110 = np.array([list(labels_a) * 110]).flatten()
|
||||
b110 = np.array([list(labels_b) * 110]).flatten()
|
||||
ami = adjusted_mutual_info_score(a110, b110)
|
||||
assert_almost_equal(ami, 0.38, 2)
|
||||
|
||||
|
||||
def test_expected_mutual_info_overflow():
|
||||
# Test for regression where contingency cell exceeds 2**16
|
||||
# leading to overflow in np.outer, resulting in EMI > 1
|
||||
assert expected_mutual_information(np.array([[70000]]), 70000) <= 1
|
||||
|
||||
|
||||
def test_int_overflow_mutual_info_fowlkes_mallows_score():
|
||||
# Test overflow in mutual_info_classif and fowlkes_mallows_score
|
||||
x = np.array([1] * (52632 + 2529) + [2] * (14660 + 793) + [3] * (3271 +
|
||||
204) + [4] * (814 + 39) + [5] * (316 + 20))
|
||||
y = np.array([0] * 52632 + [1] * 2529 + [0] * 14660 + [1] * 793 +
|
||||
[0] * 3271 + [1] * 204 + [0] * 814 + [1] * 39 + [0] * 316 +
|
||||
[1] * 20)
|
||||
|
||||
assert_all_finite(mutual_info_score(x, y))
|
||||
assert_all_finite(fowlkes_mallows_score(x, y))
|
||||
|
||||
|
||||
def test_entropy():
|
||||
ent = entropy([0, 0, 42.])
|
||||
assert_almost_equal(ent, 0.6365141, 5)
|
||||
assert_almost_equal(entropy([]), 1)
|
||||
|
||||
|
||||
def test_contingency_matrix():
|
||||
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
|
||||
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
|
||||
C = contingency_matrix(labels_a, labels_b)
|
||||
C2 = np.histogram2d(labels_a, labels_b,
|
||||
bins=(np.arange(1, 5),
|
||||
np.arange(1, 5)))[0]
|
||||
assert_array_almost_equal(C, C2)
|
||||
C = contingency_matrix(labels_a, labels_b, eps=.1)
|
||||
assert_array_almost_equal(C, C2 + .1)
|
||||
|
||||
|
||||
def test_contingency_matrix_sparse():
|
||||
labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
|
||||
labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2])
|
||||
C = contingency_matrix(labels_a, labels_b)
|
||||
C_sparse = contingency_matrix(labels_a, labels_b, sparse=True).toarray()
|
||||
assert_array_almost_equal(C, C_sparse)
|
||||
with pytest.raises(ValueError, match="Cannot set 'eps' when sparse=True"):
|
||||
contingency_matrix(labels_a, labels_b, eps=1e-10, sparse=True)
|
||||
|
||||
|
||||
@ignore_warnings(category=FutureWarning)
|
||||
def test_exactly_zero_info_score():
|
||||
# Check numerical stability when information is exactly zero
|
||||
for i in np.logspace(1, 4, 4).astype(int):
labels_a, labels_b = (np.ones(i, dtype=int),
np.arange(i, dtype=int))
|
||||
assert normalized_mutual_info_score(
|
||||
labels_a, labels_b) == pytest.approx(0.0)
|
||||
assert v_measure_score(
|
||||
labels_a, labels_b) == pytest.approx(0.0)
|
||||
assert adjusted_mutual_info_score(
|
||||
labels_a, labels_b) == pytest.approx(0.0)
|
||||
assert normalized_mutual_info_score(
|
||||
labels_a, labels_b) == pytest.approx(0.0)
|
||||
for method in ["min", "geometric", "arithmetic", "max"]:
|
||||
assert adjusted_mutual_info_score(
|
||||
labels_a, labels_b, method) == pytest.approx(0.0)
|
||||
assert normalized_mutual_info_score(
|
||||
labels_a, labels_b, method) == pytest.approx(0.0)
|
||||
|
||||
|
||||
def test_v_measure_and_mutual_information(seed=36):
|
||||
# Check relation between v_measure, entropy and mutual information
|
||||
for i in np.logspace(1, 4, 4).astype(int):
|
||||
random_state = np.random.RandomState(seed)
|
||||
labels_a, labels_b = (random_state.randint(0, 10, i),
|
||||
random_state.randint(0, 10, i))
|
||||
assert_almost_equal(v_measure_score(labels_a, labels_b),
|
||||
2.0 * mutual_info_score(labels_a, labels_b) /
|
||||
(entropy(labels_a) + entropy(labels_b)), 0)
|
||||
avg = 'arithmetic'
|
||||
assert_almost_equal(v_measure_score(labels_a, labels_b),
|
||||
normalized_mutual_info_score(labels_a, labels_b,
|
||||
average_method=avg)
|
||||
)
|
||||
|
||||
|
||||
def test_fowlkes_mallows_score():
|
||||
# General case
|
||||
score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1],
|
||||
[0, 0, 1, 1, 2, 2])
|
||||
assert_almost_equal(score, 4. / np.sqrt(12. * 6.))
|
||||
|
||||
# Perfect match but where the label names changed
|
||||
perfect_score = fowlkes_mallows_score([0, 0, 0, 1, 1, 1],
|
||||
[1, 1, 1, 0, 0, 0])
|
||||
assert_almost_equal(perfect_score, 1.)
|
||||
|
||||
# Worst case
|
||||
worst_score = fowlkes_mallows_score([0, 0, 0, 0, 0, 0],
|
||||
[0, 1, 2, 3, 4, 5])
|
||||
assert_almost_equal(worst_score, 0.)
|
||||
|
||||
|
||||
def test_fowlkes_mallows_score_properties():
|
||||
# handcrafted example
|
||||
labels_a = np.array([0, 0, 0, 1, 1, 2])
|
||||
labels_b = np.array([1, 1, 2, 2, 0, 0])
|
||||
expected = 1. / np.sqrt((1. + 3.) * (1. + 2.))
|
||||
# FMI = TP / sqrt((TP + FP) * (TP + FN))
|
||||
|
||||
score_original = fowlkes_mallows_score(labels_a, labels_b)
|
||||
assert_almost_equal(score_original, expected)
|
||||
|
||||
# symmetric property
|
||||
score_symmetric = fowlkes_mallows_score(labels_b, labels_a)
|
||||
assert_almost_equal(score_symmetric, expected)
|
||||
|
||||
# permutation property
|
||||
score_permuted = fowlkes_mallows_score((labels_a + 1) % 3, labels_b)
|
||||
assert_almost_equal(score_permuted, expected)
|
||||
|
||||
# symmetric and permutation(both together)
|
||||
score_both = fowlkes_mallows_score(labels_b, (labels_a + 2) % 3)
|
||||
assert_almost_equal(score_both, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('labels_true, labels_pred', [
|
||||
(['a'] * 6, [1, 1, 0, 0, 1, 1]),
|
||||
([1] * 6, [1, 1, 0, 0, 1, 1]),
|
||||
([1, 1, 0, 0, 1, 1], ['a'] * 6),
|
||||
([1, 1, 0, 0, 1, 1], [1] * 6),
|
||||
])
|
||||
def test_mutual_info_score_positive_constant_label(labels_true, labels_pred):
|
||||
# non-regression test for #16355
|
||||
assert mutual_info_score(labels_true, labels_pred) >= 0
|
|
@ -0,0 +1,252 @@
|
|||
import numpy as np
|
||||
import scipy.sparse as sp
|
||||
import pytest
|
||||
from scipy.sparse import csr_matrix
|
||||
|
||||
from sklearn import datasets
|
||||
from sklearn.utils._testing import assert_array_equal
|
||||
from sklearn.metrics.cluster import silhouette_score
|
||||
from sklearn.metrics.cluster import silhouette_samples
|
||||
from sklearn.metrics import pairwise_distances
|
||||
from sklearn.metrics.cluster import calinski_harabasz_score
|
||||
from sklearn.metrics.cluster import davies_bouldin_score
|
||||
|
||||
|
||||
def test_silhouette():
|
||||
# Tests the Silhouette Coefficient.
|
||||
dataset = datasets.load_iris()
|
||||
X_dense = dataset.data
|
||||
X_csr = csr_matrix(X_dense)
|
||||
X_dok = sp.dok_matrix(X_dense)
|
||||
X_lil = sp.lil_matrix(X_dense)
|
||||
y = dataset.target
|
||||
|
||||
for X in [X_dense, X_csr, X_dok, X_lil]:
|
||||
D = pairwise_distances(X, metric='euclidean')
|
||||
# Given that the actual labels are used, we can assume that S would be
|
||||
# positive.
|
||||
score_precomputed = silhouette_score(D, y, metric='precomputed')
|
||||
assert score_precomputed > 0
|
||||
# Test without calculating D
|
||||
score_euclidean = silhouette_score(X, y, metric='euclidean')
|
||||
pytest.approx(score_precomputed, score_euclidean)
|
||||
|
||||
if X is X_dense:
|
||||
score_dense_without_sampling = score_precomputed
|
||||
else:
|
||||
pytest.approx(score_euclidean,
|
||||
score_dense_without_sampling)
|
||||
|
||||
# Test with sampling
|
||||
score_precomputed = silhouette_score(D, y, metric='precomputed',
|
||||
sample_size=int(X.shape[0] / 2),
|
||||
random_state=0)
|
||||
score_euclidean = silhouette_score(X, y, metric='euclidean',
|
||||
sample_size=int(X.shape[0] / 2),
|
||||
random_state=0)
|
||||
assert score_precomputed > 0
|
||||
assert score_euclidean > 0
|
||||
pytest.approx(score_euclidean, score_precomputed)
|
||||
|
||||
if X is X_dense:
|
||||
score_dense_with_sampling = score_precomputed
|
||||
else:
|
||||
pytest.approx(score_euclidean, score_dense_with_sampling)
|
||||
|
||||
|
||||
def test_cluster_size_1():
|
||||
# Assert Silhouette Coefficient == 0 when there is 1 sample in a cluster
|
||||
# (cluster 0). We also test the case where there are identical samples
|
||||
# as the only members of a cluster (cluster 2). To our knowledge, this case
|
||||
# is not discussed in reference material, and we choose for it a sample
|
||||
# score of 1.
|
||||
X = [[0.], [1.], [1.], [2.], [3.], [3.]]
|
||||
labels = np.array([0, 1, 1, 1, 2, 2])
|
||||
|
||||
# Cluster 0: 1 sample -> score of 0 by Rousseeuw's convention
|
||||
# Cluster 1: intra-cluster = [.5, .5, 1]
|
||||
# inter-cluster = [1, 1, 1]
|
||||
# silhouette = [.5, .5, 0]
|
||||
# Cluster 2: intra-cluster = [0, 0]
|
||||
# inter-cluster = [arbitrary, arbitrary]
|
||||
# silhouette = [1., 1.]
|
||||
|
||||
silhouette = silhouette_score(X, labels)
|
||||
assert not np.isnan(silhouette)
|
||||
ss = silhouette_samples(X, labels)
|
||||
assert_array_equal(ss, [0, .5, .5, 0, 1, 1])
|
||||
|
||||
|
||||
def test_silhouette_paper_example():
|
||||
# Explicitly check per-sample results against Rousseeuw (1987)
|
||||
# Data from Table 1
|
||||
lower = [5.58,
|
||||
7.00, 6.50,
|
||||
7.08, 7.00, 3.83,
|
||||
4.83, 5.08, 8.17, 5.83,
|
||||
2.17, 5.75, 6.67, 6.92, 4.92,
|
||||
6.42, 5.00, 5.58, 6.00, 4.67, 6.42,
|
||||
3.42, 5.50, 6.42, 6.42, 5.00, 3.92, 6.17,
|
||||
2.50, 4.92, 6.25, 7.33, 4.50, 2.25, 6.33, 2.75,
|
||||
6.08, 6.67, 4.25, 2.67, 6.00, 6.17, 6.17, 6.92, 6.17,
|
||||
5.25, 6.83, 4.50, 3.75, 5.75, 5.42, 6.08, 5.83, 6.67, 3.67,
|
||||
4.75, 3.00, 6.08, 6.67, 5.00, 5.58, 4.83, 6.17, 5.67, 6.50, 6.92]
|
||||
D = np.zeros((12, 12))
|
||||
D[np.tril_indices(12, -1)] = lower
|
||||
D += D.T
|
||||
|
||||
names = ['BEL', 'BRA', 'CHI', 'CUB', 'EGY', 'FRA', 'IND', 'ISR', 'USA',
|
||||
'USS', 'YUG', 'ZAI']
|
||||
|
||||
# Data from Figure 2
|
||||
labels1 = [1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1]
|
||||
expected1 = {'USA': .43, 'BEL': .39, 'FRA': .35, 'ISR': .30, 'BRA': .22,
|
||||
'EGY': .20, 'ZAI': .19, 'CUB': .40, 'USS': .34, 'CHI': .33,
|
||||
'YUG': .26, 'IND': -.04}
|
||||
score1 = .28
|
||||
|
||||
# Data from Figure 3
|
||||
labels2 = [1, 2, 3, 3, 1, 1, 2, 1, 1, 3, 3, 2]
|
||||
expected2 = {'USA': .47, 'FRA': .44, 'BEL': .42, 'ISR': .37, 'EGY': .02,
|
||||
'ZAI': .28, 'BRA': .25, 'IND': .17, 'CUB': .48, 'USS': .44,
|
||||
'YUG': .31, 'CHI': .31}
|
||||
score2 = .33
|
||||
|
||||
for labels, expected, score in [(labels1, expected1, score1),
|
||||
(labels2, expected2, score2)]:
|
||||
expected = [expected[name] for name in names]
|
||||
# we check to 2dp because that's what's in the paper
|
||||
pytest.approx(expected,
|
||||
silhouette_samples(D, np.array(labels),
|
||||
metric='precomputed'),
|
||||
abs=1e-2)
|
||||
pytest.approx(score,
|
||||
silhouette_score(D, np.array(labels),
|
||||
metric='precomputed'),
|
||||
abs=1e-2)
|
||||
|
||||
|
||||
def test_correct_labelsize():
|
||||
# Assert 1 < n_labels < n_samples
|
||||
dataset = datasets.load_iris()
|
||||
X = dataset.data
|
||||
|
||||
# n_labels = n_samples
|
||||
y = np.arange(X.shape[0])
|
||||
err_msg = (r'Number of labels is %d\. Valid values are 2 '
|
||||
r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)))
|
||||
with pytest.raises(ValueError, match=err_msg):
|
||||
silhouette_score(X, y)
|
||||
|
||||
# n_labels = 1
|
||||
y = np.zeros(X.shape[0])
|
||||
err_msg = (r'Number of labels is %d\. Valid values are 2 '
|
||||
r'to n_samples - 1 \(inclusive\)' % len(np.unique(y)))
|
||||
with pytest.raises(ValueError, match=err_msg):
|
||||
silhouette_score(X, y)
|
||||
|
||||
|
||||
def test_non_encoded_labels():
|
||||
dataset = datasets.load_iris()
|
||||
X = dataset.data
|
||||
labels = dataset.target
|
||||
assert (
|
||||
silhouette_score(X, labels * 2 + 10) == silhouette_score(X, labels))
|
||||
assert_array_equal(
|
||||
silhouette_samples(X, labels * 2 + 10), silhouette_samples(X, labels))
|
||||
|
||||
|
||||
def test_non_numpy_labels():
|
||||
dataset = datasets.load_iris()
|
||||
X = dataset.data
|
||||
y = dataset.target
|
||||
assert (
|
||||
silhouette_score(list(X), list(y)) == silhouette_score(X, y))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('dtype', (np.float32, np.float64))
|
||||
def test_silhouette_nonzero_diag(dtype):
|
||||
# Make sure silhouette_samples requires diagonal to be zero.
|
||||
# Non-regression test for #12178
|
||||
|
||||
# Construct a zero-diagonal matrix
|
||||
dists = pairwise_distances(
|
||||
np.array([[0.2, 0.1, 0.12, 1.34, 1.11, 1.6]], dtype=dtype).T)
|
||||
labels = [0, 0, 0, 1, 1, 1]
|
||||
|
||||
# small values on the diagonal are OK
|
||||
dists[2][2] = np.finfo(dists.dtype).eps * 10
|
||||
silhouette_samples(dists, labels, metric='precomputed')
|
||||
|
||||
# values bigger than eps * 100 are not
|
||||
dists[2][2] = np.finfo(dists.dtype).eps * 1000
|
||||
with pytest.raises(ValueError, match='contains non-zero'):
|
||||
silhouette_samples(dists, labels, metric='precomputed')
|
||||
|
||||
|
||||
def assert_raises_on_only_one_label(func):
|
||||
"""Assert message when there is only one label"""
|
||||
rng = np.random.RandomState(seed=0)
|
||||
with pytest.raises(ValueError, match="Number of labels is"):
|
||||
func(rng.rand(10, 2), np.zeros(10))
|
||||
|
||||
|
||||
def assert_raises_on_all_points_same_cluster(func):
|
||||
"""Assert message when all point are in different clusters"""
|
||||
rng = np.random.RandomState(seed=0)
|
||||
with pytest.raises(ValueError, match="Number of labels is"):
|
||||
func(rng.rand(10, 2), np.arange(10))
|
||||
|
||||
|
||||
def test_calinski_harabasz_score():
|
||||
assert_raises_on_only_one_label(calinski_harabasz_score)
|
||||
|
||||
assert_raises_on_all_points_same_cluster(calinski_harabasz_score)
|
||||
|
||||
# Assert the value is 1. when all samples are equals
|
||||
assert 1. == calinski_harabasz_score(np.ones((10, 2)),
|
||||
[0] * 5 + [1] * 5)
|
||||
|
||||
# Assert the value is 0. when all the mean cluster are equal
|
||||
assert 0. == calinski_harabasz_score([[-1, -1], [1, 1]] * 10,
|
||||
[0] * 10 + [1] * 10)
|
||||
|
||||
# General case (with non numpy arrays)
|
||||
X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
|
||||
[[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
|
||||
labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
|
||||
pytest.approx(calinski_harabasz_score(X, labels),
|
||||
45 * (40 - 4) / (5 * (4 - 1)))
|
||||
|
||||
|
||||
def test_davies_bouldin_score():
|
||||
assert_raises_on_only_one_label(davies_bouldin_score)
|
||||
assert_raises_on_all_points_same_cluster(davies_bouldin_score)
|
||||
|
||||
# Assert the value is 0. when all samples are equals
|
||||
assert davies_bouldin_score(np.ones((10, 2)),
|
||||
[0] * 5 + [1] * 5) == pytest.approx(0.0)
|
||||
|
||||
# Assert the value is 0. when all the mean cluster are equal
|
||||
assert davies_bouldin_score([[-1, -1], [1, 1]] * 10,
|
||||
[0] * 10 + [1] * 10) == pytest.approx(0.0)
|
||||
|
||||
# General case (with non numpy arrays)
|
||||
X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 +
|
||||
[[0, 4], [1, 3]] * 5 + [[3, 1], [4, 0]] * 5)
|
||||
labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
|
||||
pytest.approx(davies_bouldin_score(X, labels), 2 * np.sqrt(0.5) / 3)
|
||||
|
||||
# Ensure divide by zero warning is not raised in general case
|
||||
with pytest.warns(None) as record:
|
||||
davies_bouldin_score(X, labels)
|
||||
div_zero_warnings = [
|
||||
warning for warning in record
|
||||
if "divide by zero encountered" in warning.message.args[0]
|
||||
]
|
||||
assert len(div_zero_warnings) == 0
|
||||
|
||||
# General case - cluster have one sample
|
||||
X = ([[0, 0], [2, 2], [3, 3], [5, 5]])
|
||||
labels = [0, 0, 1, 2]
|
||||
pytest.approx(davies_bouldin_score(X, labels), (5. / 4) / 3)
|
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _unsupervised # type: ignore
|
||||
from ...externals._pep562 import Pep562
|
||||
from ...utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.metrics.cluster.unsupervised'
|
||||
correct_import_path = 'sklearn.metrics.cluster'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_unsupervised, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
1940
venv/Lib/site-packages/sklearn/metrics/pairwise.py
Normal file
1940
venv/Lib/site-packages/sklearn/metrics/pairwise.py
Normal file
File diff suppressed because it is too large
18
venv/Lib/site-packages/sklearn/metrics/pairwise_fast.py
Normal file
18
venv/Lib/site-packages/sklearn/metrics/pairwise_fast.py
Normal file
@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _pairwise_fast  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.metrics.pairwise_fast'
correct_import_path = 'sklearn.metrics'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_pairwise_fast, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
18
venv/Lib/site-packages/sklearn/metrics/ranking.py
Normal file
18
venv/Lib/site-packages/sklearn/metrics/ranking.py
Normal file
@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _ranking  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.metrics.ranking'
correct_import_path = 'sklearn.metrics'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_ranking, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
18
venv/Lib/site-packages/sklearn/metrics/regression.py
Normal file
18
venv/Lib/site-packages/sklearn/metrics/regression.py
Normal file
@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _regression  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.metrics.regression'
correct_import_path = 'sklearn.metrics'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_regression, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
18
venv/Lib/site-packages/sklearn/metrics/scorer.py
Normal file
18
venv/Lib/site-packages/sklearn/metrics/scorer.py
Normal file
@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _scorer  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.metrics.scorer'
correct_import_path = 'sklearn.metrics'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_scorer, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
28
venv/Lib/site-packages/sklearn/metrics/setup.py
Normal file
28
venv/Lib/site-packages/sklearn/metrics/setup.py
Normal file
@@ -0,0 +1,28 @@
import os

from numpy.distutils.misc_util import Configuration


def configuration(parent_package="", top_path=None):
    config = Configuration("metrics", parent_package, top_path)

    libraries = []
    if os.name == 'posix':
        libraries.append('m')

    config.add_subpackage('_plot')
    config.add_subpackage('_plot.tests')
    config.add_subpackage('cluster')

    config.add_extension("_pairwise_fast",
                         sources=["_pairwise_fast.pyx"],
                         libraries=libraries)

    config.add_subpackage('tests')

    return config


if __name__ == "__main__":
    from numpy.distutils.core import setup
    setup(**configuration().todict())
0
venv/Lib/site-packages/sklearn/metrics/tests/__init__.py
Normal file
0
venv/Lib/site-packages/sklearn/metrics/tests/__init__.py
Normal file
Binary file not shown.
2275
venv/Lib/site-packages/sklearn/metrics/tests/test_classification.py
Normal file
2275
venv/Lib/site-packages/sklearn/metrics/tests/test_classification.py
Normal file
File diff suppressed because it is too large
1398
venv/Lib/site-packages/sklearn/metrics/tests/test_common.py
Normal file
1398
venv/Lib/site-packages/sklearn/metrics/tests/test_common.py
Normal file
File diff suppressed because it is too large
1340
venv/Lib/site-packages/sklearn/metrics/tests/test_pairwise.py
Normal file
1340
venv/Lib/site-packages/sklearn/metrics/tests/test_pairwise.py
Normal file
File diff suppressed because it is too large
1471
venv/Lib/site-packages/sklearn/metrics/tests/test_ranking.py
Normal file
1471
venv/Lib/site-packages/sklearn/metrics/tests/test_ranking.py
Normal file
File diff suppressed because it is too large
310
venv/Lib/site-packages/sklearn/metrics/tests/test_regression.py
Normal file
310
venv/Lib/site-packages/sklearn/metrics/tests/test_regression.py
Normal file
@@ -0,0 +1,310 @@
import numpy as np
from numpy.testing import assert_allclose
from itertools import product
import pytest

from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_array_almost_equal

from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import max_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_tweedie_deviance

from sklearn.metrics._regression import _check_reg_targets

from ...exceptions import UndefinedMetricWarning


def test_regression_metrics(n_samples=50):
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)
    assert_almost_equal(mean_squared_log_error(y_true, y_pred),
                        mean_squared_error(np.log(1 + y_true),
                                           np.log(1 + y_pred)))
    assert_almost_equal(mean_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(median_absolute_error(y_true, y_pred), 1.)
    assert_almost_equal(max_error(y_true, y_pred), 1.)
    assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=0),
                        mean_squared_error(y_true, y_pred))

    # Tweedie deviance needs positive y_pred, except for p=0,
    # p>=2 needs positive y_true
    # results evaluated by sympy
    y_true = np.arange(1, 1 + n_samples)
    y_pred = 2 * y_true
    n = n_samples
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=-1),
                        5/12 * n * (n**2 + 2 * n + 1))
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=1),
                        (n + 1) * (1 - np.log(2)))
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=2),
                        2 * np.log(2) - 1)
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=3/2),
                        ((6 * np.sqrt(2) - 8) / n) * np.sqrt(y_true).sum())
    assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=3),
                        np.sum(1 / y_true) / (4 * n))

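# --- Editor's note: illustrative sketch, not part of the original test file ---
# The "results evaluated by sympy" comment above refers to closed forms such as
# the power=3 case; assuming sympy is available, it can be re-derived from the
# standard Tweedie unit deviance as follows.
import sympy as sp

_y, _mu, _p = sp.symbols('y mu p', positive=True)
_unit_deviance = 2 * (_y**(2 - _p) / ((1 - _p) * (2 - _p))
                      - _y * _mu**(1 - _p) / (1 - _p)
                      + _mu**(2 - _p) / (2 - _p))
# With y_pred = 2 * y_true the unit deviance simplifies to 1/(4*y), which gives
# the mean deviance np.sum(1 / y_true) / (4 * n) asserted above.
print(sp.simplify(_unit_deviance.subs({_p: 3, _mu: 2 * _y})))  # -> 1/(4*y)
# --- end editor's note ---
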
def test_mean_squared_error_multioutput_raw_value_squared():
    # non-regression test for
    # https://github.com/scikit-learn/scikit-learn/pull/16323
    mse1 = mean_squared_error(
        [[1]], [[10]], multioutput="raw_values", squared=True
    )
    mse2 = mean_squared_error(
        [[1]], [[10]], multioutput="raw_values", squared=False
    )
    assert np.sqrt(mse1) == pytest.approx(mse2)


def test_multioutput_regression():
    y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])

    error = mean_squared_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)

    error = mean_squared_error(y_true, y_pred, squared=False)
    assert_almost_equal(error, 0.454, decimal=2)

    error = mean_squared_log_error(y_true, y_pred)
    assert_almost_equal(error, 0.200, decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    error = mean_absolute_error(y_true, y_pred)
    assert_almost_equal(error, (1. + 2. / 3) / 4.)

    error = median_absolute_error(y_true, y_pred)
    assert_almost_equal(error, (1. + 1.) / 4.)

    error = r2_score(y_true, y_pred, multioutput='variance_weighted')
    assert_almost_equal(error, 1. - 5. / 2)
    error = r2_score(y_true, y_pred, multioutput='uniform_average')
    assert_almost_equal(error, -.875)


def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_squared_error([0.], [0.], squared=False), 0.00, 2)
    assert_almost_equal(mean_squared_log_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(max_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)
    err_msg = ("Mean Squared Logarithmic Error cannot be used when targets "
               "contain negative values.")
    with pytest.raises(ValueError, match=err_msg):
        mean_squared_log_error([-1.], [-1.])
    err_msg = ("Mean Squared Logarithmic Error cannot be used when targets "
               "contain negative values.")
    with pytest.raises(ValueError, match=err_msg):
        mean_squared_log_error([1., 2., 3.], [1., -2., 3.])
    err_msg = ("Mean Squared Logarithmic Error cannot be used when targets "
               "contain negative values.")
    with pytest.raises(ValueError, match=err_msg):
        mean_squared_log_error([1., -2., 3.], [1., 2., 3.])

    # Tweedie deviance error
    power = -1.2
    assert_allclose(mean_tweedie_deviance([0], [1.], power=power),
                    2 / (2 - power), rtol=1e-3)
    with pytest.raises(ValueError,
                       match="can only be used on strictly positive y_pred."):
        mean_tweedie_deviance([0.], [0.], power=power)
    assert_almost_equal(mean_tweedie_deviance([0.], [0.], power=0), 0.00, 2)

    msg = "only be used on non-negative y and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], power=1.0)

    power = 1.5
    assert_allclose(mean_tweedie_deviance([0.], [1.], power=power),
                    2 / (2 - power))
    msg = "only be used on non-negative y and strictly positive y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], power=power)
    power = 2.
    assert_allclose(mean_tweedie_deviance([1.], [1.], power=power), 0.00,
                    atol=1e-8)
    msg = "can only be used on strictly positive y and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], power=power)
    power = 3.
    assert_allclose(mean_tweedie_deviance([1.], [1.], power=power),
                    0.00, atol=1e-8)

    msg = "can only be used on strictly positive y and y_pred."
    with pytest.raises(ValueError, match=msg):
        mean_tweedie_deviance([0.], [0.], power=power)

    with pytest.raises(ValueError,
                       match="is only defined for power<=0 and power>=1"):
        mean_tweedie_deviance([0.], [0.], power=0.5)

def test__check_reg_targets():
    # All of length 3
    EXAMPLES = [
        ("continuous", [1, 2, 3], 1),
        ("continuous", [[1], [2], [3]], 1),
        ("continuous-multioutput", [[1, 1], [2, 2], [3, 1]], 2),
        ("continuous-multioutput", [[5, 1], [4, 2], [3, 1]], 2),
        ("continuous-multioutput", [[1, 3, 4], [2, 2, 2], [3, 1, 1]], 3),
    ]

    for (type1, y1, n_out1), (type2, y2, n_out2) in product(EXAMPLES,
                                                            repeat=2):

        if type1 == type2 and n_out1 == n_out2:
            y_type, y_check1, y_check2, multioutput = _check_reg_targets(
                y1, y2, None)
            assert type1 == y_type
            if type1 == 'continuous':
                assert_array_equal(y_check1, np.reshape(y1, (-1, 1)))
                assert_array_equal(y_check2, np.reshape(y2, (-1, 1)))
            else:
                assert_array_equal(y_check1, y1)
                assert_array_equal(y_check2, y2)
        else:
            with pytest.raises(ValueError):
                _check_reg_targets(y1, y2, None)


def test__check_reg_targets_exception():
    invalid_multioutput = 'this_value_is_not_valid'
    expected_message = ("Allowed 'multioutput' string values are.+"
                        "You provided multioutput={!r}".format(
                            invalid_multioutput))
    with pytest.raises(ValueError, match=expected_message):
        _check_reg_targets([1, 2, 3], [[1], [2], [3]], invalid_multioutput)

def test_regression_multioutput_array():
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    r = r2_score(y_true, y_pred, multioutput='raw_values')
    evs = explained_variance_score(y_true, y_pred, multioutput='raw_values')

    assert_array_almost_equal(mse, [0.125, 0.5625], decimal=2)
    assert_array_almost_equal(mae, [0.25, 0.625], decimal=2)
    assert_array_almost_equal(r, [0.95, 0.93], decimal=2)
    assert_array_almost_equal(evs, [0.95, 0.93], decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    y_true = [[0, 0]]*4
    y_pred = [[1, 1]]*4
    mse = mean_squared_error(y_true, y_pred, multioutput='raw_values')
    mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    r = r2_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(mse, [1., 1.], decimal=2)
    assert_array_almost_equal(mae, [1., 1.], decimal=2)
    assert_array_almost_equal(r, [0., 0.], decimal=2)

    r = r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput='raw_values')
    assert_array_almost_equal(r, [0, -3.5], decimal=2)
    assert np.mean(r) == r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]],
                                  multioutput='uniform_average')
    evs = explained_variance_score([[0, -1], [0, 1]], [[2, 2], [1, 1]],
                                   multioutput='raw_values')
    assert_array_almost_equal(evs, [0, -1.25], decimal=2)

    # Checking for the condition in which both numerator and denominator are
    # zero.
    y_true = [[1, 3], [-1, 2]]
    y_pred = [[1, 4], [-1, 1]]
    r2 = r2_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(r2, [1., -3.], decimal=2)
    assert np.mean(r2) == r2_score(y_true, y_pred,
                                   multioutput='uniform_average')
    evs = explained_variance_score(y_true, y_pred, multioutput='raw_values')
    assert_array_almost_equal(evs, [1., -3.], decimal=2)
    assert np.mean(evs) == explained_variance_score(y_true, y_pred)

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput='raw_values')
    msle2 = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                               multioutput='raw_values')
    assert_array_almost_equal(msle, msle2, decimal=2)

def test_regression_custom_weights():
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    msew = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6])
    rmsew = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6],
                               squared=False)
    maew = mean_absolute_error(y_true, y_pred, multioutput=[0.4, 0.6])
    rw = r2_score(y_true, y_pred, multioutput=[0.4, 0.6])
    evsw = explained_variance_score(y_true, y_pred, multioutput=[0.4, 0.6])

    assert_almost_equal(msew, 0.39, decimal=2)
    assert_almost_equal(rmsew, 0.59, decimal=2)
    assert_almost_equal(maew, 0.475, decimal=3)
    assert_almost_equal(rw, 0.94, decimal=2)
    assert_almost_equal(evsw, 0.94, decimal=2)

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    msle2 = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                               multioutput=[0.3, 0.7])
    assert_almost_equal(msle, msle2, decimal=2)


@pytest.mark.parametrize('metric', [r2_score])
def test_regression_single_sample(metric):
    y_true = [0]
    y_pred = [1]
    warning_msg = 'not well-defined with less than two samples.'

    # Trigger the warning
    with pytest.warns(UndefinedMetricWarning, match=warning_msg):
        score = metric(y_true, y_pred)
        assert np.isnan(score)


def test_tweedie_deviance_continuity():
    n_samples = 100

    y_true = np.random.RandomState(0).rand(n_samples) + 0.1
    y_pred = np.random.RandomState(1).rand(n_samples) + 0.1

    assert_allclose(mean_tweedie_deviance(y_true, y_pred, power=0 - 1e-10),
                    mean_tweedie_deviance(y_true, y_pred, power=0))

    # As we get closer to the limit, with 1e-12 difference the absolute
    # tolerance to pass the below check increases. There are likely
    # numerical precision issues on the edges of different definition
    # regions.
    assert_allclose(mean_tweedie_deviance(y_true, y_pred, power=1 + 1e-10),
                    mean_tweedie_deviance(y_true, y_pred, power=1),
                    atol=1e-6)

    assert_allclose(mean_tweedie_deviance(y_true, y_pred, power=2 - 1e-10),
                    mean_tweedie_deviance(y_true, y_pred, power=2),
                    atol=1e-6)

    assert_allclose(mean_tweedie_deviance(y_true, y_pred, power=2 + 1e-10),
                    mean_tweedie_deviance(y_true, y_pred, power=2),
                    atol=1e-6)
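# --- Editor's note: illustrative sketch, not part of the original test file ---
# The continuity checks above approach the special cases of the Tweedie
# deviance; assuming the standard Poisson (power=1) and Gamma (power=2)
# deviance formulas, those limits can also be verified directly:
import numpy as np
from sklearn.metrics import mean_tweedie_deviance

_yc = np.random.RandomState(0).rand(100) + 0.1
_muc = np.random.RandomState(1).rand(100) + 0.1
_poisson = np.mean(2 * (_yc * np.log(_yc / _muc) - _yc + _muc))
_gamma = np.mean(2 * (np.log(_muc / _yc) + _yc / _muc - 1))
assert np.allclose(_poisson, mean_tweedie_deviance(_yc, _muc, power=1))
assert np.allclose(_gamma, mean_tweedie_deviance(_yc, _muc, power=2))
# --- end editor's note ---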
@@ -0,0 +1,721 @@
import pickle
import tempfile
import shutil
import os
import numbers
from unittest.mock import Mock
from functools import partial

import numpy as np
import pytest
import joblib

from numpy.testing import assert_allclose
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import ignore_warnings

from sklearn.base import BaseEstimator
from sklearn.metrics import (f1_score, r2_score, roc_auc_score, fbeta_score,
                             log_loss, precision_score, recall_score,
                             jaccard_score)
from sklearn.metrics import cluster as cluster_module
from sklearn.metrics import check_scoring
from sklearn.metrics._scorer import (_PredictScorer, _passthrough_scorer,
                                     _MultimetricScorer,
                                     _check_multimetric_scoring)
from sklearn.metrics import accuracy_score
from sklearn.metrics import make_scorer, get_scorer, SCORERS
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.cluster import KMeans
from sklearn.linear_model import Ridge, LogisticRegression, Perceptron
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.datasets import make_blobs
from sklearn.datasets import make_classification
from sklearn.datasets import make_multilabel_classification
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.multiclass import OneVsRestClassifier


REGRESSION_SCORERS = ['explained_variance', 'r2',
                      'neg_mean_absolute_error', 'neg_mean_squared_error',
                      'neg_mean_squared_log_error',
                      'neg_median_absolute_error',
                      'neg_root_mean_squared_error',
                      'mean_absolute_error',
                      'mean_squared_error', 'median_absolute_error',
                      'max_error', 'neg_mean_poisson_deviance',
                      'neg_mean_gamma_deviance']

CLF_SCORERS = ['accuracy', 'balanced_accuracy',
               'f1', 'f1_weighted', 'f1_macro', 'f1_micro',
               'roc_auc', 'average_precision', 'precision',
               'precision_weighted', 'precision_macro', 'precision_micro',
               'recall', 'recall_weighted', 'recall_macro', 'recall_micro',
               'neg_log_loss', 'log_loss', 'neg_brier_score',
               'jaccard', 'jaccard_weighted', 'jaccard_macro',
               'jaccard_micro', 'roc_auc_ovr', 'roc_auc_ovo',
               'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted']

# All supervised cluster scorers (they behave like classification metrics)
CLUSTER_SCORERS = ["adjusted_rand_score",
                   "homogeneity_score",
                   "completeness_score",
                   "v_measure_score",
                   "mutual_info_score",
                   "adjusted_mutual_info_score",
                   "normalized_mutual_info_score",
                   "fowlkes_mallows_score"]

MULTILABEL_ONLY_SCORERS = ['precision_samples', 'recall_samples', 'f1_samples',
                           'jaccard_samples']

REQUIRE_POSITIVE_Y_SCORERS = ['neg_mean_poisson_deviance',
                              'neg_mean_gamma_deviance']

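# --- Editor's note: illustrative sketch, not part of the original test file ---
# The string names collected in the lists above are keys of the SCORERS
# registry imported at the top of this file; each resolves to a callable
# scorer via get_scorer():
from sklearn.metrics import SCORERS, get_scorer

print(sorted(SCORERS))                       # every registered scorer name
print(get_scorer('neg_mean_squared_error'))  # a scorer called as scorer(estimator, X, y)
# --- end editor's note ---
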
def _require_positive_y(y):
    """Make targets strictly positive"""
    offset = abs(y.min()) + 1
    y = y + offset
    return y


def _make_estimators(X_train, y_train, y_ml_train):
    # Make estimators that make sense to test various scoring methods
    sensible_regr = DecisionTreeRegressor(random_state=0)
    # some of the regression scorers require strictly positive input.
    sensible_regr.fit(X_train, y_train + 1)
    sensible_clf = DecisionTreeClassifier(random_state=0)
    sensible_clf.fit(X_train, y_train)
    sensible_ml_clf = DecisionTreeClassifier(random_state=0)
    sensible_ml_clf.fit(X_train, y_ml_train)
    return dict(
        [(name, sensible_regr) for name in REGRESSION_SCORERS] +
        [(name, sensible_clf) for name in CLF_SCORERS] +
        [(name, sensible_clf) for name in CLUSTER_SCORERS] +
        [(name, sensible_ml_clf) for name in MULTILABEL_ONLY_SCORERS]
    )


X_mm, y_mm, y_ml_mm = None, None, None
ESTIMATORS = None
TEMP_FOLDER = None


def setup_module():
    # Create some memory mapped data
    global X_mm, y_mm, y_ml_mm, TEMP_FOLDER, ESTIMATORS
    TEMP_FOLDER = tempfile.mkdtemp(prefix='sklearn_test_score_objects_')
    X, y = make_classification(n_samples=30, n_features=5, random_state=0)
    _, y_ml = make_multilabel_classification(n_samples=X.shape[0],
                                             random_state=0)
    filename = os.path.join(TEMP_FOLDER, 'test_data.pkl')
    joblib.dump((X, y, y_ml), filename)
    X_mm, y_mm, y_ml_mm = joblib.load(filename, mmap_mode='r')
    ESTIMATORS = _make_estimators(X_mm, y_mm, y_ml_mm)


def teardown_module():
    global X_mm, y_mm, y_ml_mm, TEMP_FOLDER, ESTIMATORS
    # GC closes the mmap file descriptors
    X_mm, y_mm, y_ml_mm, ESTIMATORS = None, None, None, None
    shutil.rmtree(TEMP_FOLDER)


class EstimatorWithoutFit:
    """Dummy estimator to test scoring validators"""
    pass


class EstimatorWithFit(BaseEstimator):
    """Dummy estimator to test scoring validators"""
    def fit(self, X, y):
        return self


class EstimatorWithFitAndScore:
    """Dummy estimator to test scoring validators"""
    def fit(self, X, y):
        return self

    def score(self, X, y):
        return 1.0


class EstimatorWithFitAndPredict:
    """Dummy estimator to test scoring validators"""
    def fit(self, X, y):
        self.y = y
        return self

    def predict(self, X):
        return self.y


class DummyScorer:
    """Dummy scorer that always returns 1."""
    def __call__(self, est, X, y):
        return 1


def test_all_scorers_repr():
    # Test that all scorers have a working repr
    for name, scorer in SCORERS.items():
        repr(scorer)

def check_scoring_validator_for_single_metric_usecases(scoring_validator):
    # Test all branches of single metric usecases
    estimator = EstimatorWithoutFit()
    pattern = (r"estimator should be an estimator implementing 'fit' method,"
               r" .* was passed")
    with pytest.raises(TypeError, match=pattern):
        scoring_validator(estimator)

    estimator = EstimatorWithFitAndScore()
    estimator.fit([[1]], [1])
    scorer = scoring_validator(estimator)
    assert scorer is _passthrough_scorer
    assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0)

    estimator = EstimatorWithFitAndPredict()
    estimator.fit([[1]], [1])
    pattern = (r"If no scoring is specified, the estimator passed should have"
               r" a 'score' method\. The estimator .* does not\.")
    with pytest.raises(TypeError, match=pattern):
        scoring_validator(estimator)

    scorer = scoring_validator(estimator, scoring="accuracy")
    assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0)

    estimator = EstimatorWithFit()
    scorer = scoring_validator(estimator, scoring="accuracy")
    assert isinstance(scorer, _PredictScorer)

    # Test the allow_none parameter for check_scoring alone
    if scoring_validator is check_scoring:
        estimator = EstimatorWithFit()
        scorer = scoring_validator(estimator, allow_none=True)
        assert scorer is None


def check_multimetric_scoring_single_metric_wrapper(*args, **kwargs):
    # This wraps the _check_multimetric_scoring to take in
    # single metric scoring parameter so we can run the tests
    # that we will run for check_scoring, for check_multimetric_scoring
    # too for single-metric usecases

    scorers, is_multi = _check_multimetric_scoring(*args, **kwargs)
    # For all single metric use cases, it should register as not multimetric
    assert not is_multi
    if args[0] is not None:
        assert scorers is not None
        names, scorers = zip(*scorers.items())
        assert len(scorers) == 1
        assert names[0] == 'score'
        scorers = scorers[0]
    return scorers


def test_check_scoring_and_check_multimetric_scoring():
    check_scoring_validator_for_single_metric_usecases(check_scoring)
    # To make sure the check_scoring is correctly applied to the constituent
    # scorers
    check_scoring_validator_for_single_metric_usecases(
        check_multimetric_scoring_single_metric_wrapper)

    # For multiple metric use cases
    # Make sure it works for the valid cases
    for scoring in (('accuracy',), ['precision'],
                    {'acc': 'accuracy', 'precision': 'precision'},
                    ('accuracy', 'precision'), ['precision', 'accuracy'],
                    {'accuracy': make_scorer(accuracy_score),
                     'precision': make_scorer(precision_score)}):
        estimator = LinearSVC(random_state=0)
        estimator.fit([[1], [2], [3]], [1, 1, 0])

        scorers, is_multi = _check_multimetric_scoring(estimator, scoring)
        assert is_multi
        assert isinstance(scorers, dict)
        assert sorted(scorers.keys()) == sorted(list(scoring))
        assert all([isinstance(scorer, _PredictScorer)
                    for scorer in list(scorers.values())])

        if 'acc' in scoring:
            assert_almost_equal(scorers['acc'](
                estimator, [[1], [2], [3]], [1, 0, 0]), 2. / 3.)
        if 'accuracy' in scoring:
            assert_almost_equal(scorers['accuracy'](
                estimator, [[1], [2], [3]], [1, 0, 0]), 2. / 3.)
        if 'precision' in scoring:
            assert_almost_equal(scorers['precision'](
                estimator, [[1], [2], [3]], [1, 0, 0]), 0.5)

    estimator = EstimatorWithFitAndPredict()
    estimator.fit([[1]], [1])

    # Make sure it raises errors when scoring parameter is not valid.
    # More weird corner cases are tested at test_validation.py
    error_message_regexp = ".*must be unique strings.*"
    for scoring in ((make_scorer(precision_score),  # Tuple of callables
                     make_scorer(accuracy_score)), [5],
                    (make_scorer(precision_score),), (), ('f1', 'f1')):
        with pytest.raises(ValueError, match=error_message_regexp):
            _check_multimetric_scoring(estimator, scoring=scoring)


def test_check_scoring_gridsearchcv():
    # test that check_scoring works on GridSearchCV and pipeline.
    # slightly redundant non-regression test.

    grid = GridSearchCV(LinearSVC(), param_grid={'C': [.1, 1]}, cv=3)
    scorer = check_scoring(grid, scoring="f1")
    assert isinstance(scorer, _PredictScorer)

    pipe = make_pipeline(LinearSVC())
    scorer = check_scoring(pipe, scoring="f1")
    assert isinstance(scorer, _PredictScorer)

    # check that cross_val_score definitely calls the scorer
    # and doesn't make any assumptions about the estimator apart from having a
    # fit.
    scores = cross_val_score(EstimatorWithFit(), [[1], [2], [3]], [1, 0, 1],
                             scoring=DummyScorer(), cv=3)
    assert_array_equal(scores, 1)

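# --- Editor's note: illustrative sketch, not part of the original test file ---
# check_scoring, exercised above, turns a scoring string (or None) into a
# callable used as scorer(estimator, X, y); a minimal usage sketch:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring

X_demo, y_demo = make_classification(random_state=0)
clf_demo = LogisticRegression(max_iter=1000).fit(X_demo, y_demo)
accuracy_scorer = check_scoring(clf_demo, scoring="accuracy")
print(accuracy_scorer(clf_demo, X_demo, y_demo))  # training accuracy as a float
# --- end editor's note ---
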
def test_make_scorer():
    # Sanity check on the make_scorer factory function.
    f = lambda *args: 0
    with pytest.raises(ValueError):
        make_scorer(f, needs_threshold=True, needs_proba=True)


def test_classification_scores():
    # Test classification scorers.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LinearSVC(random_state=0)
    clf.fit(X_train, y_train)

    for prefix, metric in [('f1', f1_score), ('precision', precision_score),
                           ('recall', recall_score),
                           ('jaccard', jaccard_score)]:

        score1 = get_scorer('%s_weighted' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='weighted')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s_macro' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='macro')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s_micro' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='micro')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=1)
        assert_almost_equal(score1, score2)

    # test fbeta score that takes an argument
    scorer = make_scorer(fbeta_score, beta=2)
    score1 = scorer(clf, X_test, y_test)
    score2 = fbeta_score(y_test, clf.predict(X_test), beta=2)
    assert_almost_equal(score1, score2)

    # test that custom scorer can be pickled
    unpickled_scorer = pickle.loads(pickle.dumps(scorer))
    score3 = unpickled_scorer(clf, X_test, y_test)
    assert_almost_equal(score1, score3)

    # smoke test the repr:
    repr(fbeta_score)


def test_regression_scorers():
    # Test regression scorers.
    diabetes = load_diabetes()
    X, y = diabetes.data, diabetes.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = Ridge()
    clf.fit(X_train, y_train)
    score1 = get_scorer('r2')(clf, X_test, y_test)
    score2 = r2_score(y_test, clf.predict(X_test))
    assert_almost_equal(score1, score2)

def test_thresholded_scorers():
    # Test scorers that take thresholds.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    score3 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)
    assert_almost_equal(score1, score3)

    logscore = get_scorer('neg_log_loss')(clf, X_test, y_test)
    logloss = log_loss(y_test, clf.predict_proba(X_test))
    assert_almost_equal(-logscore, logloss)

    # same for an estimator without decision_function
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)

    # test with a regressor (no decision_function)
    reg = DecisionTreeRegressor()
    reg.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(reg, X_test, y_test)
    score2 = roc_auc_score(y_test, reg.predict(X_test))
    assert_almost_equal(score1, score2)

    # Test that an exception is raised on more than two classes
    X, y = make_blobs(random_state=0, centers=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf.fit(X_train, y_train)
    with pytest.raises(ValueError, match="multiclass format is not supported"):
        get_scorer('roc_auc')(clf, X_test, y_test)

    # test error is raised with a single class present in model
    # (predict_proba shape is not suitable for binary auc)
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = DecisionTreeClassifier()
    clf.fit(X_train, np.zeros_like(y_train))
    with pytest.raises(ValueError, match="need classifier with two classes"):
        get_scorer('roc_auc')(clf, X_test, y_test)

    # for proba scorers
    with pytest.raises(ValueError, match="need classifier with two classes"):
        get_scorer('neg_log_loss')(clf, X_test, y_test)

def test_thresholded_scorers_multilabel_indicator_data():
    # Test that the scorers work with multilabel-indicator format
    # for multilabel and multi-output multi-class classifier
    X, y = make_multilabel_classification(allow_unlabeled=False,
                                          random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # Multi-output multi-class predict_proba
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    y_proba = clf.predict_proba(X_test)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, np.vstack([p[:, -1] for p in y_proba]).T)
    assert_almost_equal(score1, score2)

    # Multi-output multi-class decision_function
    # TODO Is there any yet?
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    clf._predict_proba = clf.predict_proba
    clf.predict_proba = None
    clf.decision_function = lambda X: [p[:, 1] for p in clf._predict_proba(X)]

    y_proba = clf.decision_function(X_test)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, np.vstack([p for p in y_proba]).T)
    assert_almost_equal(score1, score2)

    # Multilabel predict_proba
    clf = OneVsRestClassifier(DecisionTreeClassifier())
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test))
    assert_almost_equal(score1, score2)

    # Multilabel decision function
    clf = OneVsRestClassifier(LinearSVC(random_state=0))
    clf.fit(X_train, y_train)
    score1 = get_scorer('roc_auc')(clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    assert_almost_equal(score1, score2)


def test_supervised_cluster_scorers():
    # Test clustering scorers against gold standard labeling.
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    km = KMeans(n_clusters=3)
    km.fit(X_train)
    for name in CLUSTER_SCORERS:
        score1 = get_scorer(name)(km, X_test, y_test)
        score2 = getattr(cluster_module, name)(y_test, km.predict(X_test))
        assert_almost_equal(score1, score2)

@ignore_warnings
def test_raises_on_score_list():
    # Test that when a list of scores is returned, we raise proper errors.
    X, y = make_blobs(random_state=0)
    f1_scorer_no_average = make_scorer(f1_score, average=None)
    clf = DecisionTreeClassifier()
    with pytest.raises(ValueError):
        cross_val_score(clf, X, y, scoring=f1_scorer_no_average)
    grid_search = GridSearchCV(clf, scoring=f1_scorer_no_average,
                               param_grid={'max_depth': [1, 2]})
    with pytest.raises(ValueError):
        grid_search.fit(X, y)


@ignore_warnings
def test_scorer_sample_weight():
    # Test that scorers support sample_weight or raise sensible errors

    # Unlike the metrics invariance test, in the scorer case it's harder
    # to ensure that, on the classifier output, weighted and unweighted
    # scores really should be unequal.
    X, y = make_classification(random_state=0)
    _, y_ml = make_multilabel_classification(n_samples=X.shape[0],
                                             random_state=0)
    split = train_test_split(X, y, y_ml, random_state=0)
    X_train, X_test, y_train, y_test, y_ml_train, y_ml_test = split

    sample_weight = np.ones_like(y_test)
    sample_weight[:10] = 0

    # get sensible estimators for each metric
    estimator = _make_estimators(X_train, y_train, y_ml_train)

    for name, scorer in SCORERS.items():
        if name in MULTILABEL_ONLY_SCORERS:
            target = y_ml_test
        else:
            target = y_test
        if name in REQUIRE_POSITIVE_Y_SCORERS:
            target = _require_positive_y(target)
        try:
            weighted = scorer(estimator[name], X_test, target,
                              sample_weight=sample_weight)
            ignored = scorer(estimator[name], X_test[10:], target[10:])
            unweighted = scorer(estimator[name], X_test, target)
            assert weighted != unweighted, (
                "scorer {0} behaves identically when "
                "called with sample weights: {1} vs "
                "{2}".format(name, weighted, unweighted))
            assert_almost_equal(weighted, ignored,
                                err_msg="scorer {0} behaves differently when "
                                "ignoring samples and setting sample_weight to"
                                " 0: {1} vs {2}".format(name, weighted,
                                                        ignored))

        except TypeError as e:
            assert "sample_weight" in str(e), (
                "scorer {0} raises unhelpful exception when called "
                "with sample weights: {1}".format(name, str(e)))

@pytest.mark.parametrize('name', SCORERS)
def test_scorer_memmap_input(name):
    # Non-regression test for #6147: some score functions would
    # return singleton memmap when computed on memmap data instead of scalar
    # float values.

    if name in REQUIRE_POSITIVE_Y_SCORERS:
        y_mm_1 = _require_positive_y(y_mm)
        y_ml_mm_1 = _require_positive_y(y_ml_mm)
    else:
        y_mm_1, y_ml_mm_1 = y_mm, y_ml_mm

    # UndefinedMetricWarning for P / R scores
    with ignore_warnings():
        scorer, estimator = SCORERS[name], ESTIMATORS[name]
        if name in MULTILABEL_ONLY_SCORERS:
            score = scorer(estimator, X_mm, y_ml_mm_1)
        else:
            score = scorer(estimator, X_mm, y_mm_1)
        assert isinstance(score, numbers.Number), name


def test_scoring_is_not_metric():
    with pytest.raises(ValueError, match='make_scorer'):
        check_scoring(LogisticRegression(), scoring=f1_score)
    with pytest.raises(ValueError, match='make_scorer'):
        check_scoring(LogisticRegression(), scoring=roc_auc_score)
    with pytest.raises(ValueError, match='make_scorer'):
        check_scoring(Ridge(), scoring=r2_score)
    with pytest.raises(ValueError, match='make_scorer'):
        check_scoring(KMeans(), scoring=cluster_module.adjusted_rand_score)


def test_deprecated_scorer():
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)

    deprecated_scorer = get_scorer('brier_score_loss')
    with pytest.warns(FutureWarning):
        deprecated_scorer(clf, X_test, y_test)

@pytest.mark.parametrize(
    ("scorers,expected_predict_count,"
     "expected_predict_proba_count,expected_decision_func_count"),
    [({'a1': 'accuracy', 'a2': 'accuracy',
       'll1': 'neg_log_loss', 'll2': 'neg_log_loss',
       'ra1': 'roc_auc', 'ra2': 'roc_auc'}, 1, 1, 1),
     (['roc_auc', 'accuracy'], 1, 0, 1),
     (['neg_log_loss', 'accuracy'], 1, 1, 0)])
def test_multimetric_scorer_calls_method_once(scorers, expected_predict_count,
                                              expected_predict_proba_count,
                                              expected_decision_func_count):
    X, y = np.array([[1], [1], [0], [0], [0]]), np.array([0, 1, 1, 1, 0])

    mock_est = Mock()
    fit_func = Mock(return_value=mock_est)
    predict_func = Mock(return_value=y)

    pos_proba = np.random.rand(X.shape[0])
    proba = np.c_[1 - pos_proba, pos_proba]
    predict_proba_func = Mock(return_value=proba)
    decision_function_func = Mock(return_value=pos_proba)

    mock_est.fit = fit_func
    mock_est.predict = predict_func
    mock_est.predict_proba = predict_proba_func
    mock_est.decision_function = decision_function_func

    scorer_dict, _ = _check_multimetric_scoring(LogisticRegression(), scorers)
    multi_scorer = _MultimetricScorer(**scorer_dict)
    results = multi_scorer(mock_est, X, y)

    assert set(scorers) == set(results)  # compare dict keys

    assert predict_func.call_count == expected_predict_count
    assert predict_proba_func.call_count == expected_predict_proba_count
    assert decision_function_func.call_count == expected_decision_func_count


def test_multimetric_scorer_calls_method_once_classifier_no_decision():
    predict_proba_call_cnt = 0

    class MockKNeighborsClassifier(KNeighborsClassifier):
        def predict_proba(self, X):
            nonlocal predict_proba_call_cnt
            predict_proba_call_cnt += 1
            return super().predict_proba(X)

    X, y = np.array([[1], [1], [0], [0], [0]]), np.array([0, 1, 1, 1, 0])

    # no decision function
    clf = MockKNeighborsClassifier(n_neighbors=1)
    clf.fit(X, y)

    scorers = ['roc_auc', 'neg_log_loss']
    scorer_dict, _ = _check_multimetric_scoring(clf, scorers)
    scorer = _MultimetricScorer(**scorer_dict)
    scorer(clf, X, y)

    assert predict_proba_call_cnt == 1


def test_multimetric_scorer_calls_method_once_regressor_threshold():
    predict_called_cnt = 0

    class MockDecisionTreeRegressor(DecisionTreeRegressor):
        def predict(self, X):
            nonlocal predict_called_cnt
            predict_called_cnt += 1
            return super().predict(X)

    X, y = np.array([[1], [1], [0], [0], [0]]), np.array([0, 1, 1, 1, 0])

    # no decision function
    clf = MockDecisionTreeRegressor()
    clf.fit(X, y)

    scorers = {'neg_mse': 'neg_mean_squared_error', 'r2': 'roc_auc'}
    scorer_dict, _ = _check_multimetric_scoring(clf, scorers)
    scorer = _MultimetricScorer(**scorer_dict)
    scorer(clf, X, y)

    assert predict_called_cnt == 1


def test_multimetric_scorer_sanity_check():
    # scoring dictionary returned is the same as calling each scorer separately
    scorers = {'a1': 'accuracy', 'a2': 'accuracy',
               'll1': 'neg_log_loss', 'll2': 'neg_log_loss',
               'ra1': 'roc_auc', 'ra2': 'roc_auc'}

    X, y = make_classification(random_state=0)

    clf = DecisionTreeClassifier()
    clf.fit(X, y)

    scorer_dict, _ = _check_multimetric_scoring(clf, scorers)
    multi_scorer = _MultimetricScorer(**scorer_dict)

    result = multi_scorer(clf, X, y)

    separate_scores = {
        name: get_scorer(name)(clf, X, y)
        for name in ['accuracy', 'neg_log_loss', 'roc_auc']}

    for key, value in result.items():
        score_name = scorers[key]
        assert_allclose(value, separate_scores[score_name])

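# --- Editor's note: illustrative sketch, not part of the original test file ---
# The tests above check that _MultimetricScorer computes several scores from a
# single set of predictions; a minimal usage sketch of that private helper,
# using the same call pattern as the tests in this file:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics._scorer import _check_multimetric_scoring, _MultimetricScorer

X_demo2, y_demo2 = make_classification(random_state=0)
clf_demo2 = LogisticRegression(max_iter=1000).fit(X_demo2, y_demo2)
scorer_dict_demo, _ = _check_multimetric_scoring(clf_demo2, ['accuracy', 'roc_auc'])
print(_MultimetricScorer(**scorer_dict_demo)(clf_demo2, X_demo2, y_demo2))
# -> {'accuracy': ..., 'roc_auc': ...}
# --- end editor's note ---
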
@pytest.mark.parametrize('scorer_name, metric', [
    ('roc_auc_ovr', partial(roc_auc_score, multi_class='ovr')),
    ('roc_auc_ovo', partial(roc_auc_score, multi_class='ovo')),
    ('roc_auc_ovr_weighted', partial(roc_auc_score, multi_class='ovr',
                                     average='weighted')),
    ('roc_auc_ovo_weighted', partial(roc_auc_score, multi_class='ovo',
                                     average='weighted'))])
def test_multiclass_roc_proba_scorer(scorer_name, metric):
    scorer = get_scorer(scorer_name)
    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
                               random_state=0)
    lr = LogisticRegression(multi_class="multinomial").fit(X, y)
    y_proba = lr.predict_proba(X)
    expected_score = metric(y, y_proba)

    assert scorer(lr, X, y) == pytest.approx(expected_score)


def test_multiclass_roc_proba_scorer_label():
    scorer = make_scorer(roc_auc_score, multi_class='ovo',
                         labels=[0, 1, 2], needs_proba=True)
    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
                               random_state=0)
    lr = LogisticRegression(multi_class="multinomial").fit(X, y)
    y_proba = lr.predict_proba(X)

    y_binary = y == 0
    expected_score = roc_auc_score(y_binary, y_proba,
                                   multi_class='ovo',
                                   labels=[0, 1, 2])

    assert scorer(lr, X, y_binary) == pytest.approx(expected_score)


@pytest.mark.parametrize('scorer_name', [
    'roc_auc_ovr', 'roc_auc_ovo',
    'roc_auc_ovr_weighted', 'roc_auc_ovo_weighted'])
def test_multiclass_roc_no_proba_scorer_errors(scorer_name):
    # Perceptron has no predict_proba
    scorer = get_scorer(scorer_name)
    X, y = make_classification(n_classes=3, n_informative=3, n_samples=20,
                               random_state=0)
    lr = Perceptron().fit(X, y)
    msg = "'Perceptron' object has no attribute 'predict_proba'"
    with pytest.raises(AttributeError, match=msg):
        scorer(lr, X, y)