"""Metrics to assess performance on regression task Functions named as ``*_score`` return a scalar value to maximize: the higher the better Function named as ``*_error`` or ``*_loss`` return a scalar value to minimize: the lower the better """ # Authors: Alexandre Gramfort <alexandre.gramfort@inria.fr> # Mathieu Blondel <mathieu@mblondel.org> # Olivier Grisel <olivier.grisel@ensta.org> # Arnaud Joly <a.joly@ulg.ac.be> # Jochen Wersdorfer <jochen@wersdoerfer.de> # Lars Buitinck # Joel Nothman <joel.nothman@gmail.com> # Karan Desai <karandesai281196@gmail.com> # Noel Dawe <noel@dawe.me> # Manoj Kumar <manojkumarsivaraj334@gmail.com> # Michael Eickenberg <michael.eickenberg@gmail.com> # Konstantin Shmelkov <konstantin.shmelkov@polytechnique.edu> # Christian Lorentzen <lorentzen.ch@googlemail.com> # License: BSD 3 clause import numpy as np import warnings from .._loss.glm_distribution import TweedieDistribution from ..utils.validation import (check_array, check_consistent_length, _num_samples) from ..utils.validation import column_or_1d from ..utils.validation import _deprecate_positional_args from ..exceptions import UndefinedMetricWarning __ALL__ = [ "max_error", "mean_absolute_error", "mean_squared_error", "mean_squared_log_error", "median_absolute_error", "r2_score", "explained_variance_score", "mean_tweedie_deviance", "mean_poisson_deviance", "mean_gamma_deviance", ] def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric"): """Check that y_true and y_pred belong to the same regression task Parameters ---------- y_true : array-like y_pred : array-like multioutput : array-like or string in ['raw_values', uniform_average', 'variance_weighted'] or None None is accepted due to backward compatibility of r2_score(). Returns ------- type_true : one of {'continuous', continuous-multioutput'} The type of the true target data, as output by 'utils.multiclass.type_of_target' y_true : array-like of shape (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples, n_outputs) Estimated target values. multioutput : array-like of shape (n_outputs) or string in ['raw_values', uniform_average', 'variance_weighted'] or None Custom output weights if ``multioutput`` is array-like or just the corresponding argument if ``multioutput`` is a correct keyword. dtype: str or list, default="numeric" the dtype argument passed to check_array """ check_consistent_length(y_true, y_pred) y_true = check_array(y_true, ensure_2d=False, dtype=dtype) y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype) if y_true.ndim == 1: y_true = y_true.reshape((-1, 1)) if y_pred.ndim == 1: y_pred = y_pred.reshape((-1, 1)) if y_true.shape[1] != y_pred.shape[1]: raise ValueError("y_true and y_pred have different number of output " "({0}!={1})".format(y_true.shape[1], y_pred.shape[1])) n_outputs = y_true.shape[1] allowed_multioutput_str = ('raw_values', 'uniform_average', 'variance_weighted') if isinstance(multioutput, str): if multioutput not in allowed_multioutput_str: raise ValueError("Allowed 'multioutput' string values are {}. " "You provided multioutput={!r}".format( allowed_multioutput_str, multioutput)) elif multioutput is not None: multioutput = check_array(multioutput, ensure_2d=False) if n_outputs == 1: raise ValueError("Custom weights are useful only in " "multi-output cases.") elif n_outputs != len(multioutput): raise ValueError(("There must be equally many custom weights " "(%d) as outputs (%d).") % (len(multioutput), n_outputs)) y_type = 'continuous' if n_outputs == 1 else 'continuous-multioutput' return y_type, y_true, y_pred, multioutput @_deprecate_positional_args def mean_absolute_error(y_true, y_pred, *, sample_weight=None, multioutput='uniform_average'): """Mean absolute error regression loss Read more in the :ref:`User Guide <mean_absolute_error>`. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), optional Sample weights. multioutput : string in ['raw_values', 'uniform_average'] \ or array-like of shape (n_outputs) Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or ndarray of floats If multioutput is 'raw_values', then mean absolute error is returned for each output separately. If multioutput is 'uniform_average' or an ndarray of weights, then the weighted average of all output errors is returned. MAE output is non-negative floating point. The best value is 0.0. Examples -------- >>> from sklearn.metrics import mean_absolute_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> mean_absolute_error(y_true, y_pred) 0.5 >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> mean_absolute_error(y_true, y_pred) 0.75 >>> mean_absolute_error(y_true, y_pred, multioutput='raw_values') array([0.5, 1. ]) >>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.85... """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput) check_consistent_length(y_true, y_pred, sample_weight) output_errors = np.average(np.abs(y_pred - y_true), weights=sample_weight, axis=0) if isinstance(multioutput, str): if multioutput == 'raw_values': return output_errors elif multioutput == 'uniform_average': # pass None as weights to np.average: uniform mean multioutput = None return np.average(output_errors, weights=multioutput) @_deprecate_positional_args def mean_squared_error(y_true, y_pred, *, sample_weight=None, multioutput='uniform_average', squared=True): """Mean squared error regression loss Read more in the :ref:`User Guide <mean_squared_error>`. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), optional Sample weights. multioutput : string in ['raw_values', 'uniform_average'] \ or array-like of shape (n_outputs) Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. squared : boolean value, optional (default = True) If True returns MSE value, if False returns RMSE value. Returns ------- loss : float or ndarray of floats A non-negative floating point value (the best value is 0.0), or an array of floating point values, one for each individual target. Examples -------- >>> from sklearn.metrics import mean_squared_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> mean_squared_error(y_true, y_pred) 0.375 >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> mean_squared_error(y_true, y_pred, squared=False) 0.612... >>> y_true = [[0.5, 1],[-1, 1],[7, -6]] >>> y_pred = [[0, 2],[-1, 2],[8, -5]] >>> mean_squared_error(y_true, y_pred) 0.708... >>> mean_squared_error(y_true, y_pred, squared=False) 0.822... >>> mean_squared_error(y_true, y_pred, multioutput='raw_values') array([0.41666667, 1. ]) >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.825... """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput) check_consistent_length(y_true, y_pred, sample_weight) output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight) if not squared: output_errors = np.sqrt(output_errors) if isinstance(multioutput, str): if multioutput == 'raw_values': return output_errors elif multioutput == 'uniform_average': # pass None as weights to np.average: uniform mean multioutput = None return np.average(output_errors, weights=multioutput) @_deprecate_positional_args def mean_squared_log_error(y_true, y_pred, *, sample_weight=None, multioutput='uniform_average'): """Mean squared logarithmic error regression loss Read more in the :ref:`User Guide <mean_squared_log_error>`. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), optional Sample weights. multioutput : string in ['raw_values', 'uniform_average'] \ or array-like of shape (n_outputs) Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors when the input is of multioutput format. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or ndarray of floats A non-negative floating point value (the best value is 0.0), or an array of floating point values, one for each individual target. Examples -------- >>> from sklearn.metrics import mean_squared_log_error >>> y_true = [3, 5, 2.5, 7] >>> y_pred = [2.5, 5, 4, 8] >>> mean_squared_log_error(y_true, y_pred) 0.039... >>> y_true = [[0.5, 1], [1, 2], [7, 6]] >>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]] >>> mean_squared_log_error(y_true, y_pred) 0.044... >>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values') array([0.00462428, 0.08377444]) >>> mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.060... """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput) check_consistent_length(y_true, y_pred, sample_weight) if (y_true < 0).any() or (y_pred < 0).any(): raise ValueError("Mean Squared Logarithmic Error cannot be used when " "targets contain negative values.") return mean_squared_error(np.log1p(y_true), np.log1p(y_pred), sample_weight=sample_weight, multioutput=multioutput) @_deprecate_positional_args def median_absolute_error(y_true, y_pred, *, multioutput='uniform_average'): """Median absolute error regression loss Median absolute error output is non-negative floating point. The best value is 0.0. Read more in the :ref:`User Guide <median_absolute_error>`. Parameters ---------- y_true : array-like of shape = (n_samples) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs) Estimated target values. multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ (n_outputs,) Defines aggregating of multiple output values. Array-like value defines weights used to average errors. 'raw_values' : Returns a full set of errors in case of multioutput input. 'uniform_average' : Errors of all outputs are averaged with uniform weight. Returns ------- loss : float or ndarray of floats If multioutput is 'raw_values', then mean absolute error is returned for each output separately. If multioutput is 'uniform_average' or an ndarray of weights, then the weighted average of all output errors is returned. Examples -------- >>> from sklearn.metrics import median_absolute_error >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> median_absolute_error(y_true, y_pred) 0.5 >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> median_absolute_error(y_true, y_pred) 0.75 >>> median_absolute_error(y_true, y_pred, multioutput='raw_values') array([0.5, 1. ]) >>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7]) 0.85 """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput) output_errors = np.median(np.abs(y_pred - y_true), axis=0) if isinstance(multioutput, str): if multioutput == 'raw_values': return output_errors elif multioutput == 'uniform_average': # pass None as weights to np.average: uniform mean multioutput = None return np.average(output_errors, weights=multioutput) @_deprecate_positional_args def explained_variance_score(y_true, y_pred, *, sample_weight=None, multioutput='uniform_average'): """Explained variance regression score function Best possible score is 1.0, lower values are worse. Read more in the :ref:`User Guide <explained_variance_score>`. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), optional Sample weights. multioutput : string in ['raw_values', 'uniform_average', \ 'variance_weighted'] or array-like of shape (n_outputs) Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. 'raw_values' : Returns a full set of scores in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. 'variance_weighted' : Scores of all outputs are averaged, weighted by the variances of each individual output. Returns ------- score : float or ndarray of floats The explained variance or ndarray if 'multioutput' is 'raw_values'. Notes ----- This is not a symmetric function. Examples -------- >>> from sklearn.metrics import explained_variance_score >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> explained_variance_score(y_true, y_pred) 0.957... >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> explained_variance_score(y_true, y_pred, multioutput='uniform_average') 0.983... """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput) check_consistent_length(y_true, y_pred, sample_weight) y_diff_avg = np.average(y_true - y_pred, weights=sample_weight, axis=0) numerator = np.average((y_true - y_pred - y_diff_avg) ** 2, weights=sample_weight, axis=0) y_true_avg = np.average(y_true, weights=sample_weight, axis=0) denominator = np.average((y_true - y_true_avg) ** 2, weights=sample_weight, axis=0) nonzero_numerator = numerator != 0 nonzero_denominator = denominator != 0 valid_score = nonzero_numerator & nonzero_denominator output_scores = np.ones(y_true.shape[1]) output_scores[valid_score] = 1 - (numerator[valid_score] / denominator[valid_score]) output_scores[nonzero_numerator & ~nonzero_denominator] = 0. if isinstance(multioutput, str): if multioutput == 'raw_values': # return scores individually return output_scores elif multioutput == 'uniform_average': # passing to np.average() None as weights results is uniform mean avg_weights = None elif multioutput == 'variance_weighted': avg_weights = denominator else: avg_weights = multioutput return np.average(output_scores, weights=avg_weights) @_deprecate_positional_args def r2_score(y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"): """R^2 (coefficient of determination) regression score function. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). A constant model that always predicts the expected value of y, disregarding the input features, would get a R^2 score of 0.0. Read more in the :ref:`User Guide <r2_score>`. Parameters ---------- y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) Estimated target values. sample_weight : array-like of shape (n_samples,), optional Sample weights. multioutput : string in ['raw_values', 'uniform_average', \ 'variance_weighted'] or None or array-like of shape (n_outputs) Defines aggregating of multiple output scores. Array-like value defines weights used to average scores. Default is "uniform_average". 'raw_values' : Returns a full set of scores in case of multioutput input. 'uniform_average' : Scores of all outputs are averaged with uniform weight. 'variance_weighted' : Scores of all outputs are averaged, weighted by the variances of each individual output. .. versionchanged:: 0.19 Default value of multioutput is 'uniform_average'. Returns ------- z : float or ndarray of floats The R^2 score or ndarray of scores if 'multioutput' is 'raw_values'. Notes ----- This is not a symmetric function. Unlike most other scores, R^2 score may be negative (it need not actually be the square of a quantity R). This metric is not well-defined for single samples and will return a NaN value if n_samples is less than two. References ---------- .. [1] `Wikipedia entry on the Coefficient of determination <https://en.wikipedia.org/wiki/Coefficient_of_determination>`_ Examples -------- >>> from sklearn.metrics import r2_score >>> y_true = [3, -0.5, 2, 7] >>> y_pred = [2.5, 0.0, 2, 8] >>> r2_score(y_true, y_pred) 0.948... >>> y_true = [[0.5, 1], [-1, 1], [7, -6]] >>> y_pred = [[0, 2], [-1, 2], [8, -5]] >>> r2_score(y_true, y_pred, ... multioutput='variance_weighted') 0.938... >>> y_true = [1, 2, 3] >>> y_pred = [1, 2, 3] >>> r2_score(y_true, y_pred) 1.0 >>> y_true = [1, 2, 3] >>> y_pred = [2, 2, 2] >>> r2_score(y_true, y_pred) 0.0 >>> y_true = [1, 2, 3] >>> y_pred = [3, 2, 1] >>> r2_score(y_true, y_pred) -3.0 """ y_type, y_true, y_pred, multioutput = _check_reg_targets( y_true, y_pred, multioutput) check_consistent_length(y_true, y_pred, sample_weight) if _num_samples(y_pred) < 2: msg = "R^2 score is not well-defined with less than two samples." warnings.warn(msg, UndefinedMetricWarning) return float('nan') if sample_weight is not None: sample_weight = column_or_1d(sample_weight) weight = sample_weight[:, np.newaxis] else: weight = 1. numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0, dtype=np.float64) denominator = (weight * (y_true - np.average( y_true, axis=0, weights=sample_weight)) ** 2).sum(axis=0, dtype=np.float64) nonzero_denominator = denominator != 0 nonzero_numerator = numerator != 0 valid_score = nonzero_denominator & nonzero_numerator output_scores = np.ones([y_true.shape[1]]) output_scores[valid_score] = 1 - (numerator[valid_score] / denominator[valid_score]) # arbitrary set to zero to avoid -inf scores, having a constant # y_true is not interesting for scoring a regression anyway output_scores[nonzero_numerator & ~nonzero_denominator] = 0. if isinstance(multioutput, str): if multioutput == 'raw_values': # return scores individually return output_scores elif multioutput == 'uniform_average': # passing None as weights results is uniform mean avg_weights = None elif multioutput == 'variance_weighted': avg_weights = denominator # avoid fail on constant y or one-element arrays if not np.any(nonzero_denominator): if not np.any(nonzero_numerator): return 1.0 else: return 0.0 else: avg_weights = multioutput return np.average(output_scores, weights=avg_weights) def max_error(y_true, y_pred): """ max_error metric calculates the maximum residual error. Read more in the :ref:`User Guide <max_error>`. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) Estimated target values. Returns ------- max_error : float A positive floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import max_error >>> y_true = [3, 2, 7, 1] >>> y_pred = [4, 2, 7, 1] >>> max_error(y_true, y_pred) 1 """ y_type, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, None) if y_type == 'continuous-multioutput': raise ValueError("Multioutput not supported in max_error") return np.max(np.abs(y_true - y_pred)) @_deprecate_positional_args def mean_tweedie_deviance(y_true, y_pred, *, sample_weight=None, power=0): """Mean Tweedie deviance regression loss. Read more in the :ref:`User Guide <mean_tweedie_deviance>`. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. y_pred : array-like of shape (n_samples,) Estimated target values. sample_weight : array-like of shape (n_samples,), default=None Sample weights. power : float, default=0 Tweedie power parameter. Either power <= 0 or power >= 1. The higher `p` the less weight is given to extreme deviations between true and predicted targets. - power < 0: Extreme stable distribution. Requires: y_pred > 0. - power = 0 : Normal distribution, output corresponds to mean_squared_error. y_true and y_pred can be any real numbers. - power = 1 : Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0. - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0 and y_pred > 0. - otherwise : Positive stable distribution. Requires: y_true > 0 and y_pred > 0. Returns ------- loss : float A non-negative floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import mean_tweedie_deviance >>> y_true = [2, 0, 1, 4] >>> y_pred = [0.5, 0.5, 2., 2.] >>> mean_tweedie_deviance(y_true, y_pred, power=1) 1.4260... """ y_type, y_true, y_pred, _ = _check_reg_targets( y_true, y_pred, None, dtype=[np.float64, np.float32]) if y_type == 'continuous-multioutput': raise ValueError("Multioutput not supported in mean_tweedie_deviance") check_consistent_length(y_true, y_pred, sample_weight) if sample_weight is not None: sample_weight = column_or_1d(sample_weight) sample_weight = sample_weight[:, np.newaxis] dist = TweedieDistribution(power=power) dev = dist.unit_deviance(y_true, y_pred, check_input=True) return np.average(dev, weights=sample_weight) @_deprecate_positional_args def mean_poisson_deviance(y_true, y_pred, *, sample_weight=None): """Mean Poisson deviance regression loss. Poisson deviance is equivalent to the Tweedie deviance with the power parameter `power=1`. Read more in the :ref:`User Guide <mean_tweedie_deviance>`. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. Requires y_true >= 0. y_pred : array-like of shape (n_samples,) Estimated target values. Requires y_pred > 0. sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns ------- loss : float A non-negative floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import mean_poisson_deviance >>> y_true = [2, 0, 1, 4] >>> y_pred = [0.5, 0.5, 2., 2.] >>> mean_poisson_deviance(y_true, y_pred) 1.4260... """ return mean_tweedie_deviance( y_true, y_pred, sample_weight=sample_weight, power=1 ) @_deprecate_positional_args def mean_gamma_deviance(y_true, y_pred, *, sample_weight=None): """Mean Gamma deviance regression loss. Gamma deviance is equivalent to the Tweedie deviance with the power parameter `power=2`. It is invariant to scaling of the target variable, and measures relative errors. Read more in the :ref:`User Guide <mean_tweedie_deviance>`. Parameters ---------- y_true : array-like of shape (n_samples,) Ground truth (correct) target values. Requires y_true > 0. y_pred : array-like of shape (n_samples,) Estimated target values. Requires y_pred > 0. sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns ------- loss : float A non-negative floating point value (the best value is 0.0). Examples -------- >>> from sklearn.metrics import mean_gamma_deviance >>> y_true = [2, 0.5, 1, 4] >>> y_pred = [0.5, 0.5, 2., 2.] >>> mean_gamma_deviance(y_true, y_pred) 1.0568... """ return mean_tweedie_deviance( y_true, y_pred, sample_weight=sample_weight, power=2 )