Uploaded Test files
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
26  venv/Lib/site-packages/sklearn/inspection/__init__.py  Normal file
@@ -0,0 +1,26 @@
"""The :mod:`sklearn.inspection` module includes tools for model inspection."""
|
||||
|
||||
# TODO: remove me in 0.24 (as well as the noqa markers) and
|
||||
# import the partial_dependence func directly from the
|
||||
# ._partial_dependence module instead.
|
||||
# Pre-cache the import of the deprecated module so that import
|
||||
# sklearn.inspection.partial_dependence returns the function as in
|
||||
# 0.21, instead of the module
|
||||
# https://github.com/scikit-learn/scikit-learn/issues/15842
|
||||
import warnings
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", category=FutureWarning)
|
||||
from .partial_dependence import partial_dependence
|
||||
|
||||
from ._permutation_importance import permutation_importance # noqa
|
||||
|
||||
from ._plot.partial_dependence import plot_partial_dependence # noqa
|
||||
from ._plot.partial_dependence import PartialDependenceDisplay # noqa
|
||||
|
||||
|
||||
__all__ = [
|
||||
'partial_dependence',
|
||||
'plot_partial_dependence',
|
||||
'permutation_importance',
|
||||
'PartialDependenceDisplay'
|
||||
]
|
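Because `partial_dependence` names both a deprecated submodule and a function during the deprecation window described in the TODO above, the `catch_warnings` block pre-imports the deprecated module so that the attribute resolves to the function. A minimal sketch of the resulting behavior (illustrative only, assuming a 0.22/0.23-era install):

    # The imported name is the function, not the deprecated module.
    from sklearn.inspection import partial_dependence
    assert callable(partial_dependence)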
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
421  venv/Lib/site-packages/sklearn/inspection/_partial_dependence.py  Normal file
@@ -0,0 +1,421 @@
"""Partial dependence plots for regression and classification models."""
|
||||
|
||||
# Authors: Peter Prettenhofer
|
||||
# Trevor Stephens
|
||||
# Nicolas Hug
|
||||
# License: BSD 3 clause
|
||||
|
||||
from collections.abc import Iterable
|
||||
|
||||
import numpy as np
|
||||
from scipy import sparse
|
||||
from scipy.stats.mstats import mquantiles
|
||||
|
||||
from ..base import is_classifier, is_regressor
|
||||
from ..pipeline import Pipeline
|
||||
from ..utils.extmath import cartesian
|
||||
from ..utils import check_array
|
||||
from ..utils import check_matplotlib_support # noqa
|
||||
from ..utils import _safe_indexing
|
||||
from ..utils import _determine_key_type
|
||||
from ..utils import _get_column_indices
|
||||
from ..utils.validation import check_is_fitted
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ..tree import DecisionTreeRegressor
|
||||
from ..ensemble import RandomForestRegressor
|
||||
from ..exceptions import NotFittedError
|
||||
from ..ensemble._gb import BaseGradientBoosting
|
||||
from sklearn.ensemble._hist_gradient_boosting.gradient_boosting import (
|
||||
BaseHistGradientBoosting)
|
||||
|
||||
|
||||
__all__ = [
|
||||
'partial_dependence',
|
||||
]
|
||||
|
||||
|
||||
def _grid_from_X(X, percentiles, grid_resolution):
|
||||
"""Generate a grid of points based on the percentiles of X.
|
||||
|
||||
    The grid is a cartesian product between the columns of ``values``. The
    jth column of ``values`` consists of ``grid_resolution`` equally spaced
    points between the percentiles of the jth column of X.
    If ``grid_resolution`` is bigger than the number of unique values in the
    jth column of X, then those unique values will be used instead.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_target_features)
        The data

    percentiles : tuple of floats
        The percentiles which are used to construct the extreme values of
        the grid. Must be in [0, 1].

    grid_resolution : int
        The number of equally spaced points to be placed on the grid for each
        feature.

    Returns
    -------
    grid : ndarray, shape (n_points, n_target_features)
        A value for each feature at each point in the grid. ``n_points`` is
        always ``<= grid_resolution ** X.shape[1]``.

    values : list of 1d ndarrays
        The values with which the grid has been created. The size of each
        array ``values[j]`` is either ``grid_resolution``, or the number of
        unique values in ``X[:, j]``, whichever is smaller.
    """
    if not isinstance(percentiles, Iterable) or len(percentiles) != 2:
        raise ValueError("'percentiles' must be a sequence of 2 elements.")
    if not all(0 <= x <= 1 for x in percentiles):
        raise ValueError("'percentiles' values must be in [0, 1].")
    if percentiles[0] >= percentiles[1]:
        raise ValueError('percentiles[0] must be strictly less '
                         'than percentiles[1].')

    if grid_resolution <= 1:
        raise ValueError("'grid_resolution' must be strictly greater than 1.")

    values = []
    for feature in range(X.shape[1]):
        uniques = np.unique(_safe_indexing(X, feature, axis=1))
        if uniques.shape[0] < grid_resolution:
            # feature has low resolution: use unique values
            axis = uniques
        else:
            # create axis based on percentiles and grid resolution
            emp_percentiles = mquantiles(
                _safe_indexing(X, feature, axis=1), prob=percentiles, axis=0
            )
            if np.allclose(emp_percentiles[0], emp_percentiles[1]):
                raise ValueError(
                    'percentiles are too close to each other, '
                    'unable to build the grid. Please choose percentiles '
                    'that are further apart.')
            axis = np.linspace(emp_percentiles[0],
                               emp_percentiles[1],
                               num=grid_resolution, endpoint=True)
        values.append(axis)

    return cartesian(values), values
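
# A quick illustration of _grid_from_X (an editorial sketch, not part of the
# upstream source): with two unique values per column, grid_resolution=2 and
# percentiles=(0, 1), each per-column axis is [min, max] and the grid is
# their cartesian product:
#
#     >>> grid, values = _grid_from_X(np.array([[1., 20.], [3., 40.]]),
#     ...                             percentiles=(0, 1), grid_resolution=2)
#     >>> grid.tolist()
#     [[1.0, 20.0], [1.0, 40.0], [3.0, 20.0], [3.0, 40.0]]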


def _partial_dependence_recursion(est, grid, features):
    averaged_predictions = est._compute_partial_dependence_recursion(grid,
                                                                     features)
    if averaged_predictions.ndim == 1:
        # reshape to (1, n_points) for consistency with
        # _partial_dependence_brute
        averaged_predictions = averaged_predictions.reshape(1, -1)

    return averaged_predictions


def _partial_dependence_brute(est, grid, features, X, response_method):
    averaged_predictions = []

    # define the prediction_method (predict, predict_proba, decision_function).
    if is_regressor(est):
        prediction_method = est.predict
    else:
        predict_proba = getattr(est, 'predict_proba', None)
        decision_function = getattr(est, 'decision_function', None)
        if response_method == 'auto':
            # try predict_proba, then decision_function if it doesn't exist
            prediction_method = predict_proba or decision_function
        else:
            prediction_method = (predict_proba if response_method ==
                                 'predict_proba' else decision_function)
        if prediction_method is None:
            if response_method == 'auto':
                raise ValueError(
                    'The estimator has no predict_proba and no '
                    'decision_function method.'
                )
            elif response_method == 'predict_proba':
                raise ValueError('The estimator has no predict_proba method.')
            else:
                raise ValueError(
                    'The estimator has no decision_function method.')

    for new_values in grid:
        X_eval = X.copy()
        for i, variable in enumerate(features):
            if hasattr(X_eval, 'iloc'):
                X_eval.iloc[:, variable] = new_values[i]
            else:
                X_eval[:, variable] = new_values[i]

        try:
            predictions = prediction_method(X_eval)
        except NotFittedError:
            raise ValueError(
                "'estimator' parameter must be a fitted estimator")

        # Note: predictions is of shape
        # (n_points,) for non-multioutput regressors
        # (n_points, n_tasks) for multioutput regressors
        # (n_points, 1) for the regressors in cross_decomposition (I think)
        # (n_points, 2) for binary classification
        # (n_points, n_classes) for multiclass classification

        # average over samples
        averaged_predictions.append(np.mean(predictions, axis=0))

    # reshape to (n_targets, n_points) where n_targets is:
    # - 1 for non-multioutput regression and binary classification (shape is
    #   already correct in those cases)
    # - n_tasks for multi-output regression
    # - n_classes for multiclass classification.
    averaged_predictions = np.array(averaged_predictions).T
    if is_regressor(est) and averaged_predictions.ndim == 1:
        # non-multioutput regression, shape is (n_points,)
        averaged_predictions = averaged_predictions.reshape(1, -1)
    elif is_classifier(est) and averaged_predictions.shape[0] == 2:
        # Binary classification, shape is (2, n_points).
        # we output the effect of **positive** class
        averaged_predictions = averaged_predictions[1]
        averaged_predictions = averaged_predictions.reshape(1, -1)

    return averaged_predictions
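
# Editorial sketch of the brute-force loop above (not upstream code): for a
# single target feature j and one grid value v, every sample gets feature j
# overwritten with v, and the predictions are then averaged over samples:
#
#     X_eval = X.copy()
#     X_eval[:, j] = v
#     pd_at_v = prediction_method(X_eval).mean(axis=0)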


@_deprecate_positional_args
def partial_dependence(estimator, X, features, *, response_method='auto',
                       percentiles=(0.05, 0.95), grid_resolution=100,
                       method='auto'):
    """Partial dependence of ``features``.

    Partial dependence of a feature (or a set of features) corresponds to
    the average response of an estimator for each possible value of the
    feature.

    Read more in the :ref:`User Guide <partial_dependence>`.

    .. warning::

        For :class:`~sklearn.ensemble.GradientBoostingClassifier` and
        :class:`~sklearn.ensemble.GradientBoostingRegressor`, the
        'recursion' method (used by default) will not account for the `init`
        predictor of the boosting process. In practice, this will produce
        the same values as 'brute' up to a constant offset in the target
        response, provided that `init` is a constant estimator (which is the
        default). However, if `init` is not a constant estimator, the
        partial dependence values are incorrect for 'recursion' because the
        offset will be sample-dependent. It is preferable to use the 'brute'
        method. Note that this only applies to
        :class:`~sklearn.ensemble.GradientBoostingClassifier` and
        :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to
        :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
        :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.

    Parameters
    ----------
    estimator : BaseEstimator
        A fitted estimator object implementing :term:`predict`,
        :term:`predict_proba`, or :term:`decision_function`.
        Multioutput-multiclass classifiers are not supported.

    X : {array-like or dataframe} of shape (n_samples, n_features)
        ``X`` is used to generate a grid of values for the target
        ``features`` (where the partial dependence will be evaluated), and
        also to generate values for the complement features when the
        `method` is 'brute'.

    features : array-like of {int, str}
        The feature (e.g. `[0]`) or pair of interacting features
        (e.g. `[(0, 1)]`) for which the partial dependency should be computed.

    response_method : 'auto', 'predict_proba' or 'decision_function', \
            optional (default='auto')
        Specifies whether to use :term:`predict_proba` or
        :term:`decision_function` as the target response. For regressors
        this parameter is ignored and the response is always the output of
        :term:`predict`. By default, :term:`predict_proba` is tried first
        and we revert to :term:`decision_function` if it doesn't exist. If
        ``method`` is 'recursion', the response is always the output of
        :term:`decision_function`.

    percentiles : tuple of float, optional (default=(0.05, 0.95))
        The lower and upper percentile used to create the extreme values
        for the grid. Must be in [0, 1].

    grid_resolution : int, optional (default=100)
        The number of equally spaced points on the grid, for each target
        feature.

    method : str, optional (default='auto')
        The method used to calculate the averaged predictions:

        - 'recursion' is only supported for some tree-based estimators (namely
          :class:`~sklearn.ensemble.GradientBoostingClassifier`,
          :class:`~sklearn.ensemble.GradientBoostingRegressor`,
          :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
          :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
          :class:`~sklearn.tree.DecisionTreeRegressor`,
          :class:`~sklearn.ensemble.RandomForestRegressor`)
          but is more efficient in terms of speed.
          With this method, the target response of a
          classifier is always the decision function, not the predicted
          probabilities.

        - 'brute' is supported for any estimator, but is more
          computationally intensive.

        - 'auto': the 'recursion' is used for estimators that support it,
          and 'brute' is used otherwise.

        Please see :ref:`this note <pdp_method_differences>` for
        differences between the 'brute' and 'recursion' method.

    Returns
    -------
    averaged_predictions : ndarray, \
            shape (n_outputs, len(values[0]), len(values[1]), ...)
        The predictions for all the points in the grid, averaged over all
        samples in X (or over the training data if ``method`` is
        'recursion'). ``n_outputs`` corresponds to the number of classes in
        a multi-class setting, or to the number of tasks for multi-output
        regression. For classical regression and binary classification
        ``n_outputs==1``. ``n_values_feature_j`` corresponds to the size of
        ``values[j]``.

    values : seq of 1d ndarrays
        The values with which the grid has been created. The generated grid
        is a cartesian product of the arrays in ``values``. ``len(values) ==
        len(features)``. The size of each array ``values[j]`` is either
        ``grid_resolution``, or the number of unique values in ``X[:, j]``,
        whichever is smaller.

    Examples
    --------
    >>> X = [[0, 0, 2], [1, 0, 0]]
    >>> y = [0, 1]
    >>> from sklearn.ensemble import GradientBoostingClassifier
    >>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)
    >>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),
    ...                    grid_resolution=2)  # doctest: +SKIP
    (array([[-4.52...,  4.52...]]), [array([ 0.,  1.])])

    See also
    --------
    sklearn.inspection.plot_partial_dependence: Plot partial dependence
    """
    if not (is_classifier(estimator) or is_regressor(estimator)):
        raise ValueError(
            "'estimator' must be a fitted regressor or classifier."
        )

    if isinstance(estimator, Pipeline):
        # TODO: to be removed if/when Pipeline gets a `steps_` attribute,
        # assuming Pipeline is the only estimator that does not store a new
        # attribute
        for est in estimator:
            # FIXME: remove the None option when it will be deprecated
            if est not in (None, 'drop'):
                check_is_fitted(est)
    else:
        check_is_fitted(estimator)

    if (is_classifier(estimator) and
            isinstance(estimator.classes_[0], np.ndarray)):
        raise ValueError(
            'Multiclass-multioutput estimators are not supported'
        )

    # Use check_array only on lists and other non-array-likes / sparse. Do not
    # convert DataFrame into a NumPy array.
    if not(hasattr(X, '__array__') or sparse.issparse(X)):
        X = check_array(X, force_all_finite='allow-nan', dtype=np.object)

    accepted_responses = ('auto', 'predict_proba', 'decision_function')
    if response_method not in accepted_responses:
        raise ValueError(
            'response_method {} is invalid. Accepted response_method names '
            'are {}.'.format(response_method, ', '.join(accepted_responses)))

    if is_regressor(estimator) and response_method != 'auto':
        raise ValueError(
            "The response_method parameter is ignored for regressors and "
            "must be 'auto'."
        )

    accepted_methods = ('brute', 'recursion', 'auto')
    if method not in accepted_methods:
        raise ValueError(
            'method {} is invalid. Accepted method names are {}.'.format(
                method, ', '.join(accepted_methods)))

    if method == 'auto':
        if (isinstance(estimator, BaseGradientBoosting) and
                estimator.init is None):
            method = 'recursion'
        elif isinstance(estimator, (BaseHistGradientBoosting,
                                    DecisionTreeRegressor,
                                    RandomForestRegressor)):
            method = 'recursion'
        else:
            method = 'brute'

    if method == 'recursion':
        if not isinstance(estimator,
                          (BaseGradientBoosting, BaseHistGradientBoosting,
                           DecisionTreeRegressor, RandomForestRegressor)):
            supported_classes_recursion = (
                'GradientBoostingClassifier',
                'GradientBoostingRegressor',
                'HistGradientBoostingClassifier',
                'HistGradientBoostingRegressor',
                'DecisionTreeRegressor',
                'RandomForestRegressor',
            )
            raise ValueError(
                "Only the following estimators support the 'recursion' "
                "method: {}. Try using method='brute'."
                .format(', '.join(supported_classes_recursion)))
        if response_method == 'auto':
            response_method = 'decision_function'

        if response_method != 'decision_function':
            raise ValueError(
                "With the 'recursion' method, the response_method must be "
                "'decision_function'. Got {}.".format(response_method)
            )

    if _determine_key_type(features, accept_slice=False) == 'int':
        # _get_column_indices() supports negative indexing. Here, we limit
        # the indexing to be positive. The upper bound will be checked
        # by _get_column_indices()
        if np.any(np.less(features, 0)):
            raise ValueError(
                'all features must be in [0, {}]'.format(X.shape[1] - 1)
            )

    features_indices = np.asarray(
        _get_column_indices(X, features), dtype=np.int32, order='C'
    ).ravel()

    grid, values = _grid_from_X(
        _safe_indexing(X, features_indices, axis=1), percentiles,
        grid_resolution
    )

    if method == 'brute':
        averaged_predictions = _partial_dependence_brute(
            estimator, grid, features_indices, X, response_method
        )
    else:
        averaged_predictions = _partial_dependence_recursion(
            estimator, grid, features_indices
        )

    # reshape averaged_predictions to
    # (n_outputs, n_values_feature_0, n_values_feature_1, ...)
    averaged_predictions = averaged_predictions.reshape(
        -1, *[val.shape[0] for val in values])

    return averaged_predictions, values
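A short usage sketch of `partial_dependence` beyond the docstring example (illustrative; `make_regression` and the asserted shapes are the editor's assumptions, and exact values depend on the fitted model):

    from sklearn.datasets import make_regression
    from sklearn.ensemble import GradientBoostingRegressor
    from sklearn.inspection import partial_dependence

    X, y = make_regression(n_samples=100, n_features=3, random_state=0)
    est = GradientBoostingRegressor(random_state=0).fit(X, y)

    # 'recursion' is selected automatically for this estimator; the result
    # is (n_outputs, n_grid_points) plus one grid array per target feature.
    avg_preds, values = partial_dependence(est, X, features=[0],
                                           grid_resolution=5)
    assert avg_preds.shape == (1, 5)
    assert values[0].shape == (5,)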
142  venv/Lib/site-packages/sklearn/inspection/_permutation_importance.py  Normal file
@@ -0,0 +1,142 @@
"""Permutation importance for estimators"""
import numpy as np
from joblib import Parallel
from joblib import delayed

from ..metrics import check_scoring
from ..utils import Bunch
from ..utils import check_random_state
from ..utils import check_array
from ..utils.validation import _deprecate_positional_args


def _calculate_permutation_scores(estimator, X, y, col_idx, random_state,
                                  n_repeats, scorer):
    """Calculate score when `col_idx` is permuted."""
    random_state = check_random_state(random_state)

    # Work on a copy of X to ensure thread-safety in case of threading based
    # parallelism. Furthermore, making a copy is also useful when the joblib
    # backend is 'loky' (default) or the old 'multiprocessing': in those cases,
    # if X is large it will automatically be backed by a readonly memory map
    # (memmap). X.copy() on the other hand is always guaranteed to return a
    # writable data-structure whose columns can be shuffled inplace.
    X_permuted = X.copy()
    scores = np.zeros(n_repeats)
    shuffling_idx = np.arange(X.shape[0])
    for n_round in range(n_repeats):
        random_state.shuffle(shuffling_idx)
        if hasattr(X_permuted, "iloc"):
            col = X_permuted.iloc[shuffling_idx, col_idx]
            col.index = X_permuted.index
            X_permuted.iloc[:, col_idx] = col
        else:
            X_permuted[:, col_idx] = X_permuted[shuffling_idx, col_idx]
        feature_score = scorer(estimator, X_permuted, y)
        scores[n_round] = feature_score

    return scores


@_deprecate_positional_args
def permutation_importance(estimator, X, y, *, scoring=None, n_repeats=5,
                           n_jobs=None, random_state=None):
    """Permutation importance for feature evaluation [BRE]_.

    The :term:`estimator` is required to be a fitted estimator. `X` can be the
    data set used to train the estimator or a hold-out set. The permutation
    importance of a feature is calculated as follows. First, a baseline metric,
    defined by :term:`scoring`, is evaluated on a (potentially different)
    dataset defined by `X`. Next, a feature column from the validation set
    is permuted and the metric is evaluated again. The permutation importance
    is defined to be the difference between the baseline metric and the metric
    from permuting the feature column.

    Read more in the :ref:`User Guide <permutation_importance>`.

    Parameters
    ----------
    estimator : object
        An estimator that has already been :term:`fitted` and is compatible
        with :term:`scorer`.

    X : ndarray or DataFrame, shape (n_samples, n_features)
        Data on which permutation importance will be computed.

    y : array-like or None, shape (n_samples, ) or (n_samples, n_classes)
        Targets for supervised or `None` for unsupervised.

    scoring : string, callable or None, default=None
        Scorer to use. It can be a single
        string (see :ref:`scoring_parameter`) or a callable (see
        :ref:`scoring`). If None, the estimator's default scorer is used.

    n_repeats : int, default=5
        Number of times to permute a feature.

    n_jobs : int or None, default=None
        The number of jobs to use for the computation.
        `None` means 1 unless in a :obj:`joblib.parallel_backend` context.
        `-1` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    random_state : int, RandomState instance, default=None
        Pseudo-random number generator to control the permutations of each
        feature.
        Pass an int to get reproducible results across function calls.
        See :term:`Glossary <random_state>`.

    Returns
    -------
    result : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        importances_mean : ndarray, shape (n_features, )
            Mean of feature importance over `n_repeats`.
        importances_std : ndarray, shape (n_features, )
            Standard deviation over `n_repeats`.
        importances : ndarray, shape (n_features, n_repeats)
            Raw permutation importance scores.

    References
    ----------
    .. [BRE] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32,
             2001. https://doi.org/10.1023/A:1010933404324

    Examples
    --------
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.inspection import permutation_importance
    >>> X = [[1, 9, 9],[1, 9, 9],[1, 9, 9],
    ...      [0, 9, 9],[0, 9, 9],[0, 9, 9]]
    >>> y = [1, 1, 1, 0, 0, 0]
    >>> clf = LogisticRegression().fit(X, y)
    >>> result = permutation_importance(clf, X, y, n_repeats=10,
    ...                                 random_state=0)
    >>> result.importances_mean
    array([0.4666..., 0.       , 0.       ])
    >>> result.importances_std
    array([0.2211..., 0.       , 0.       ])
    """
    if not hasattr(X, "iloc"):
        X = check_array(X, force_all_finite='allow-nan', dtype=None)

    # Precompute random seed from the random state to be used
    # to get a fresh independent RandomState instance for each
    # parallel call to _calculate_permutation_scores, irrespective of
    # the fact that variables are shared or not depending on the active
    # joblib backend (sequential, thread-based or process-based).
    random_state = check_random_state(random_state)
    random_seed = random_state.randint(np.iinfo(np.int32).max + 1)

    scorer = check_scoring(estimator, scoring=scoring)
    baseline_score = scorer(estimator, X, y)

    scores = Parallel(n_jobs=n_jobs)(delayed(_calculate_permutation_scores)(
        estimator, X, y, col_idx, random_seed, n_repeats, scorer
    ) for col_idx in range(X.shape[1]))

    importances = baseline_score - np.array(scores)
    return Bunch(importances_mean=np.mean(importances, axis=1),
                 importances_std=np.std(importances, axis=1),
                 importances=importances)
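To make the definition in the docstring concrete, a hand-rolled version of the score drop for a single column looks like this (an editorial sketch; the real function above adds seeding, `n_repeats` averaging, and joblib parallelism):

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    rng = np.random.RandomState(0)
    X = np.array([[1., 9.], [1., 8.], [0., 9.], [0., 8.]])
    y = np.array([1, 1, 0, 0])
    clf = LogisticRegression().fit(X, y)

    baseline = clf.score(X, y)
    X_perm = X.copy()
    X_perm[:, 0] = rng.permutation(X_perm[:, 0])  # shuffle column 0 only
    # permutation importance = baseline metric minus permuted metric
    importance_col0 = baseline - clf.score(X_perm, y)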
Binary file not shown.
Binary file not shown.
593  venv/Lib/site-packages/sklearn/inspection/_plot/partial_dependence.py  Normal file
@@ -0,0 +1,593 @@
import numbers
from itertools import chain
from itertools import count
import warnings

import numpy as np
from scipy import sparse
from scipy.stats.mstats import mquantiles
from joblib import Parallel, delayed

from .. import partial_dependence
from ...base import is_regressor
from ...utils import check_array
from ...utils import check_matplotlib_support  # noqa
from ...utils import _safe_indexing
from ...utils.validation import _deprecate_positional_args


@_deprecate_positional_args
def plot_partial_dependence(estimator, X, features, *, feature_names=None,
                            target=None, response_method='auto', n_cols=3,
                            grid_resolution=100, percentiles=(0.05, 0.95),
                            method='auto', n_jobs=None, verbose=0, fig=None,
                            line_kw=None, contour_kw=None, ax=None):
    """Partial dependence plots.

    The ``len(features)`` plots are arranged in a grid with ``n_cols``
    columns. Two-way partial dependence plots are plotted as contour plots. The
    deciles of the feature values will be shown with tick marks on the x-axes
    for one-way plots, and on both axes for two-way plots.

    Read more in the :ref:`User Guide <partial_dependence>`.

    .. note::

        :func:`plot_partial_dependence` does not support using the same axes
        with multiple calls. To plot the partial dependence for multiple
        estimators, please pass the axes created by the first call to the
        second call::

          >>> from sklearn.inspection import plot_partial_dependence
          >>> from sklearn.datasets import make_friedman1
          >>> from sklearn.linear_model import LinearRegression
          >>> X, y = make_friedman1()
          >>> est = LinearRegression().fit(X, y)
          >>> disp1 = plot_partial_dependence(est, X)  # doctest: +SKIP
          >>> disp2 = plot_partial_dependence(est, X,
          ...                                 ax=disp1.axes_)  # doctest: +SKIP

    .. warning::

        For :class:`~sklearn.ensemble.GradientBoostingClassifier` and
        :class:`~sklearn.ensemble.GradientBoostingRegressor`, the
        'recursion' method (used by default) will not account for the `init`
        predictor of the boosting process. In practice, this will produce
        the same values as 'brute' up to a constant offset in the target
        response, provided that `init` is a constant estimator (which is the
        default). However, if `init` is not a constant estimator, the
        partial dependence values are incorrect for 'recursion' because the
        offset will be sample-dependent. It is preferable to use the 'brute'
        method. Note that this only applies to
        :class:`~sklearn.ensemble.GradientBoostingClassifier` and
        :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to
        :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
        :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.

    Parameters
    ----------
    estimator : BaseEstimator
        A fitted estimator object implementing :term:`predict`,
        :term:`predict_proba`, or :term:`decision_function`.
        Multioutput-multiclass classifiers are not supported.

    X : {array-like or dataframe} of shape (n_samples, n_features)
        ``X`` is used to generate a grid of values for the target
        ``features`` (where the partial dependence will be evaluated), and
        also to generate values for the complement features when the
        `method` is 'brute'.

    features : list of {int, str, pair of int, pair of str}
        The target features for which to create the PDPs.
        If features[i] is an int or a string, a one-way PDP is created; if
        features[i] is a tuple, a two-way PDP is created. Each tuple must be
        of size 2.
        If any entry is a string, then it must be in ``feature_names``.

    feature_names : array-like of shape (n_features,), dtype=str, default=None
        Name of each feature; feature_names[i] holds the name of the feature
        with index i.
        By default, the names of the features correspond to their numerical
        index for a NumPy array and to the column names for a pandas dataframe.

    target : int, optional (default=None)
        - In a multiclass setting, specifies the class for which the PDPs
          should be computed. Note that for binary classification, the
          positive class (index 1) is always used.
        - In a multioutput setting, specifies the task for which the PDPs
          should be computed.

        Ignored in binary classification or classical regression settings.

    response_method : 'auto', 'predict_proba' or 'decision_function', \
            optional (default='auto')
        Specifies whether to use :term:`predict_proba` or
        :term:`decision_function` as the target response. For regressors
        this parameter is ignored and the response is always the output of
        :term:`predict`. By default, :term:`predict_proba` is tried first
        and we revert to :term:`decision_function` if it doesn't exist. If
        ``method`` is 'recursion', the response is always the output of
        :term:`decision_function`.

    n_cols : int, optional (default=3)
        The maximum number of columns in the grid plot. Only active when `ax`
        is a single axis or `None`.

    grid_resolution : int, optional (default=100)
        The number of equally spaced points on the axes of the plots, for each
        target feature.

    percentiles : tuple of float, optional (default=(0.05, 0.95))
        The lower and upper percentile used to create the extreme values
        for the PDP axes. Must be in [0, 1].

    method : str, optional (default='auto')
        The method used to calculate the averaged predictions:

        - 'recursion' is only supported for some tree-based estimators (namely
          :class:`~sklearn.ensemble.GradientBoostingClassifier`,
          :class:`~sklearn.ensemble.GradientBoostingRegressor`,
          :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
          :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
          :class:`~sklearn.tree.DecisionTreeRegressor`,
          :class:`~sklearn.ensemble.RandomForestRegressor`)
          but is more efficient in terms of speed.
          With this method, the target response of a
          classifier is always the decision function, not the predicted
          probabilities.

        - 'brute' is supported for any estimator, but is more
          computationally intensive.

        - 'auto': the 'recursion' is used for estimators that support it,
          and 'brute' is used otherwise.

        Please see :ref:`this note <pdp_method_differences>` for
        differences between the 'brute' and 'recursion' method.

    n_jobs : int, optional (default=None)
        The number of CPUs to use to compute the partial dependences.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    verbose : int, optional (default=0)
        Verbose output during PD computations.

    fig : Matplotlib figure object, optional (default=None)
        A figure object onto which the plots will be drawn, after the figure
        has been cleared. By default, a new one is created.

        .. deprecated:: 0.22
           ``fig`` will be removed in 0.24.

    line_kw : dict, optional
        Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.
        For one-way partial dependence plots.

    contour_kw : dict, optional
        Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.
        For two-way partial dependence plots.

    ax : Matplotlib axes or array-like of Matplotlib axes, default=None
        - If a single axis is passed in, it is treated as a bounding axes
          and a grid of partial dependence plots will be drawn within
          these bounds. The `n_cols` parameter controls the number of
          columns in the grid.
        - If an array-like of axes are passed in, the partial dependence
          plots will be drawn directly into these axes.
        - If `None`, a figure and a bounding axes is created and treated
          as the single axes case.

        .. versionadded:: 0.22

    Returns
    -------
    display: :class:`~sklearn.inspection.PartialDependenceDisplay`

    Examples
    --------
    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.ensemble import GradientBoostingRegressor
    >>> X, y = make_friedman1()
    >>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)
    >>> plot_partial_dependence(clf, X, [0, (0, 1)])  # doctest: +SKIP

    See also
    --------
    sklearn.inspection.partial_dependence: Return raw partial
        dependence values
    """
    check_matplotlib_support('plot_partial_dependence')  # noqa
    import matplotlib.pyplot as plt  # noqa
    from matplotlib import transforms  # noqa
    from matplotlib.ticker import MaxNLocator  # noqa
    from matplotlib.ticker import ScalarFormatter  # noqa

    # set target_idx for multi-class estimators
    if hasattr(estimator, 'classes_') and np.size(estimator.classes_) > 2:
        if target is None:
            raise ValueError('target must be specified for multi-class')
        target_idx = np.searchsorted(estimator.classes_, target)
        if (not (0 <= target_idx < len(estimator.classes_)) or
                estimator.classes_[target_idx] != target):
            raise ValueError('target not in est.classes_, got {}'.format(
                target))
    else:
        # regression and binary classification
        target_idx = 0

    # Use check_array only on lists and other non-array-likes / sparse. Do not
    # convert DataFrame into a NumPy array.
    if not(hasattr(X, '__array__') or sparse.issparse(X)):
        X = check_array(X, force_all_finite='allow-nan', dtype=np.object)
    n_features = X.shape[1]

    # convert feature_names to list
    if feature_names is None:
        if hasattr(X, "loc"):
            # get the column names for a pandas dataframe
            feature_names = X.columns.tolist()
        else:
            # define a list of numbered indices for a numpy array
            feature_names = [str(i) for i in range(n_features)]
    elif hasattr(feature_names, "tolist"):
        # convert numpy array or pandas index to a list
        feature_names = feature_names.tolist()
    if len(set(feature_names)) != len(feature_names):
        raise ValueError('feature_names should not contain duplicates.')

    def convert_feature(fx):
        if isinstance(fx, str):
            try:
                fx = feature_names.index(fx)
            except ValueError:
                raise ValueError('Feature %s not in feature_names' % fx)
        return int(fx)

    # convert features into a seq of int tuples
    tmp_features = []
    for fxs in features:
        if isinstance(fxs, (numbers.Integral, str)):
            fxs = (fxs,)
        try:
            fxs = tuple(convert_feature(fx) for fx in fxs)
        except TypeError:
            raise ValueError('Each entry in features must be either an int, '
                             'a string, or an iterable of size at most 2.')
        if not 1 <= np.size(fxs) <= 2:
            raise ValueError('Each entry in features must be either an int, '
                             'a string, or an iterable of size at most 2.')

        tmp_features.append(fxs)

    features = tmp_features

    # Early exit if the axes does not have the correct number of axes
    if ax is not None and not isinstance(ax, plt.Axes):
        axes = np.asarray(ax, dtype=object)
        if axes.size != len(features):
            raise ValueError("Expected ax to have {} axes, got {}".format(
                len(features), axes.size))

    for i in chain.from_iterable(features):
        if i >= len(feature_names):
            raise ValueError('All entries of features must be less than '
                             'len(feature_names) = {0}, got {1}.'
                             .format(len(feature_names), i))

    # compute averaged predictions
    pd_results = Parallel(n_jobs=n_jobs, verbose=verbose)(
        delayed(partial_dependence)(estimator, X, fxs,
                                    response_method=response_method,
                                    method=method,
                                    grid_resolution=grid_resolution,
                                    percentiles=percentiles)
        for fxs in features)

    # For multioutput regression, we can only check the validity of target
    # now that we have the predictions.
    # Also note: as multiclass-multioutput classifiers are not supported,
    # multiclass and multioutput scenario are mutually exclusive. So there is
    # no risk of overwriting target_idx here.
    avg_preds, _ = pd_results[0]  # checking the first result is enough
    if is_regressor(estimator) and avg_preds.shape[0] > 1:
        if target is None:
            raise ValueError(
                'target must be specified for multi-output regressors')
        if not 0 <= target <= avg_preds.shape[0]:
            raise ValueError(
                'target must be in [0, n_tasks], got {}.'.format(target))
        target_idx = target

    # get global min and max average predictions of PD grouped by plot type
    pdp_lim = {}
    for avg_preds, values in pd_results:
        min_pd = avg_preds[target_idx].min()
        max_pd = avg_preds[target_idx].max()
        n_fx = len(values)
        old_min_pd, old_max_pd = pdp_lim.get(n_fx, (min_pd, max_pd))
        min_pd = min(min_pd, old_min_pd)
        max_pd = max(max_pd, old_max_pd)
        pdp_lim[n_fx] = (min_pd, max_pd)

    deciles = {}
    for fx in chain.from_iterable(features):
        if fx not in deciles:
            X_col = _safe_indexing(X, fx, axis=1)
            deciles[fx] = mquantiles(X_col, prob=np.arange(0.1, 1.0, 0.1))

    if fig is not None:
        warnings.warn("The fig parameter is deprecated in version "
                      "0.22 and will be removed in version 0.24",
                      FutureWarning)
        fig.clear()
        ax = fig.gca()

    display = PartialDependenceDisplay(pd_results=pd_results,
                                       features=features,
                                       feature_names=feature_names,
                                       target_idx=target_idx,
                                       pdp_lim=pdp_lim,
                                       deciles=deciles)
    return display.plot(ax=ax, n_cols=n_cols, line_kw=line_kw,
                        contour_kw=contour_kw)


class PartialDependenceDisplay:
    """Partial Dependence Plot (PDP) visualization.

    It is recommended to use
    :func:`~sklearn.inspection.plot_partial_dependence` to create a
    :class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are
    stored as attributes.

    Read more in
    :ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`
    and the :ref:`User Guide <visualizations>`.

    .. versionadded:: 0.22

    Parameters
    ----------
    pd_results : list of (ndarray, ndarray)
        Results of :func:`~sklearn.inspection.partial_dependence` for
        ``features``. Each tuple corresponds to a (averaged_predictions, grid).

    features : list of (int,) or list of (int, int)
        Indices of features for a given plot. A tuple of one integer will plot
        a partial dependence curve of one feature. A tuple of two integers will
        plot a two-way partial dependence curve as a contour plot.

    feature_names : list of str
        Feature names corresponding to the indices in ``features``.

    target_idx : int

        - In a multiclass setting, specifies the class for which the PDPs
          should be computed. Note that for binary classification, the
          positive class (index 1) is always used.
        - In a multioutput setting, specifies the task for which the PDPs
          should be computed.

        Ignored in binary classification or classical regression settings.

    pdp_lim : dict
        Global min and max average predictions, such that all plots will have
        the same scale and y limits. `pdp_lim[1]` is the global min and max for
        single partial dependence curves. `pdp_lim[2]` is the global min and
        max for two-way partial dependence curves.

    deciles : dict
        Deciles for feature indices in ``features``.

    Attributes
    ----------
    bounding_ax_ : matplotlib Axes or None
        If `ax` is an axes or None, the `bounding_ax_` is the axes where the
        grid of partial dependence plots are drawn. If `ax` is a list of axes
        or a numpy array of axes, `bounding_ax_` is None.

    axes_ : ndarray of matplotlib Axes
        If `ax` is an axes or None, `axes_[i, j]` is the axes on the i-th row
        and j-th column. If `ax` is a list of axes, `axes_[i]` is the i-th item
        in `ax`. Elements that are None correspond to a nonexisting axes in
        that position.

    lines_ : ndarray of matplotlib Artists
        If `ax` is an axes or None, `lines_[i, j]` is the partial dependence
        curve on the i-th row and j-th column. If `ax` is a list of axes,
        `lines_[i]` is the partial dependence curve corresponding to the i-th
        item in `ax`. Elements that are None correspond to a nonexisting axes
        or an axes that does not include a line plot.

    deciles_vlines_ : ndarray of matplotlib LineCollection
        If `ax` is an axes or None, `vlines_[i, j]` is the line collection
        representing the x axis deciles of the i-th row and j-th column. If
        `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in
        `ax`. Elements that are None correspond to a nonexisting axes or an
        axes that does not include a PDP plot.

        .. versionadded:: 0.23

    deciles_hlines_ : ndarray of matplotlib LineCollection
        If `ax` is an axes or None, `vlines_[i, j]` is the line collection
        representing the y axis deciles of the i-th row and j-th column. If
        `ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in
        `ax`. Elements that are None correspond to a nonexisting axes or an
        axes that does not include a 2-way plot.

        .. versionadded:: 0.23

    contours_ : ndarray of matplotlib Artists
        If `ax` is an axes or None, `contours_[i, j]` is the partial dependence
        plot on the i-th row and j-th column. If `ax` is a list of axes,
        `contours_[i]` is the partial dependence plot corresponding to the i-th
        item in `ax`. Elements that are None correspond to a nonexisting axes
        or an axes that does not include a contour plot.

    figure_ : matplotlib Figure
        Figure containing partial dependence plots.

    """
    @_deprecate_positional_args
    def __init__(self, pd_results, *, features, feature_names, target_idx,
                 pdp_lim, deciles):
        self.pd_results = pd_results
        self.features = features
        self.feature_names = feature_names
        self.target_idx = target_idx
        self.pdp_lim = pdp_lim
        self.deciles = deciles

    def plot(self, ax=None, n_cols=3, line_kw=None, contour_kw=None):
        """Plot partial dependence plots.

        Parameters
        ----------
        ax : Matplotlib axes or array-like of Matplotlib axes, default=None
            - If a single axis is passed in, it is treated as a bounding axes
              and a grid of partial dependence plots will be drawn within
              these bounds. The `n_cols` parameter controls the number of
              columns in the grid.
            - If an array-like of axes are passed in, the partial dependence
              plots will be drawn directly into these axes.
            - If `None`, a figure and a bounding axes is created and treated
              as the single axes case.

        n_cols : int, default=3
            The maximum number of columns in the grid plot. Only active when
            `ax` is a single axes or `None`.

        line_kw : dict, default=None
            Dict with keywords passed to the `matplotlib.pyplot.plot` call.
            For one-way partial dependence plots.

        contour_kw : dict, default=None
            Dict with keywords passed to the `matplotlib.pyplot.contourf`
            call for two-way partial dependence plots.

        Returns
        -------
        display: :class:`~sklearn.inspection.PartialDependenceDisplay`
        """

        check_matplotlib_support("plot_partial_dependence")
        import matplotlib.pyplot as plt  # noqa
        from matplotlib import transforms  # noqa
        from matplotlib.ticker import MaxNLocator  # noqa
        from matplotlib.ticker import ScalarFormatter  # noqa
        from matplotlib.gridspec import GridSpecFromSubplotSpec  # noqa

        if line_kw is None:
            line_kw = {}
        if contour_kw is None:
            contour_kw = {}

        if ax is None:
            _, ax = plt.subplots()

        default_contour_kws = {"alpha": 0.75}
        contour_kw = {**default_contour_kws, **contour_kw}

        n_features = len(self.features)

        if isinstance(ax, plt.Axes):
            # If the axes has been turned off, it was most likely done by a
            # previous call to plot().
            if not ax.axison:
                raise ValueError("The ax was already used in another plot "
                                 "function, please set ax=display.axes_ "
                                 "instead")

            ax.set_axis_off()
            self.bounding_ax_ = ax
            self.figure_ = ax.figure

            n_cols = min(n_cols, n_features)
            n_rows = int(np.ceil(n_features / float(n_cols)))

            self.axes_ = np.empty((n_rows, n_cols), dtype=np.object)

            axes_ravel = self.axes_.ravel()

            gs = GridSpecFromSubplotSpec(n_rows, n_cols,
                                         subplot_spec=ax.get_subplotspec())
            for i, spec in zip(range(n_features), gs):
                axes_ravel[i] = self.figure_.add_subplot(spec)

        else:  # array-like
            ax = np.asarray(ax, dtype=object)
            if ax.size != n_features:
                raise ValueError("Expected ax to have {} axes, got {}"
                                 .format(n_features, ax.size))

            if ax.ndim == 2:
                n_cols = ax.shape[1]
            else:
                n_cols = None

            self.bounding_ax_ = None
            self.figure_ = ax.ravel()[0].figure
            self.axes_ = ax

        # create contour levels for two-way plots
        if 2 in self.pdp_lim:
            Z_level = np.linspace(*self.pdp_lim[2], num=8)

        self.lines_ = np.empty_like(self.axes_, dtype=np.object)
        self.contours_ = np.empty_like(self.axes_, dtype=np.object)
        self.deciles_vlines_ = np.empty_like(self.axes_, dtype=np.object)
        self.deciles_hlines_ = np.empty_like(self.axes_, dtype=np.object)
        # Create 1d views of these 2d arrays for easy indexing
        lines_ravel = self.lines_.ravel(order='C')
        contours_ravel = self.contours_.ravel(order='C')
        vlines_ravel = self.deciles_vlines_.ravel(order='C')
        hlines_ravel = self.deciles_hlines_.ravel(order='C')

        for i, axi, fx, (avg_preds, values) in zip(count(),
                                                   self.axes_.ravel(),
                                                   self.features,
                                                   self.pd_results):
            if len(values) == 1:
                lines_ravel[i] = axi.plot(values[0],
                                          avg_preds[self.target_idx].ravel(),
                                          **line_kw)[0]
            else:
                # contour plot
                XX, YY = np.meshgrid(values[0], values[1])
                Z = avg_preds[self.target_idx].T
                CS = axi.contour(XX, YY, Z, levels=Z_level, linewidths=0.5,
                                 colors='k')
                contours_ravel[i] = axi.contourf(XX, YY, Z, levels=Z_level,
                                                 vmax=Z_level[-1],
                                                 vmin=Z_level[0],
                                                 **contour_kw)
                axi.clabel(CS, fmt='%2.2f', colors='k', fontsize=10,
                           inline=True)

            trans = transforms.blended_transform_factory(axi.transData,
                                                         axi.transAxes)
            ylim = axi.get_ylim()
            vlines_ravel[i] = axi.vlines(self.deciles[fx[0]], 0, 0.05,
                                         transform=trans, color='k')
            axi.set_ylim(ylim)

            # Set xlabel if it is not already set
            if not axi.get_xlabel():
                axi.set_xlabel(self.feature_names[fx[0]])

            if len(values) == 1:
                if n_cols is None or i % n_cols == 0:
                    axi.set_ylabel('Partial dependence')
                else:
                    axi.set_yticklabels([])
                axi.set_ylim(self.pdp_lim[1])
            else:
                # contour plot
                trans = transforms.blended_transform_factory(axi.transAxes,
                                                             axi.transData)
                xlim = axi.get_xlim()
                hlines_ravel[i] = axi.hlines(self.deciles[fx[1]], 0, 0.05,
                                             transform=trans, color='k')
                # hline erases xlim
                axi.set_ylabel(self.feature_names[fx[1]])
                axi.set_xlim(xlim)
        return self
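As a usage sketch for the display object (illustrative; requires matplotlib and simply mirrors the axes-reuse note in the docstring):

    from sklearn.datasets import make_friedman1
    from sklearn.linear_model import LinearRegression
    from sklearn.inspection import plot_partial_dependence

    X, y = make_friedman1(random_state=0)
    est = LinearRegression().fit(X, y)

    disp = plot_partial_dependence(est, X, [0, 1])
    # The computed results live on the display, so plot() can redraw them
    # in a new figure without recomputing the partial dependence.
    disp.plot(line_kw={"linestyle": "--"})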
Binary file not shown.
Binary file not shown.
474  venv/Lib/site-packages/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py  Normal file
@@ -0,0 +1,474 @@
import numpy as np
from scipy.stats.mstats import mquantiles

import pytest
from numpy.testing import assert_allclose

from sklearn.datasets import load_boston
from sklearn.datasets import load_iris
from sklearn.datasets import make_classification, make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LinearRegression
from sklearn.utils._testing import _convert_container

from sklearn.inspection import plot_partial_dependence


# TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
pytestmark = pytest.mark.filterwarnings(
    "ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
    "matplotlib.*")


@pytest.fixture(scope="module")
def boston():
    return load_boston()


@pytest.fixture(scope="module")
def clf_boston(boston):
    clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
    clf.fit(boston.data, boston.target)
    return clf


@pytest.mark.parametrize("grid_resolution", [10, 20])
def test_plot_partial_dependence(grid_resolution, pyplot, clf_boston, boston):
    # Test partial dependence plot function.
    feature_names = boston.feature_names
    disp = plot_partial_dependence(clf_boston, boston.data,
                                   [0, 1, (0, 1)],
                                   grid_resolution=grid_resolution,
                                   feature_names=feature_names,
                                   contour_kw={"cmap": "jet"})
    fig = pyplot.gcf()
    axs = fig.get_axes()
    assert disp.figure_ is fig
    assert len(axs) == 4

    assert disp.bounding_ax_ is not None
    assert disp.axes_.shape == (1, 3)
    assert disp.lines_.shape == (1, 3)
    assert disp.contours_.shape == (1, 3)
    assert disp.deciles_vlines_.shape == (1, 3)
    assert disp.deciles_hlines_.shape == (1, 3)

    assert disp.lines_[0, 2] is None
    assert disp.contours_[0, 0] is None
    assert disp.contours_[0, 1] is None

    # deciles lines: always show on xaxis, only show on yaxis if 2-way PDP
    for i in range(3):
        assert disp.deciles_vlines_[0, i] is not None
    assert disp.deciles_hlines_[0, 0] is None
    assert disp.deciles_hlines_[0, 1] is None
    assert disp.deciles_hlines_[0, 2] is not None

    assert disp.features == [(0, ), (1, ), (0, 1)]
    assert np.all(disp.feature_names == feature_names)
    assert len(disp.deciles) == 2
    for i in [0, 1]:
        assert_allclose(disp.deciles[i],
                        mquantiles(boston.data[:, i],
                                   prob=np.arange(0.1, 1.0, 0.1)))

    single_feature_positions = [(0, 0), (0, 1)]
    expected_ylabels = ["Partial dependence", ""]

    for i, pos in enumerate(single_feature_positions):
        ax = disp.axes_[pos]
        assert ax.get_ylabel() == expected_ylabels[i]
        assert ax.get_xlabel() == boston.feature_names[i]
        assert_allclose(ax.get_ylim(), disp.pdp_lim[1])

        line = disp.lines_[pos]

        avg_preds, values = disp.pd_results[i]
        assert avg_preds.shape == (1, grid_resolution)
        target_idx = disp.target_idx

        line_data = line.get_data()
        assert_allclose(line_data[0], values[0])
        assert_allclose(line_data[1], avg_preds[target_idx].ravel())

    # two feature position
    ax = disp.axes_[0, 2]
    contour = disp.contours_[0, 2]
    expected_levels = np.linspace(*disp.pdp_lim[2], num=8)
    assert_allclose(contour.levels, expected_levels)
    assert contour.get_cmap().name == "jet"
    assert ax.get_xlabel() == boston.feature_names[0]
    assert ax.get_ylabel() == boston.feature_names[1]


@pytest.mark.parametrize(
    "input_type, feature_names_type",
    [('dataframe', None),
     ('dataframe', 'list'), ('list', 'list'), ('array', 'list'),
     ('dataframe', 'array'), ('list', 'array'), ('array', 'array'),
     ('dataframe', 'series'), ('list', 'series'), ('array', 'series'),
     ('dataframe', 'index'), ('list', 'index'), ('array', 'index')]
)
def test_plot_partial_dependence_str_features(pyplot, clf_boston, boston,
                                              input_type, feature_names_type):
    if input_type == 'dataframe':
        pd = pytest.importorskip("pandas")
        X = pd.DataFrame(boston.data, columns=boston.feature_names)
    elif input_type == 'list':
        X = boston.data.tolist()
    else:
        X = boston.data

    if feature_names_type is None:
        feature_names = None
    else:
        feature_names = _convert_container(boston.feature_names,
                                           feature_names_type)

    grid_resolution = 25
    # check with str features and array feature names and single column
    disp = plot_partial_dependence(clf_boston, X,
                                   [('CRIM', 'ZN'), 'ZN'],
                                   grid_resolution=grid_resolution,
                                   feature_names=feature_names,
                                   n_cols=1, line_kw={"alpha": 0.8})
    fig = pyplot.gcf()
    axs = fig.get_axes()
    assert len(axs) == 3

    assert disp.figure_ is fig
    assert disp.axes_.shape == (2, 1)
    assert disp.lines_.shape == (2, 1)
    assert disp.contours_.shape == (2, 1)
    assert disp.deciles_vlines_.shape == (2, 1)
    assert disp.deciles_hlines_.shape == (2, 1)

    assert disp.lines_[0, 0] is None
    assert disp.deciles_vlines_[0, 0] is not None
    assert disp.deciles_hlines_[0, 0] is not None
    assert disp.contours_[1, 0] is None
    assert disp.deciles_hlines_[1, 0] is None
    assert disp.deciles_vlines_[1, 0] is not None

    # line
    ax = disp.axes_[1, 0]
    assert ax.get_xlabel() == "ZN"
    assert ax.get_ylabel() == "Partial dependence"

    line = disp.lines_[1, 0]
    avg_preds, values = disp.pd_results[1]
    target_idx = disp.target_idx
    assert line.get_alpha() == 0.8

    line_data = line.get_data()
    assert_allclose(line_data[0], values[0])
    assert_allclose(line_data[1], avg_preds[target_idx].ravel())

    # contour
    ax = disp.axes_[0, 0]
    contour = disp.contours_[0, 0]
    expect_levels = np.linspace(*disp.pdp_lim[2], num=8)
    assert_allclose(contour.levels, expect_levels)
assert ax.get_xlabel() == "CRIM"
|
||||
assert ax.get_ylabel() == "ZN"
|
||||
|
||||
|
||||
def test_plot_partial_dependence_custom_axes(pyplot, clf_boston, boston):
    grid_resolution = 25
    fig, (ax1, ax2) = pyplot.subplots(1, 2)
    feature_names = boston.feature_names.tolist()
    disp = plot_partial_dependence(clf_boston, boston.data,
                                   ['CRIM', ('CRIM', 'ZN')],
                                   grid_resolution=grid_resolution,
                                   feature_names=feature_names,
                                   ax=[ax1, ax2])
    assert fig is disp.figure_
    assert disp.bounding_ax_ is None
    assert disp.axes_.shape == (2, )
    assert disp.axes_[0] is ax1
    assert disp.axes_[1] is ax2

    ax = disp.axes_[0]
    assert ax.get_xlabel() == "CRIM"
    assert ax.get_ylabel() == "Partial dependence"

    line = disp.lines_[0]
    avg_preds, values = disp.pd_results[0]
    target_idx = disp.target_idx

    line_data = line.get_data()
    assert_allclose(line_data[0], values[0])
    assert_allclose(line_data[1], avg_preds[target_idx].ravel())

    # contour
    ax = disp.axes_[1]
    contour = disp.contours_[1]
    expected_levels = np.linspace(*disp.pdp_lim[2], num=8)
    assert_allclose(contour.levels, expected_levels)
    assert ax.get_xlabel() == "CRIM"
    assert ax.get_ylabel() == "ZN"


def test_plot_partial_dependence_passing_numpy_axes(pyplot, clf_boston,
                                                    boston):
    grid_resolution = 25
    feature_names = boston.feature_names.tolist()
    disp1 = plot_partial_dependence(clf_boston, boston.data,
                                    ['CRIM', 'ZN'],
                                    grid_resolution=grid_resolution,
                                    feature_names=feature_names)
    assert disp1.axes_.shape == (1, 2)
    assert disp1.axes_[0, 0].get_ylabel() == "Partial dependence"
    assert disp1.axes_[0, 1].get_ylabel() == ""
    assert len(disp1.axes_[0, 0].get_lines()) == 1
    assert len(disp1.axes_[0, 1].get_lines()) == 1

    lr = LinearRegression()
    lr.fit(boston.data, boston.target)

    disp2 = plot_partial_dependence(lr, boston.data,
                                    ['CRIM', 'ZN'],
                                    grid_resolution=grid_resolution,
                                    feature_names=feature_names,
                                    ax=disp1.axes_)

    assert np.all(disp1.axes_ == disp2.axes_)
    assert len(disp2.axes_[0, 0].get_lines()) == 2
    assert len(disp2.axes_[0, 1].get_lines()) == 2


@pytest.mark.parametrize("nrows, ncols", [(2, 2), (3, 1)])
|
||||
def test_plot_partial_dependence_incorrent_num_axes(pyplot, clf_boston,
|
||||
boston, nrows, ncols):
|
||||
grid_resolution = 5
|
||||
fig, axes = pyplot.subplots(nrows, ncols)
|
||||
axes_formats = [list(axes.ravel()), tuple(axes.ravel()), axes]
|
||||
|
||||
msg = "Expected ax to have 2 axes, got {}".format(nrows * ncols)
|
||||
|
||||
disp = plot_partial_dependence(clf_boston, boston.data,
|
||||
['CRIM', 'ZN'],
|
||||
grid_resolution=grid_resolution,
|
||||
feature_names=boston.feature_names)
|
||||
|
||||
for ax_format in axes_formats:
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
plot_partial_dependence(clf_boston, boston.data,
|
||||
['CRIM', 'ZN'],
|
||||
grid_resolution=grid_resolution,
|
||||
feature_names=boston.feature_names,
|
||||
ax=ax_format)
|
||||
|
||||
# with axes object
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
disp.plot(ax=ax_format)
|
||||
|
||||
|
||||
def test_plot_partial_dependence_with_same_axes(pyplot, clf_boston, boston):
    # The first call to plot_partial_dependence will create two new axes to
    # place in the space of the passed-in axes, which results in a total of
    # three axes in the figure.
    # Currently the API does not allow for the second call to
    # plot_partial_dependence to use the same axes again, because it would
    # create two more axes in that space, resulting in five axes. To get the
    # expected behavior one needs to pass the generated axes into the second
    # call:
    # disp1 = plot_partial_dependence(...)
    # disp2 = plot_partial_dependence(..., ax=disp1.axes_)

    grid_resolution = 25
    fig, ax = pyplot.subplots()
    plot_partial_dependence(clf_boston, boston.data, ['CRIM', 'ZN'],
                            grid_resolution=grid_resolution,
                            feature_names=boston.feature_names, ax=ax)

    msg = ("The ax was already used in another plot function, please set "
           "ax=display.axes_ instead")

    with pytest.raises(ValueError, match=msg):
        plot_partial_dependence(clf_boston, boston.data,
                                ['CRIM', 'ZN'],
                                grid_resolution=grid_resolution,
                                feature_names=boston.feature_names, ax=ax)


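# Illustration: a minimal, self-contained sketch of the ax-reuse pattern
# described in the comment above. It only uses names already imported in
# this module plus matplotlib, which these plotting tests assume is
# available; the `_demo_*` helper name is a placeholder, not sklearn API.
def _demo_axes_reuse():
    import matplotlib.pyplot as plt

    X_demo, y_demo = make_regression(n_samples=100, n_features=2,
                                     random_state=0)
    est = LinearRegression().fit(X_demo, y_demo)

    # First call: creates its own axes inside the current figure.
    disp1 = plot_partial_dependence(est, X_demo, [0, 1])
    # Second call: reuse the generated axes rather than the original `ax`.
    plot_partial_dependence(est, X_demo, [0, 1], ax=disp1.axes_)
    plt.close('all')

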
def test_plot_partial_dependence_feature_name_reuse(pyplot, clf_boston,
                                                    boston):
    # second call to plot does not change the feature names from the first
    # call

    feature_names = boston.feature_names
    disp = plot_partial_dependence(clf_boston, boston.data,
                                   [0, 1],
                                   grid_resolution=10,
                                   feature_names=feature_names)

    plot_partial_dependence(clf_boston, boston.data, [0, 1],
                            grid_resolution=10, ax=disp.axes_)

    for i, ax in enumerate(disp.axes_.ravel()):
        assert ax.get_xlabel() == feature_names[i]


def test_plot_partial_dependence_multiclass(pyplot):
    grid_resolution = 25
    clf_int = GradientBoostingClassifier(n_estimators=10, random_state=1)
    iris = load_iris()

    # Test partial dependence plot function on multi-class input.
    clf_int.fit(iris.data, iris.target)
    disp_target_0 = plot_partial_dependence(clf_int, iris.data, [0, 1],
                                            target=0,
                                            grid_resolution=grid_resolution)
    assert disp_target_0.figure_ is pyplot.gcf()
    assert disp_target_0.axes_.shape == (1, 2)
    assert disp_target_0.lines_.shape == (1, 2)
    assert disp_target_0.contours_.shape == (1, 2)
    assert disp_target_0.deciles_vlines_.shape == (1, 2)
    assert disp_target_0.deciles_hlines_.shape == (1, 2)
    assert all(c is None for c in disp_target_0.contours_.flat)
    assert disp_target_0.target_idx == 0

    # now with symbol labels
    target = iris.target_names[iris.target]
    clf_symbol = GradientBoostingClassifier(n_estimators=10, random_state=1)
    clf_symbol.fit(iris.data, target)
    disp_symbol = plot_partial_dependence(clf_symbol, iris.data, [0, 1],
                                          target='setosa',
                                          grid_resolution=grid_resolution)
    assert disp_symbol.figure_ is pyplot.gcf()
    assert disp_symbol.axes_.shape == (1, 2)
    assert disp_symbol.lines_.shape == (1, 2)
    assert disp_symbol.contours_.shape == (1, 2)
    assert disp_symbol.deciles_vlines_.shape == (1, 2)
    assert disp_symbol.deciles_hlines_.shape == (1, 2)
    assert all(c is None for c in disp_symbol.contours_.flat)
    assert disp_symbol.target_idx == 0

    for int_result, symbol_result in zip(disp_target_0.pd_results,
                                         disp_symbol.pd_results):
        avg_preds_int, values_int = int_result
        avg_preds_symbol, values_symbol = symbol_result
        assert_allclose(avg_preds_int, avg_preds_symbol)
        assert_allclose(values_int, values_symbol)

    # check that the pd plots are different for another target
    disp_target_1 = plot_partial_dependence(clf_int, iris.data, [0, 1],
                                            target=1,
                                            grid_resolution=grid_resolution)
    target_0_data_y = disp_target_0.lines_[0, 0].get_data()[1]
    target_1_data_y = disp_target_1.lines_[0, 0].get_data()[1]
    assert any(target_0_data_y != target_1_data_y)


multioutput_regression_data = make_regression(n_samples=50, n_targets=2,
                                              random_state=0)


@pytest.mark.parametrize("target", [0, 1])
|
||||
def test_plot_partial_dependence_multioutput(pyplot, target):
|
||||
# Test partial dependence plot function on multi-output input.
|
||||
X, y = multioutput_regression_data
|
||||
clf = LinearRegression().fit(X, y)
|
||||
|
||||
grid_resolution = 25
|
||||
disp = plot_partial_dependence(clf, X, [0, 1], target=target,
|
||||
grid_resolution=grid_resolution)
|
||||
fig = pyplot.gcf()
|
||||
axs = fig.get_axes()
|
||||
assert len(axs) == 3
|
||||
assert disp.target_idx == target
|
||||
assert disp.bounding_ax_ is not None
|
||||
|
||||
positions = [(0, 0), (0, 1)]
|
||||
expected_label = ["Partial dependence", ""]
|
||||
|
||||
for i, pos in enumerate(positions):
|
||||
ax = disp.axes_[pos]
|
||||
assert ax.get_ylabel() == expected_label[i]
|
||||
assert ax.get_xlabel() == "{}".format(i)
|
||||
|
||||
|
||||
def test_plot_partial_dependence_dataframe(pyplot, clf_boston, boston):
    pd = pytest.importorskip('pandas')
    df = pd.DataFrame(boston.data, columns=boston.feature_names)

    grid_resolution = 25

    plot_partial_dependence(
        clf_boston, df, ['TAX', 'AGE'], grid_resolution=grid_resolution,
        feature_names=df.columns.tolist()
    )


dummy_classification_data = make_classification(random_state=0)


@pytest.mark.parametrize(
    "data, params, err_msg",
    [(multioutput_regression_data, {"target": None, 'features': [0]},
      "target must be specified for multi-output"),
     (multioutput_regression_data, {"target": -1, 'features': [0]},
      r'target must be in \[0, n_tasks\]'),
     (multioutput_regression_data, {"target": 100, 'features': [0]},
      r'target must be in \[0, n_tasks\]'),
     (dummy_classification_data,
      {'features': ['foobar'], 'feature_names': None},
      'Feature foobar not in feature_names'),
     (dummy_classification_data,
      {'features': ['foobar'], 'feature_names': ['abcd', 'def']},
      'Feature foobar not in feature_names'),
     (dummy_classification_data, {'features': [(1, 2, 3)]},
      'Each entry in features must be either an int, '),
     (dummy_classification_data, {'features': [1, {}]},
      'Each entry in features must be either an int, '),
     (dummy_classification_data, {'features': [tuple()]},
      'Each entry in features must be either an int, '),
     (dummy_classification_data,
      {'features': [123], 'feature_names': ['blahblah']},
      'All entries of features must be less than '),
     (dummy_classification_data,
      {'features': [0, 1, 2], 'feature_names': ['a', 'b', 'a']},
      'feature_names should not contain duplicates')]
)
def test_plot_partial_dependence_error(pyplot, data, params, err_msg):
    X, y = data
    estimator = LinearRegression().fit(X, y)

    with pytest.raises(ValueError, match=err_msg):
        plot_partial_dependence(estimator, X, **params)


@pytest.mark.parametrize("params, err_msg", [
|
||||
({'target': 4, 'features': [0]},
|
||||
'target not in est.classes_, got 4'),
|
||||
({'target': None, 'features': [0]},
|
||||
'target must be specified for multi-class'),
|
||||
({'target': 1, 'features': [4.5]},
|
||||
'Each entry in features must be either an int,'),
|
||||
])
|
||||
def test_plot_partial_dependence_multiclass_error(pyplot, params, err_msg):
|
||||
iris = load_iris()
|
||||
clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
|
||||
clf.fit(iris.data, iris.target)
|
||||
|
||||
with pytest.raises(ValueError, match=err_msg):
|
||||
plot_partial_dependence(clf, iris.data, **params)
|
||||
|
||||
|
||||
def test_plot_partial_dependence_fig_deprecated(pyplot):
    # Make sure fig object is correctly used if not None
    X, y = make_regression(n_samples=50, random_state=0)
    clf = LinearRegression()
    clf.fit(X, y)

    fig = pyplot.figure()
    grid_resolution = 25

    msg = ("The fig parameter is deprecated in version 0.22 and will be "
           "removed in version 0.24")
    with pytest.warns(FutureWarning, match=msg):
        plot_partial_dependence(
            clf, X, [0, 1], target=0, grid_resolution=grid_resolution,
            fig=fig)

    assert pyplot.gcf() is fig
@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _partial_dependence  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.inspection.partial_dependence'
correct_import_path = 'sklearn.inspection'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_partial_dependence, name)


if sys.version_info < (3, 7):
    Pep562(__name__)
17
venv/Lib/site-packages/sklearn/inspection/setup.py
Normal file
@@ -0,0 +1,17 @@
from numpy.distutils.misc_util import Configuration


def configuration(parent_package="", top_path=None):
    config = Configuration("inspection", parent_package, top_path)

    config.add_subpackage('_plot')
    config.add_subpackage('_plot.tests')

    config.add_subpackage('tests')

    return config


if __name__ == "__main__":
    from numpy.distutils.core import setup
    setup(**configuration().todict())
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,663 @@
"""
Testing for the partial dependence module.
"""

import numpy as np
import pytest

import sklearn
from sklearn.inspection import partial_dependence
from sklearn.inspection._partial_dependence import (
    _grid_from_X,
    _partial_dependence_brute,
    _partial_dependence_recursion
)
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import MultiTaskLasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import load_iris
from sklearn.datasets import make_classification, make_regression
from sklearn.cluster import KMeans
from sklearn.compose import make_column_transformer
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import make_pipeline
from sklearn.dummy import DummyClassifier
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.exceptions import NotFittedError
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import ignore_warnings
from sklearn.utils import _IS_32BIT
from sklearn.utils.validation import check_random_state
from sklearn.tree.tests.test_tree import assert_is_subtree


# toy sample
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
y = [-1, -1, -1, 1, 1, 1]


# (X, y), n_targets  <-- as expected in the output of partial_dep()
binary_classification_data = (make_classification(n_samples=50,
                                                  random_state=0), 1)
multiclass_classification_data = (make_classification(n_samples=50,
                                                      n_classes=3,
                                                      n_clusters_per_class=1,
                                                      random_state=0), 3)
regression_data = (make_regression(n_samples=50, random_state=0), 1)
multioutput_regression_data = (make_regression(n_samples=50, n_targets=2,
                                               random_state=0), 2)

# iris
iris = load_iris()


@pytest.mark.parametrize('Estimator, method, data', [
    (GradientBoostingClassifier, 'recursion', binary_classification_data),
    (GradientBoostingClassifier, 'recursion', multiclass_classification_data),
    (GradientBoostingClassifier, 'brute', binary_classification_data),
    (GradientBoostingClassifier, 'brute', multiclass_classification_data),
    (GradientBoostingRegressor, 'recursion', regression_data),
    (GradientBoostingRegressor, 'brute', regression_data),
    (DecisionTreeRegressor, 'brute', regression_data),
    (LinearRegression, 'brute', regression_data),
    (LinearRegression, 'brute', multioutput_regression_data),
    (LogisticRegression, 'brute', binary_classification_data),
    (LogisticRegression, 'brute', multiclass_classification_data),
    (MultiTaskLasso, 'brute', multioutput_regression_data),
    ])
@pytest.mark.parametrize('grid_resolution', (5, 10))
@pytest.mark.parametrize('features', ([1], [1, 2]))
def test_output_shape(Estimator, method, data, grid_resolution,
                      features):
    # Check that partial_dependence has consistent output shape for different
    # kinds of estimators:
    # - classifiers with binary and multiclass settings
    # - regressors
    # - multi-task regressors

    est = Estimator()

    # n_targets corresponds to the number of classes (1 for binary classif)
    # or the number of tasks / outputs in multi-task settings. It's equal to
    # 1 for classical regression_data.
    (X, y), n_targets = data

    est.fit(X, y)
    pdp, axes = partial_dependence(est, X=X, features=features,
                                   method=method,
                                   grid_resolution=grid_resolution)

    expected_pdp_shape = (n_targets, *[grid_resolution
                                       for _ in range(len(features))])
    expected_axes_shape = (len(features), grid_resolution)

    assert pdp.shape == expected_pdp_shape
    assert axes is not None
    assert np.asarray(axes).shape == expected_axes_shape


def test_grid_from_X():
    # tests for _grid_from_X: sanity check for output, and for shapes.

    # Make sure that the grid is a cartesian product of the input (it will
    # use the unique values instead of the percentiles)
    percentiles = (.05, .95)
    grid_resolution = 100
    X = np.asarray([[1, 2],
                    [3, 4]])
    grid, axes = _grid_from_X(X, percentiles, grid_resolution)
    assert_array_equal(grid, [[1, 2],
                              [1, 4],
                              [3, 2],
                              [3, 4]])
    assert_array_equal(axes, X.T)

    # test shapes of returned objects depending on the number of unique
    # values for a feature.
    rng = np.random.RandomState(0)
    grid_resolution = 15

    # n_unique_values > grid_resolution
    X = rng.normal(size=(20, 2))
    grid, axes = _grid_from_X(X, percentiles, grid_resolution=grid_resolution)
    assert grid.shape == (grid_resolution * grid_resolution, X.shape[1])
    assert np.asarray(axes).shape == (2, grid_resolution)

    # n_unique_values < grid_resolution, will use actual values
    n_unique_values = 12
    X[n_unique_values - 1:, 0] = 12345
    rng.shuffle(X)  # just to make sure the order is irrelevant
    grid, axes = _grid_from_X(X, percentiles, grid_resolution=grid_resolution)
    assert grid.shape == (n_unique_values * grid_resolution, X.shape[1])
    # axes is a list of arrays of different shapes
    assert axes[0].shape == (n_unique_values,)
    assert axes[1].shape == (grid_resolution,)


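# Illustration: the cartesian-product construction that test_grid_from_X
# checks above, spelled out with plain numpy. Given one 1d array of
# candidate values per feature, the grid lists every combination, one
# feature per column. The `_demo_*` helper name is a placeholder, not
# sklearn API.
def _demo_cartesian_grid():
    axes = [np.array([1, 3]), np.array([2, 4])]
    mesh = np.meshgrid(*axes, indexing='ij')
    grid = np.column_stack([m.ravel() for m in mesh])
    # grid is [[1, 2], [1, 4], [3, 2], [3, 4]], matching the assertion above
    return grid

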
@pytest.mark.parametrize(
    "grid_resolution, percentiles, err_msg",
    [(2, (0, 0.0001), "percentiles are too close"),
     (100, (1, 2, 3, 4), "'percentiles' must be a sequence of 2 elements"),
     (100, 12345, "'percentiles' must be a sequence of 2 elements"),
     (100, (-1, .95), r"'percentiles' values must be in \[0, 1\]"),
     (100, (.05, 2), r"'percentiles' values must be in \[0, 1\]"),
     (100, (.9, .1), r"percentiles\[0\] must be strictly less than"),
     (1, (0.05, 0.95), "'grid_resolution' must be strictly greater than 1")]
)
def test_grid_from_X_error(grid_resolution, percentiles, err_msg):
    X = np.asarray([[1, 2], [3, 4]])
    with pytest.raises(ValueError, match=err_msg):
        _grid_from_X(
            X, grid_resolution=grid_resolution, percentiles=percentiles
        )


@pytest.mark.parametrize('target_feature', range(5))
@pytest.mark.parametrize('est, method', [
    (LinearRegression(), 'brute'),
    (GradientBoostingRegressor(random_state=0), 'brute'),
    (GradientBoostingRegressor(random_state=0), 'recursion'),
    (HistGradientBoostingRegressor(random_state=0), 'brute'),
    (HistGradientBoostingRegressor(random_state=0), 'recursion')]
)
def test_partial_dependence_helpers(est, method, target_feature):
    # Check that what is returned by _partial_dependence_brute or
    # _partial_dependence_recursion is equivalent to manually setting a
    # target feature to a given value, and computing the average prediction
    # over all samples.
    # This also checks that the brute and recursion methods give the same
    # output.
    # Note that even on the trainset, the brute and the recursion methods
    # aren't always strictly equivalent, in particular when the brute method
    # generates unrealistic samples that have low mass in the joint
    # distribution of the input features, and when some of the features are
    # dependent. Hence the high tolerance on the checks.

    X, y = make_regression(random_state=0, n_features=5, n_informative=5)
    # The 'init' estimator for GBDT (here the average prediction) isn't taken
    # into account with the recursion method, for technical reasons. We set
    # the mean to 0 so that this 'bug' doesn't have any effect.
    y = y - y.mean()
    est.fit(X, y)

    # target feature will be set to .5 and then to 123
    features = np.array([target_feature], dtype=np.int32)
    grid = np.array([[.5],
                     [123]])

    if method == 'brute':
        pdp = _partial_dependence_brute(est, grid, features, X,
                                        response_method='auto')
    else:
        pdp = _partial_dependence_recursion(est, grid, features)

    mean_predictions = []
    for val in (.5, 123):
        X_ = X.copy()
        X_[:, target_feature] = val
        mean_predictions.append(est.predict(X_).mean())

    pdp = pdp[0]  # (shape is (1, 2) so make it (2,))

    # allow for greater margin for error with recursion method
    rtol = 1e-1 if method == 'recursion' else 1e-3
    assert np.allclose(pdp, mean_predictions, rtol=rtol)


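# Illustration: the brute-force definition the test above verifies, written
# out directly. The partial dependence at a grid value v is the mean
# prediction over the dataset with the target feature clamped to v for every
# sample. The `_demo_*` helper name is a placeholder, not sklearn API; `est`
# is any fitted regressor.
def _demo_brute_force_pd(est, X, feature_idx, grid_values):
    pd_values = []
    for v in grid_values:
        X_clamped = X.copy()
        X_clamped[:, feature_idx] = v
        pd_values.append(est.predict(X_clamped).mean())
    return np.asarray(pd_values)

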
@pytest.mark.parametrize('seed', range(1))
def test_recursion_decision_tree_vs_forest_and_gbdt(seed):
    # Make sure that the recursion method gives the same results on a
    # DecisionTreeRegressor and a GradientBoostingRegressor or a
    # RandomForestRegressor with 1 tree and equivalent parameters.

    rng = np.random.RandomState(seed)

    # Purely random dataset to avoid correlated features
    n_samples = 1000
    n_features = 5
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples) * 10

    # The 'init' estimator for GBDT (here the average prediction) isn't taken
    # into account with the recursion method, for technical reasons. We set
    # the mean to 0 so that this 'bug' doesn't have any effect.
    y = y - y.mean()

    # set max_depth not too high to avoid splits with same gain but different
    # features
    max_depth = 5

    tree_seed = 0
    forest = RandomForestRegressor(n_estimators=1, max_features=None,
                                   bootstrap=False, max_depth=max_depth,
                                   random_state=tree_seed)
    # The forest will use ensemble.base._set_random_states to set the
    # random_state of the tree sub-estimator. We simulate this here to have
    # equivalent estimators.
    equiv_random_state = check_random_state(tree_seed).randint(
        np.iinfo(np.int32).max)
    gbdt = GradientBoostingRegressor(n_estimators=1, learning_rate=1,
                                     criterion='mse', max_depth=max_depth,
                                     random_state=equiv_random_state)
    tree = DecisionTreeRegressor(max_depth=max_depth,
                                 random_state=equiv_random_state)

    forest.fit(X, y)
    gbdt.fit(X, y)
    tree.fit(X, y)

    # sanity check: if the trees aren't the same, the PD values won't be
    # equal
    try:
        assert_is_subtree(tree.tree_, gbdt[0, 0].tree_)
        assert_is_subtree(tree.tree_, forest[0].tree_)
    except AssertionError:
        # For some reason the trees aren't exactly equal on 32 bits, so the
        # PDs cannot be equal either. See
        # https://github.com/scikit-learn/scikit-learn/issues/8853
        assert _IS_32BIT, "this should only fail on 32 bit platforms"
        return

    grid = rng.randn(50).reshape(-1, 1)
    for f in range(n_features):
        features = np.array([f], dtype=np.int32)

        pdp_forest = _partial_dependence_recursion(forest, grid, features)
        pdp_gbdt = _partial_dependence_recursion(gbdt, grid, features)
        pdp_tree = _partial_dependence_recursion(tree, grid, features)

        np.testing.assert_allclose(pdp_gbdt, pdp_tree)
        np.testing.assert_allclose(pdp_forest, pdp_tree)


@pytest.mark.parametrize('est', (
    GradientBoostingClassifier(random_state=0),
    HistGradientBoostingClassifier(random_state=0),
))
@pytest.mark.parametrize('target_feature', (0, 1, 2, 3, 4, 5))
def test_recursion_decision_function(est, target_feature):
    # Make sure the recursion method (which implicitly uses
    # decision_function) gives the same result as the brute method with
    # response_method=decision_function

    X, y = make_classification(n_classes=2, n_clusters_per_class=1,
                               random_state=1)
    assert np.mean(y) == .5  # make sure the init estimator predicts 0 anyway

    est.fit(X, y)

    preds_1, _ = partial_dependence(est, X, [target_feature],
                                    response_method='decision_function',
                                    method='recursion')
    preds_2, _ = partial_dependence(est, X, [target_feature],
                                    response_method='decision_function',
                                    method='brute')

    assert_allclose(preds_1, preds_2, atol=1e-7)


@pytest.mark.parametrize('est', (
    LinearRegression(),
    GradientBoostingRegressor(random_state=0),
    HistGradientBoostingRegressor(random_state=0, min_samples_leaf=1,
                                  max_leaf_nodes=None, max_iter=1),
    DecisionTreeRegressor(random_state=0),
))
@pytest.mark.parametrize('power', (1, 2))
def test_partial_dependence_easy_target(est, power):
    # If the target y only depends on one feature in an obvious way (linear
    # or quadratic) then the partial dependence for that feature should
    # reflect it.
    # We here fit a linear regression model (with polynomial features if
    # needed) and compute r_squared to check that the partial dependence
    # correctly reflects the target.

    rng = np.random.RandomState(0)
    n_samples = 200
    target_variable = 2
    X = rng.normal(size=(n_samples, 5))
    y = X[:, target_variable]**power

    est.fit(X, y)

    averaged_predictions, values = partial_dependence(
        est, features=[target_variable], X=X, grid_resolution=1000)

    new_X = values[0].reshape(-1, 1)
    new_y = averaged_predictions[0]
    # add polynomial features if needed
    new_X = PolynomialFeatures(degree=power).fit_transform(new_X)

    lr = LinearRegression().fit(new_X, new_y)
    r2 = r2_score(new_y, lr.predict(new_X))

    assert r2 > .99


@pytest.mark.parametrize('Estimator',
                         (sklearn.tree.DecisionTreeClassifier,
                          sklearn.tree.ExtraTreeClassifier,
                          sklearn.ensemble.ExtraTreesClassifier,
                          sklearn.neighbors.KNeighborsClassifier,
                          sklearn.neighbors.RadiusNeighborsClassifier,
                          sklearn.ensemble.RandomForestClassifier))
def test_multiclass_multioutput(Estimator):
    # Make sure error is raised for multiclass-multioutput classifiers

    # make multiclass-multioutput dataset
    X, y = make_classification(n_classes=3, n_clusters_per_class=1,
                               random_state=0)
    y = np.array([y, y]).T

    est = Estimator()
    est.fit(X, y)

    with pytest.raises(
            ValueError,
            match="Multiclass-multioutput estimators are not supported"):
        partial_dependence(est, X, [0])


class NoPredictProbaNoDecisionFunction(ClassifierMixin, BaseEstimator):
    def fit(self, X, y):
        # simulate that we have some classes
        self.classes_ = [0, 1]
        return self


@pytest.mark.parametrize(
    "estimator, params, err_msg",
    [(KMeans(),
      {'features': [0]},
      "'estimator' must be a fitted regressor or classifier"),
     (LinearRegression(),
      {'features': [0], 'response_method': 'predict_proba'},
      'The response_method parameter is ignored for regressors'),
     (GradientBoostingClassifier(random_state=0),
      {'features': [0], 'response_method': 'predict_proba',
       'method': 'recursion'},
      "'recursion' method, the response_method must be 'decision_function'"),
     (GradientBoostingClassifier(random_state=0),
      {'features': [0], 'response_method': 'predict_proba',
       'method': 'auto'},
      "'recursion' method, the response_method must be 'decision_function'"),
     (GradientBoostingClassifier(random_state=0),
      {'features': [0], 'response_method': 'blahblah'},
      'response_method blahblah is invalid. Accepted response_method'),
     (NoPredictProbaNoDecisionFunction(),
      {'features': [0], 'response_method': 'auto'},
      'The estimator has no predict_proba and no decision_function method'),
     (NoPredictProbaNoDecisionFunction(),
      {'features': [0], 'response_method': 'predict_proba'},
      'The estimator has no predict_proba method.'),
     (NoPredictProbaNoDecisionFunction(),
      {'features': [0], 'response_method': 'decision_function'},
      'The estimator has no decision_function method.'),
     (LinearRegression(),
      {'features': [0], 'method': 'blahblah'},
      'blahblah is invalid. Accepted method names are brute, recursion, '
      'auto'),
     (LinearRegression(),
      {'features': [0], 'method': 'recursion'},
      "Only the following estimators support the 'recursion' method:")]
)
def test_partial_dependence_error(estimator, params, err_msg):
    X, y = make_classification(random_state=0)
    estimator.fit(X, y)

    with pytest.raises(ValueError, match=err_msg):
        partial_dependence(estimator, X, **params)


@pytest.mark.parametrize(
    "with_dataframe, err_msg",
    [(True, "Only array-like or scalar are supported"),
     (False, "Only array-like or scalar are supported")]
)
def test_partial_dependence_slice_error(with_dataframe, err_msg):
    X, y = make_classification(random_state=0)
    if with_dataframe:
        pd = pytest.importorskip('pandas')
        X = pd.DataFrame(X)
    estimator = LogisticRegression().fit(X, y)

    with pytest.raises(TypeError, match=err_msg):
        partial_dependence(estimator, X, features=slice(0, 2, 1))


@pytest.mark.parametrize(
    'estimator',
    [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
@pytest.mark.parametrize('features', [-1, 10000])
def test_partial_dependence_unknown_feature_indices(estimator, features):
    X, y = make_classification(random_state=0)
    estimator.fit(X, y)

    err_msg = 'all features must be in'
    with pytest.raises(ValueError, match=err_msg):
        partial_dependence(estimator, X, [features])


@pytest.mark.parametrize(
    'estimator',
    [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
def test_partial_dependence_unknown_feature_string(estimator):
    pd = pytest.importorskip("pandas")
    X, y = make_classification(random_state=0)
    df = pd.DataFrame(X)
    estimator.fit(df, y)

    features = ['random']
    err_msg = 'A given column is not a column of the dataframe'
    with pytest.raises(ValueError, match=err_msg):
        partial_dependence(estimator, df, features)


@pytest.mark.parametrize(
    'estimator',
    [LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
def test_partial_dependence_X_list(estimator):
    # check that array-like objects are accepted
    X, y = make_classification(random_state=0)
    estimator.fit(X, y)
    partial_dependence(estimator, list(X), [0])


# TODO: Remove in 0.24 when DummyClassifier's `strategy` default updates
@ignore_warnings(category=FutureWarning)
def test_warning_recursion_non_constant_init():
    # make sure that passing a non-constant init parameter to a GBDT and
    # using the recursion method yields a warning.

    gbc = GradientBoostingClassifier(init=DummyClassifier(), random_state=0)
    gbc.fit(X, y)

    with pytest.warns(
            UserWarning,
            match='Using recursion method with a non-constant init '
                  'predictor'):
        partial_dependence(gbc, X, [0], method='recursion')

    # the warning is raised again on a second call
    with pytest.warns(
            UserWarning,
            match='Using recursion method with a non-constant init '
                  'predictor'):
        partial_dependence(gbc, X, [0], method='recursion')


def test_partial_dependence_sample_weight():
    # Test near perfect correlation between partial dependence and diagonal
    # when sample weights emphasize y = x predictions
    # non-regression test for #13193
    # TODO: extend to HistGradientBoosting once sample_weight is supported
    N = 1000
    rng = np.random.RandomState(123456)
    mask = rng.randint(2, size=N, dtype=bool)

    x = rng.rand(N)
    # set y = x on mask and y = -x outside
    y = x.copy()
    y[~mask] = -y[~mask]
    X = np.c_[mask, x]
    # sample weights to emphasize data points where y = x
    sample_weight = np.ones(N)
    sample_weight[mask] = 1000.

    clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
    clf.fit(X, y, sample_weight=sample_weight)

    pdp, values = partial_dependence(clf, X, features=[1])

    assert np.corrcoef(pdp, values)[0, 1] > 0.99


def test_hist_gbdt_sw_not_supported():
    # TODO: remove/fix when PDP supports HGBT with sample weights
    clf = HistGradientBoostingRegressor(random_state=1)
    clf.fit(X, y, sample_weight=np.ones(len(X)))

    with pytest.raises(NotImplementedError,
                       match="does not support partial dependence"):
        partial_dependence(clf, X, features=[1])


# TODO: Remove in 0.24 when DummyClassifier's `strategy` default updates
@ignore_warnings(category=FutureWarning)
def test_partial_dependence_pipeline():
    # check that partial_dependence supports pipelines
    iris = load_iris()

    scaler = StandardScaler()
    clf = DummyClassifier(random_state=42)
    pipe = make_pipeline(scaler, clf)

    clf.fit(scaler.fit_transform(iris.data), iris.target)
    pipe.fit(iris.data, iris.target)

    features = 0
    pdp_pipe, values_pipe = partial_dependence(
        pipe, iris.data, features=[features], grid_resolution=10
    )
    pdp_clf, values_clf = partial_dependence(
        clf, scaler.transform(iris.data), features=[features],
        grid_resolution=10
    )
    assert_allclose(pdp_pipe, pdp_clf)
    assert_allclose(
        values_pipe[0],
        values_clf[0] * scaler.scale_[features] + scaler.mean_[features]
    )


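# Illustration: why the last assertion above maps grid values back through
# the scaler. The pipeline builds its grid on the raw feature while the bare
# estimator sees the standardized feature, and a StandardScaler is inverted
# column-wise by x_raw = x_scaled * scale_ + mean_. The `_demo_*` helper
# name is a placeholder, not sklearn API.
def _demo_grid_back_transform(scaler, values_scaled, feature_idx):
    return (values_scaled * scaler.scale_[feature_idx]
            + scaler.mean_[feature_idx])

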
@pytest.mark.parametrize(
    "estimator",
    [LogisticRegression(max_iter=1000, random_state=0),
     GradientBoostingClassifier(random_state=0, n_estimators=5)],
    ids=['estimator-brute', 'estimator-recursion']
)
@pytest.mark.parametrize(
    "preprocessor",
    [None,
     make_column_transformer(
         (StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
         (RobustScaler(), [iris.feature_names[i] for i in (1, 3)])),
     make_column_transformer(
         (StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
         remainder='passthrough')],
    ids=['None', 'column-transformer', 'column-transformer-passthrough']
)
@pytest.mark.parametrize(
    "features",
    [[0, 2], [iris.feature_names[i] for i in (0, 2)]],
    ids=['features-integer', 'features-string']
)
def test_partial_dependence_dataframe(estimator, preprocessor, features):
    # check that partial_dependence supports dataframes and pipelines
    # including a column transformer
    pd = pytest.importorskip("pandas")
    df = pd.DataFrame(iris.data, columns=iris.feature_names)

    pipe = make_pipeline(preprocessor, estimator)
    pipe.fit(df, iris.target)
    pdp_pipe, values_pipe = partial_dependence(
        pipe, df, features=features, grid_resolution=10
    )

    # the column transformer will reorder the columns when transforming, so
    # we mix the indices to be sure that we are computing the partial
    # dependence of the right columns
    if preprocessor is not None:
        X_proc = clone(preprocessor).fit_transform(df)
        features_clf = [0, 1]
    else:
        X_proc = df
        features_clf = [0, 2]

    clf = clone(estimator).fit(X_proc, iris.target)
    pdp_clf, values_clf = partial_dependence(
        clf, X_proc, features=features_clf, method='brute',
        grid_resolution=10
    )

    assert_allclose(pdp_pipe, pdp_clf)
    if preprocessor is not None:
        scaler = preprocessor.named_transformers_['standardscaler']
        assert_allclose(
            values_pipe[1],
            values_clf[1] * scaler.scale_[1] + scaler.mean_[1]
        )
    else:
        assert_allclose(values_pipe[1], values_clf[1])


@pytest.mark.parametrize(
    "features, expected_pd_shape",
    [(0, (3, 10)),
     (iris.feature_names[0], (3, 10)),
     ([0, 2], (3, 10, 10)),
     ([iris.feature_names[i] for i in (0, 2)], (3, 10, 10)),
     ([True, False, True, False], (3, 10, 10))],
    ids=['scalar-int', 'scalar-str', 'list-int', 'list-str', 'mask']
)
def test_partial_dependence_feature_type(features, expected_pd_shape):
    # check all possible feature types supported in PDP
    pd = pytest.importorskip("pandas")
    df = pd.DataFrame(iris.data, columns=iris.feature_names)

    preprocessor = make_column_transformer(
        (StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
        (RobustScaler(), [iris.feature_names[i] for i in (1, 3)])
    )
    pipe = make_pipeline(
        preprocessor, LogisticRegression(max_iter=1000, random_state=0)
    )
    pipe.fit(df, iris.target)
    pdp_pipe, values_pipe = partial_dependence(
        pipe, df, features=features, grid_resolution=10
    )
    assert pdp_pipe.shape == expected_pd_shape
    assert len(values_pipe) == len(pdp_pipe.shape) - 1


@pytest.mark.parametrize(
    "estimator", [LinearRegression(), LogisticRegression(),
                  GradientBoostingRegressor(), GradientBoostingClassifier()]
)
def test_partial_dependence_unfitted(estimator):
    X = iris.data
    preprocessor = make_column_transformer(
        (StandardScaler(), [0, 2]), (RobustScaler(), [1, 3])
    )
    pipe = make_pipeline(preprocessor, estimator)
    with pytest.raises(NotFittedError, match="is not fitted yet"):
        partial_dependence(pipe, X, features=[0, 2], grid_resolution=10)
    with pytest.raises(NotFittedError, match="is not fitted yet"):
        partial_dependence(estimator, X, features=[0, 2], grid_resolution=10)
@@ -0,0 +1,353 @@
import pytest
import numpy as np

from numpy.testing import assert_allclose

from sklearn.compose import ColumnTransformer
from sklearn.datasets import load_diabetes
from sklearn.datasets import load_iris
from sklearn.datasets import make_classification
from sklearn.datasets import make_regression
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale
from sklearn.utils import parallel_backend
from sklearn.utils._testing import _convert_container


@pytest.mark.parametrize("n_jobs", [1, 2])
|
||||
def test_permutation_importance_correlated_feature_regression(n_jobs):
|
||||
# Make sure that feature highly correlated to the target have a higher
|
||||
# importance
|
||||
rng = np.random.RandomState(42)
|
||||
n_repeats = 5
|
||||
|
||||
X, y = load_diabetes(return_X_y=True)
|
||||
y_with_little_noise = (
|
||||
y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)
|
||||
|
||||
X = np.hstack([X, y_with_little_noise])
|
||||
|
||||
clf = RandomForestRegressor(n_estimators=10, random_state=42)
|
||||
clf.fit(X, y)
|
||||
|
||||
result = permutation_importance(clf, X, y, n_repeats=n_repeats,
|
||||
random_state=rng, n_jobs=n_jobs)
|
||||
|
||||
assert result.importances.shape == (X.shape[1], n_repeats)
|
||||
|
||||
# the correlated feature with y was added as the last column and should
|
||||
# have the highest importance
|
||||
assert np.all(result.importances_mean[-1] >
|
||||
result.importances_mean[:-1])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n_jobs", [1, 2])
|
||||
def test_permutation_importance_correlated_feature_regression_pandas(n_jobs):
|
||||
pd = pytest.importorskip("pandas")
|
||||
|
||||
# Make sure that feature highly correlated to the target have a higher
|
||||
# importance
|
||||
rng = np.random.RandomState(42)
|
||||
n_repeats = 5
|
||||
|
||||
dataset = load_iris()
|
||||
X, y = dataset.data, dataset.target
|
||||
y_with_little_noise = (
|
||||
y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)
|
||||
|
||||
# Adds feature correlated with y as the last column
|
||||
X = pd.DataFrame(X, columns=dataset.feature_names)
|
||||
X['correlated_feature'] = y_with_little_noise
|
||||
|
||||
clf = RandomForestClassifier(n_estimators=10, random_state=42)
|
||||
clf.fit(X, y)
|
||||
|
||||
result = permutation_importance(clf, X, y, n_repeats=n_repeats,
|
||||
random_state=rng, n_jobs=n_jobs)
|
||||
|
||||
assert result.importances.shape == (X.shape[1], n_repeats)
|
||||
|
||||
# the correlated feature with y was added as the last column and should
|
||||
# have the highest importance
|
||||
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n_jobs", [1, 2])
|
||||
def test_robustness_to_high_cardinality_noisy_feature(n_jobs, seed=42):
|
||||
# Permutation variable importance should not be affected by the high
|
||||
# cardinality bias of traditional feature importances, especially when
|
||||
# computed on a held-out test set:
|
||||
rng = np.random.RandomState(seed)
|
||||
n_repeats = 5
|
||||
n_samples = 1000
|
||||
n_classes = 5
|
||||
n_informative_features = 2
|
||||
n_noise_features = 1
|
||||
n_features = n_informative_features + n_noise_features
|
||||
|
||||
# Generate a multiclass classification dataset and a set of informative
|
||||
# binary features that can be used to predict some classes of y exactly
|
||||
# while leaving some classes unexplained to make the problem harder.
|
||||
classes = np.arange(n_classes)
|
||||
y = rng.choice(classes, size=n_samples)
|
||||
X = np.hstack([(y == c).reshape(-1, 1)
|
||||
for c in classes[:n_informative_features]])
|
||||
X = X.astype(np.float32)
|
||||
|
||||
# Not all target classes are explained by the binary class indicator
|
||||
# features:
|
||||
assert n_informative_features < n_classes
|
||||
|
||||
# Add 10 other noisy features with high cardinality (numerical) values
|
||||
# that can be used to overfit the training data.
|
||||
X = np.concatenate([X, rng.randn(n_samples, n_noise_features)], axis=1)
|
||||
assert X.shape == (n_samples, n_features)
|
||||
|
||||
# Split the dataset to be able to evaluate on a held-out test set. The
|
||||
# Test size should be large enough for importance measurements to be
|
||||
# stable:
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, test_size=0.5, random_state=rng)
|
||||
clf = RandomForestClassifier(n_estimators=5, random_state=rng)
|
||||
clf.fit(X_train, y_train)
|
||||
|
||||
# Variable importances computed by impurity decrease on the tree node
|
||||
# splits often use the noisy features in splits. This can give misleading
|
||||
# impression that high cardinality noisy variables are the most important:
|
||||
tree_importances = clf.feature_importances_
|
||||
informative_tree_importances = tree_importances[:n_informative_features]
|
||||
noisy_tree_importances = tree_importances[n_informative_features:]
|
||||
assert informative_tree_importances.max() < noisy_tree_importances.min()
|
||||
|
||||
# Let's check that permutation-based feature importances do not have this
|
||||
# problem.
|
||||
r = permutation_importance(clf, X_test, y_test, n_repeats=n_repeats,
|
||||
random_state=rng, n_jobs=n_jobs)
|
||||
|
||||
assert r.importances.shape == (X.shape[1], n_repeats)
|
||||
|
||||
# Split the importances between informative and noisy features
|
||||
informative_importances = r.importances_mean[:n_informative_features]
|
||||
noisy_importances = r.importances_mean[n_informative_features:]
|
||||
|
||||
# Because we do not have a binary variable explaining each target classes,
|
||||
# the RF model will have to use the random variable to make some
|
||||
# (overfitting) splits (as max_depth is not set). Therefore the noisy
|
||||
# variables will be non-zero but with small values oscillating around
|
||||
# zero:
|
||||
assert max(np.abs(noisy_importances)) > 1e-7
|
||||
assert noisy_importances.max() < 0.05
|
||||
|
||||
# The binary features correlated with y should have a higher importance
|
||||
# than the high cardinality noisy features.
|
||||
# The maximum test accuracy is 2 / 5 == 0.4, each informative feature
|
||||
# contributing approximately a bit more than 0.2 of accuracy.
|
||||
assert informative_importances.min() > 0.15
|
||||
|
||||
|
||||
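# Illustration: the quantity the assertions above reason about, in its
# simplest form. The permutation importance of a feature is the drop in
# score after shuffling that single column, so a feature the model does not
# truly rely on stays close to zero. The `_demo_*` helper name is a
# placeholder, not sklearn API; `est` is any fitted estimator with a
# `score` method and `rng` a numpy RandomState.
def _demo_single_permutation(est, X, y, feature_idx, rng):
    baseline = est.score(X, y)
    X_perm = X.copy()
    X_perm[:, feature_idx] = rng.permutation(X_perm[:, feature_idx])
    return baseline - est.score(X_perm, y)

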
def test_permutation_importance_mixed_types():
    rng = np.random.RandomState(42)
    n_repeats = 4

    # Last column is correlated with y
    X = np.array([[1.0, 2.0, 3.0, np.nan], [2, 1, 2, 1]]).T
    y = np.array([0, 1, 0, 1])

    clf = make_pipeline(SimpleImputer(), LogisticRegression(solver='lbfgs'))
    clf.fit(X, y)
    result = permutation_importance(clf, X, y, n_repeats=n_repeats,
                                    random_state=rng)

    assert result.importances.shape == (X.shape[1], n_repeats)

    # the feature correlated with y is the last column and should
    # have the highest importance
    assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])

    # use another random state
    rng = np.random.RandomState(0)
    result2 = permutation_importance(clf, X, y, n_repeats=n_repeats,
                                     random_state=rng)
    assert result2.importances.shape == (X.shape[1], n_repeats)

    assert not np.allclose(result.importances, result2.importances)

    # the feature correlated with y is the last column and should
    # have the highest importance
    assert np.all(
        result2.importances_mean[-1] > result2.importances_mean[:-1])


def test_permutation_importance_mixed_types_pandas():
    pd = pytest.importorskip("pandas")
    rng = np.random.RandomState(42)
    n_repeats = 5

    # Last column is correlated with y
    X = pd.DataFrame({'col1': [1.0, 2.0, 3.0, np.nan],
                      'col2': ['a', 'b', 'a', 'b']})
    y = np.array([0, 1, 0, 1])

    num_preprocess = make_pipeline(SimpleImputer(), StandardScaler())
    preprocess = ColumnTransformer([
        ('num', num_preprocess, ['col1']),
        ('cat', OneHotEncoder(), ['col2'])
    ])
    clf = make_pipeline(preprocess, LogisticRegression(solver='lbfgs'))
    clf.fit(X, y)

    result = permutation_importance(clf, X, y, n_repeats=n_repeats,
                                    random_state=rng)

    assert result.importances.shape == (X.shape[1], n_repeats)
    # the feature correlated with y is the last column and should
    # have the highest importance
    assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])


def test_permutation_importance_linear_regression():
    X, y = make_regression(n_samples=500, n_features=10, random_state=0)

    X = scale(X)
    y = scale(y)

    lr = LinearRegression().fit(X, y)

    # this relationship can be computed in closed form
    expected_importances = 2 * lr.coef_**2
    results = permutation_importance(lr, X, y,
                                     n_repeats=50,
                                     scoring='neg_mean_squared_error')
    assert_allclose(expected_importances, results.importances_mean,
                    rtol=1e-1, atol=1e-6)


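# Illustration: where the closed form above comes from, as a sketch assuming
# standardized, roughly independent features. With y_hat = sum_k c_k * x_k,
# permuting column j replaces x_j by an independent copy x_j', so the new
# residual is e' = e + c_j * (x_j - x_j'). Since Var(x_j - x_j') is
# 2 * Var(x_j) = 2 and OLS residuals are uncorrelated with x_j, the MSE
# grows by about 2 * c_j ** 2, which is the importance reported by the
# 'neg_mean_squared_error' scorer.

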
def test_permutation_importance_equivalence_sequential_parallel():
    # regression test to make sure that sequential and parallel calls will
    # output the same results.
    X, y = make_regression(n_samples=500, n_features=10, random_state=0)
    lr = LinearRegression().fit(X, y)

    importance_sequential = permutation_importance(
        lr, X, y, n_repeats=5, random_state=0, n_jobs=1
    )

    # First check that the problem is structured enough and that the model
    # is complex enough to not yield trivial, constant importances:
    imp_min = importance_sequential['importances'].min()
    imp_max = importance_sequential['importances'].max()
    assert imp_max - imp_min > 0.3

    # Then actually check that parallelism does not impact the results,
    # either with shared memory (threading) or with isolated memory via
    # process-based parallelism, using the default backend ('loky' or
    # 'multiprocessing') depending on the joblib version:

    # process-based parallelism (by default):
    importance_processes = permutation_importance(
        lr, X, y, n_repeats=5, random_state=0, n_jobs=2)
    assert_allclose(
        importance_processes['importances'],
        importance_sequential['importances']
    )

    # thread-based parallelism:
    with parallel_backend("threading"):
        importance_threading = permutation_importance(
            lr, X, y, n_repeats=5, random_state=0, n_jobs=2
        )
    assert_allclose(
        importance_threading['importances'],
        importance_sequential['importances']
    )


@pytest.mark.parametrize("n_jobs", [None, 1, 2])
|
||||
def test_permutation_importance_equivalence_array_dataframe(n_jobs):
|
||||
# This test checks that the column shuffling logic has the same behavior
|
||||
# both a dataframe and a simple numpy array.
|
||||
pd = pytest.importorskip('pandas')
|
||||
|
||||
# regression test to make sure that sequential and parallel calls will
|
||||
# output the same results.
|
||||
X, y = make_regression(n_samples=100, n_features=5, random_state=0)
|
||||
X_df = pd.DataFrame(X)
|
||||
|
||||
# Add a categorical feature that is statistically linked to y:
|
||||
binner = KBinsDiscretizer(n_bins=3, encode="ordinal")
|
||||
cat_column = binner.fit_transform(y.reshape(-1, 1))
|
||||
|
||||
# Concatenate the extra column to the numpy array: integers will be
|
||||
# cast to float values
|
||||
X = np.hstack([X, cat_column])
|
||||
assert X.dtype.kind == "f"
|
||||
|
||||
# Insert extra column as a non-numpy-native dtype (while keeping backward
|
||||
# compat for old pandas versions):
|
||||
if hasattr(pd, "Categorical"):
|
||||
cat_column = pd.Categorical(cat_column.ravel())
|
||||
else:
|
||||
cat_column = cat_column.ravel()
|
||||
new_col_idx = len(X_df.columns)
|
||||
X_df[new_col_idx] = cat_column
|
||||
assert X_df[new_col_idx].dtype == cat_column.dtype
|
||||
|
||||
# Stich an aribtrary index to the dataframe:
|
||||
X_df.index = np.arange(len(X_df)).astype(str)
|
||||
|
||||
rf = RandomForestRegressor(n_estimators=5, max_depth=3, random_state=0)
|
||||
rf.fit(X, y)
|
||||
|
||||
n_repeats = 3
|
||||
importance_array = permutation_importance(
|
||||
rf, X, y, n_repeats=n_repeats, random_state=0, n_jobs=n_jobs
|
||||
)
|
||||
|
||||
# First check that the problem is structured enough and that the model is
|
||||
# complex enough to not yield trivial, constant importances:
|
||||
imp_min = importance_array['importances'].min()
|
||||
imp_max = importance_array['importances'].max()
|
||||
assert imp_max - imp_min > 0.3
|
||||
|
||||
# Now check that importances computed on dataframe matche the values
|
||||
# of those computed on the array with the same data.
|
||||
importance_dataframe = permutation_importance(
|
||||
rf, X_df, y, n_repeats=n_repeats, random_state=0, n_jobs=n_jobs
|
||||
)
|
||||
assert_allclose(
|
||||
importance_array['importances'],
|
||||
importance_dataframe['importances']
|
||||
)
|
||||
|
||||
|
||||
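# Illustration: the two shuffling code paths the test above compares, in
# miniature. The same row permutation applied to a numpy column and to the
# matching dataframe column must feed identical inputs to the estimator;
# `.to_numpy()` sidesteps pandas index alignment (the dataframe above uses a
# string index). The `_demo_*` helper name is a placeholder, not sklearn
# API.
def _demo_shuffle_column(X_array, X_df, col, rng):
    perm = rng.permutation(len(X_array))
    X_array[:, col] = X_array[perm, col]
    X_df.iloc[:, col] = X_df.iloc[perm, col].to_numpy()
    return X_array, X_df

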
@pytest.mark.parametrize("input_type", ["array", "dataframe"])
|
||||
def test_permutation_importance_large_memmaped_data(input_type):
|
||||
# Smoke, non-regression test for:
|
||||
# https://github.com/scikit-learn/scikit-learn/issues/15810
|
||||
n_samples, n_features = int(5e4), 4
|
||||
X, y = make_classification(n_samples=n_samples, n_features=n_features,
|
||||
random_state=0)
|
||||
assert X.nbytes > 1e6 # trigger joblib memmaping
|
||||
|
||||
X = _convert_container(X, input_type)
|
||||
clf = DummyClassifier(strategy='prior').fit(X, y)
|
||||
|
||||
# Actual smoke test: should not raise any error:
|
||||
n_repeats = 5
|
||||
r = permutation_importance(clf, X, y, n_repeats=n_repeats, n_jobs=2)
|
||||
|
||||
# Auxiliary check: DummyClassifier is feature independent:
|
||||
# permutating feature should not change the predictions
|
||||
expected_importances = np.zeros((n_features, n_repeats))
|
||||
assert_allclose(expected_importances, r.importances)
|