Uploaded Test files
This commit is contained in:
parent
f584ad9d97
commit
2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,593 @@
|
|||
import numbers
|
||||
from itertools import chain
|
||||
from itertools import count
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
from scipy import sparse
|
||||
from scipy.stats.mstats import mquantiles
|
||||
from joblib import Parallel, delayed
|
||||
|
||||
from .. import partial_dependence
|
||||
from ...base import is_regressor
|
||||
from ...utils import check_array
|
||||
from ...utils import check_matplotlib_support # noqa
|
||||
from ...utils import _safe_indexing
|
||||
from ...utils.validation import _deprecate_positional_args
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def plot_partial_dependence(estimator, X, features, *, feature_names=None,
|
||||
target=None, response_method='auto', n_cols=3,
|
||||
grid_resolution=100, percentiles=(0.05, 0.95),
|
||||
method='auto', n_jobs=None, verbose=0, fig=None,
|
||||
line_kw=None, contour_kw=None, ax=None):
|
||||
"""Partial dependence plots.
|
||||
|
||||
The ``len(features)`` plots are arranged in a grid with ``n_cols``
|
||||
columns. Two-way partial dependence plots are plotted as contour plots. The
|
||||
deciles of the feature values will be shown with tick marks on the x-axes
|
||||
for one-way plots, and on both axes for two-way plots.
|
||||
|
||||
Read more in the :ref:`User Guide <partial_dependence>`.
|
||||
|
||||
.. note::
|
||||
|
||||
:func:`plot_partial_dependence` does not support using the same axes
|
||||
with multiple calls. To plot the the partial dependence for multiple
|
||||
estimators, please pass the axes created by the first call to the
|
||||
second call::
|
||||
|
||||
>>> from sklearn.inspection import plot_partial_dependence
|
||||
>>> from sklearn.datasets import make_friedman1
|
||||
>>> from sklearn.linear_model import LinearRegression
|
||||
>>> X, y = make_friedman1()
|
||||
>>> est = LinearRegression().fit(X, y)
|
||||
>>> disp1 = plot_partial_dependence(est, X) # doctest: +SKIP
|
||||
>>> disp2 = plot_partial_dependence(est, X,
|
||||
... ax=disp1.axes_) # doctest: +SKIP
|
||||
|
||||
.. warning::
|
||||
|
||||
For :class:`~sklearn.ensemble.GradientBoostingClassifier` and
|
||||
:class:`~sklearn.ensemble.GradientBoostingRegressor`, the
|
||||
'recursion' method (used by default) will not account for the `init`
|
||||
predictor of the boosting process. In practice, this will produce
|
||||
the same values as 'brute' up to a constant offset in the target
|
||||
response, provided that `init` is a constant estimator (which is the
|
||||
default). However, if `init` is not a constant estimator, the
|
||||
partial dependence values are incorrect for 'recursion' because the
|
||||
offset will be sample-dependent. It is preferable to use the 'brute'
|
||||
method. Note that this only applies to
|
||||
:class:`~sklearn.ensemble.GradientBoostingClassifier` and
|
||||
:class:`~sklearn.ensemble.GradientBoostingRegressor`, not to
|
||||
:class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
|
||||
:class:`~sklearn.ensemble.HistGradientBoostingRegressor`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
estimator : BaseEstimator
|
||||
A fitted estimator object implementing :term:`predict`,
|
||||
:term:`predict_proba`, or :term:`decision_function`.
|
||||
Multioutput-multiclass classifiers are not supported.
|
||||
|
||||
X : {array-like or dataframe} of shape (n_samples, n_features)
|
||||
``X`` is used to generate a grid of values for the target
|
||||
``features`` (where the partial dependence will be evaluated), and
|
||||
also to generate values for the complement features when the
|
||||
`method` is 'brute'.
|
||||
|
||||
features : list of {int, str, pair of int, pair of str}
|
||||
The target features for which to create the PDPs.
|
||||
If features[i] is an int or a string, a one-way PDP is created; if
|
||||
features[i] is a tuple, a two-way PDP is created. Each tuple must be
|
||||
of size 2.
|
||||
if any entry is a string, then it must be in ``feature_names``.
|
||||
|
||||
feature_names : array-like of shape (n_features,), dtype=str, default=None
|
||||
Name of each feature; feature_names[i] holds the name of the feature
|
||||
with index i.
|
||||
By default, the name of the feature corresponds to their numerical
|
||||
index for NumPy array and their column name for pandas dataframe.
|
||||
|
||||
target : int, optional (default=None)
|
||||
- In a multiclass setting, specifies the class for which the PDPs
|
||||
should be computed. Note that for binary classification, the
|
||||
positive class (index 1) is always used.
|
||||
- In a multioutput setting, specifies the task for which the PDPs
|
||||
should be computed.
|
||||
|
||||
Ignored in binary classification or classical regression settings.
|
||||
|
||||
response_method : 'auto', 'predict_proba' or 'decision_function', \
|
||||
optional (default='auto')
|
||||
Specifies whether to use :term:`predict_proba` or
|
||||
:term:`decision_function` as the target response. For regressors
|
||||
this parameter is ignored and the response is always the output of
|
||||
:term:`predict`. By default, :term:`predict_proba` is tried first
|
||||
and we revert to :term:`decision_function` if it doesn't exist. If
|
||||
``method`` is 'recursion', the response is always the output of
|
||||
:term:`decision_function`.
|
||||
|
||||
n_cols : int, optional (default=3)
|
||||
The maximum number of columns in the grid plot. Only active when `ax`
|
||||
is a single axis or `None`.
|
||||
|
||||
grid_resolution : int, optional (default=100)
|
||||
The number of equally spaced points on the axes of the plots, for each
|
||||
target feature.
|
||||
|
||||
percentiles : tuple of float, optional (default=(0.05, 0.95))
|
||||
The lower and upper percentile used to create the extreme values
|
||||
for the PDP axes. Must be in [0, 1].
|
||||
|
||||
method : str, optional (default='auto')
|
||||
The method used to calculate the averaged predictions:
|
||||
|
||||
- 'recursion' is only supported for some tree-based estimators (namely
|
||||
:class:`~sklearn.ensemble.GradientBoostingClassifier`,
|
||||
:class:`~sklearn.ensemble.GradientBoostingRegressor`,
|
||||
:class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
|
||||
:class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
|
||||
:class:`~sklearn.tree.DecisionTreeRegressor`,
|
||||
:class:`~sklearn.ensemble.RandomForestRegressor`
|
||||
but is more efficient in terms of speed.
|
||||
With this method, the target response of a
|
||||
classifier is always the decision function, not the predicted
|
||||
probabilities.
|
||||
|
||||
- 'brute' is supported for any estimator, but is more
|
||||
computationally intensive.
|
||||
|
||||
- 'auto': the 'recursion' is used for estimators that support it,
|
||||
and 'brute' is used otherwise.
|
||||
|
||||
Please see :ref:`this note <pdp_method_differences>` for
|
||||
differences between the 'brute' and 'recursion' method.
|
||||
|
||||
n_jobs : int, optional (default=None)
|
||||
The number of CPUs to use to compute the partial dependences.
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
verbose : int, optional (default=0)
|
||||
Verbose output during PD computations.
|
||||
|
||||
fig : Matplotlib figure object, optional (default=None)
|
||||
A figure object onto which the plots will be drawn, after the figure
|
||||
has been cleared. By default, a new one is created.
|
||||
|
||||
.. deprecated:: 0.22
|
||||
``fig`` will be removed in 0.24.
|
||||
|
||||
line_kw : dict, optional
|
||||
Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.
|
||||
For one-way partial dependence plots.
|
||||
|
||||
contour_kw : dict, optional
|
||||
Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.
|
||||
For two-way partial dependence plots.
|
||||
|
||||
ax : Matplotlib axes or array-like of Matplotlib axes, default=None
|
||||
- If a single axis is passed in, it is treated as a bounding axes
|
||||
and a grid of partial dependence plots will be drawn within
|
||||
these bounds. The `n_cols` parameter controls the number of
|
||||
columns in the grid.
|
||||
- If an array-like of axes are passed in, the partial dependence
|
||||
plots will be drawn directly into these axes.
|
||||
- If `None`, a figure and a bounding axes is created and treated
|
||||
as the single axes case.
|
||||
|
||||
.. versionadded:: 0.22
|
||||
|
||||
Returns
|
||||
-------
|
||||
display: :class:`~sklearn.inspection.PartialDependenceDisplay`
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.datasets import make_friedman1
|
||||
>>> from sklearn.ensemble import GradientBoostingRegressor
|
||||
>>> X, y = make_friedman1()
|
||||
>>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)
|
||||
>>> plot_partial_dependence(clf, X, [0, (0, 1)]) #doctest: +SKIP
|
||||
|
||||
See also
|
||||
--------
|
||||
sklearn.inspection.partial_dependence: Return raw partial
|
||||
dependence values
|
||||
"""
|
||||
check_matplotlib_support('plot_partial_dependence') # noqa
|
||||
import matplotlib.pyplot as plt # noqa
|
||||
from matplotlib import transforms # noqa
|
||||
from matplotlib.ticker import MaxNLocator # noqa
|
||||
from matplotlib.ticker import ScalarFormatter # noqa
|
||||
|
||||
# set target_idx for multi-class estimators
|
||||
if hasattr(estimator, 'classes_') and np.size(estimator.classes_) > 2:
|
||||
if target is None:
|
||||
raise ValueError('target must be specified for multi-class')
|
||||
target_idx = np.searchsorted(estimator.classes_, target)
|
||||
if (not (0 <= target_idx < len(estimator.classes_)) or
|
||||
estimator.classes_[target_idx] != target):
|
||||
raise ValueError('target not in est.classes_, got {}'.format(
|
||||
target))
|
||||
else:
|
||||
# regression and binary classification
|
||||
target_idx = 0
|
||||
|
||||
# Use check_array only on lists and other non-array-likes / sparse. Do not
|
||||
# convert DataFrame into a NumPy array.
|
||||
if not(hasattr(X, '__array__') or sparse.issparse(X)):
|
||||
X = check_array(X, force_all_finite='allow-nan', dtype=np.object)
|
||||
n_features = X.shape[1]
|
||||
|
||||
# convert feature_names to list
|
||||
if feature_names is None:
|
||||
if hasattr(X, "loc"):
|
||||
# get the column names for a pandas dataframe
|
||||
feature_names = X.columns.tolist()
|
||||
else:
|
||||
# define a list of numbered indices for a numpy array
|
||||
feature_names = [str(i) for i in range(n_features)]
|
||||
elif hasattr(feature_names, "tolist"):
|
||||
# convert numpy array or pandas index to a list
|
||||
feature_names = feature_names.tolist()
|
||||
if len(set(feature_names)) != len(feature_names):
|
||||
raise ValueError('feature_names should not contain duplicates.')
|
||||
|
||||
def convert_feature(fx):
|
||||
if isinstance(fx, str):
|
||||
try:
|
||||
fx = feature_names.index(fx)
|
||||
except ValueError:
|
||||
raise ValueError('Feature %s not in feature_names' % fx)
|
||||
return int(fx)
|
||||
|
||||
# convert features into a seq of int tuples
|
||||
tmp_features = []
|
||||
for fxs in features:
|
||||
if isinstance(fxs, (numbers.Integral, str)):
|
||||
fxs = (fxs,)
|
||||
try:
|
||||
fxs = tuple(convert_feature(fx) for fx in fxs)
|
||||
except TypeError:
|
||||
raise ValueError('Each entry in features must be either an int, '
|
||||
'a string, or an iterable of size at most 2.')
|
||||
if not 1 <= np.size(fxs) <= 2:
|
||||
raise ValueError('Each entry in features must be either an int, '
|
||||
'a string, or an iterable of size at most 2.')
|
||||
|
||||
tmp_features.append(fxs)
|
||||
|
||||
features = tmp_features
|
||||
|
||||
# Early exit if the axes does not have the correct number of axes
|
||||
if ax is not None and not isinstance(ax, plt.Axes):
|
||||
axes = np.asarray(ax, dtype=object)
|
||||
if axes.size != len(features):
|
||||
raise ValueError("Expected ax to have {} axes, got {}".format(
|
||||
len(features), axes.size))
|
||||
|
||||
for i in chain.from_iterable(features):
|
||||
if i >= len(feature_names):
|
||||
raise ValueError('All entries of features must be less than '
|
||||
'len(feature_names) = {0}, got {1}.'
|
||||
.format(len(feature_names), i))
|
||||
|
||||
# compute averaged predictions
|
||||
pd_results = Parallel(n_jobs=n_jobs, verbose=verbose)(
|
||||
delayed(partial_dependence)(estimator, X, fxs,
|
||||
response_method=response_method,
|
||||
method=method,
|
||||
grid_resolution=grid_resolution,
|
||||
percentiles=percentiles)
|
||||
for fxs in features)
|
||||
|
||||
# For multioutput regression, we can only check the validity of target
|
||||
# now that we have the predictions.
|
||||
# Also note: as multiclass-multioutput classifiers are not supported,
|
||||
# multiclass and multioutput scenario are mutually exclusive. So there is
|
||||
# no risk of overwriting target_idx here.
|
||||
avg_preds, _ = pd_results[0] # checking the first result is enough
|
||||
if is_regressor(estimator) and avg_preds.shape[0] > 1:
|
||||
if target is None:
|
||||
raise ValueError(
|
||||
'target must be specified for multi-output regressors')
|
||||
if not 0 <= target <= avg_preds.shape[0]:
|
||||
raise ValueError(
|
||||
'target must be in [0, n_tasks], got {}.'.format(target))
|
||||
target_idx = target
|
||||
|
||||
# get global min and max average predictions of PD grouped by plot type
|
||||
pdp_lim = {}
|
||||
for avg_preds, values in pd_results:
|
||||
min_pd = avg_preds[target_idx].min()
|
||||
max_pd = avg_preds[target_idx].max()
|
||||
n_fx = len(values)
|
||||
old_min_pd, old_max_pd = pdp_lim.get(n_fx, (min_pd, max_pd))
|
||||
min_pd = min(min_pd, old_min_pd)
|
||||
max_pd = max(max_pd, old_max_pd)
|
||||
pdp_lim[n_fx] = (min_pd, max_pd)
|
||||
|
||||
deciles = {}
|
||||
for fx in chain.from_iterable(features):
|
||||
if fx not in deciles:
|
||||
X_col = _safe_indexing(X, fx, axis=1)
|
||||
deciles[fx] = mquantiles(X_col, prob=np.arange(0.1, 1.0, 0.1))
|
||||
|
||||
if fig is not None:
|
||||
warnings.warn("The fig parameter is deprecated in version "
|
||||
"0.22 and will be removed in version 0.24",
|
||||
FutureWarning)
|
||||
fig.clear()
|
||||
ax = fig.gca()
|
||||
|
||||
display = PartialDependenceDisplay(pd_results=pd_results,
|
||||
features=features,
|
||||
feature_names=feature_names,
|
||||
target_idx=target_idx,
|
||||
pdp_lim=pdp_lim,
|
||||
deciles=deciles)
|
||||
return display.plot(ax=ax, n_cols=n_cols, line_kw=line_kw,
|
||||
contour_kw=contour_kw)
|
||||
|
||||
|
||||
class PartialDependenceDisplay:
|
||||
"""Partial Dependence Plot (PDP) visualization.
|
||||
|
||||
It is recommended to use
|
||||
:func:`~sklearn.inspection.plot_partial_dependence` to create a
|
||||
:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are
|
||||
stored as attributes.
|
||||
|
||||
Read more in
|
||||
:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`
|
||||
and the :ref:`User Guide <visualizations>`.
|
||||
|
||||
.. versionadded:: 0.22
|
||||
|
||||
Parameters
|
||||
----------
|
||||
pd_results : list of (ndarray, ndarray)
|
||||
Results of :func:`~sklearn.inspection.partial_dependence` for
|
||||
``features``. Each tuple corresponds to a (averaged_predictions, grid).
|
||||
|
||||
features : list of (int,) or list of (int, int)
|
||||
Indices of features for a given plot. A tuple of one integer will plot
|
||||
a partial dependence curve of one feature. A tuple of two integers will
|
||||
plot a two-way partial dependence curve as a contour plot.
|
||||
|
||||
feature_names : list of str
|
||||
Feature names corresponding to the indices in ``features``.
|
||||
|
||||
target_idx : int
|
||||
|
||||
- In a multiclass setting, specifies the class for which the PDPs
|
||||
should be computed. Note that for binary classification, the
|
||||
positive class (index 1) is always used.
|
||||
- In a multioutput setting, specifies the task for which the PDPs
|
||||
should be computed.
|
||||
|
||||
Ignored in binary classification or classical regression settings.
|
||||
|
||||
pdp_lim : dict
|
||||
Global min and max average predictions, such that all plots will have
|
||||
the same scale and y limits. `pdp_lim[1]` is the global min and max for
|
||||
single partial dependence curves. `pdp_lim[2]` is the global min and
|
||||
max for two-way partial dependence curves.
|
||||
|
||||
deciles : dict
|
||||
Deciles for feature indices in ``features``.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
bounding_ax_ : matplotlib Axes or None
|
||||
If `ax` is an axes or None, the `bounding_ax_` is the axes where the
|
||||
grid of partial dependence plots are drawn. If `ax` is a list of axes
|
||||
or a numpy array of axes, `bounding_ax_` is None.
|
||||
|
||||
axes_ : ndarray of matplotlib Axes
|
||||
If `ax` is an axes or None, `axes_[i, j]` is the axes on the i-th row
|
||||
and j-th column. If `ax` is a list of axes, `axes_[i]` is the i-th item
|
||||
in `ax`. Elements that are None correspond to a nonexisting axes in
|
||||
that position.
|
||||
|
||||
lines_ : ndarray of matplotlib Artists
|
||||
If `ax` is an axes or None, `lines_[i, j]` is the partial dependence
|
||||
curve on the i-th row and j-th column. If `ax` is a list of axes,
|
||||
`lines_[i]` is the partial dependence curve corresponding to the i-th
|
||||
item in `ax`. Elements that are None correspond to a nonexisting axes
|
||||
or an axes that does not include a line plot.
|
||||
|
||||
deciles_vlines_ : ndarray of matplotlib LineCollection
|
||||
If `ax` is an axes or None, `vlines_[i, j]` is the line collection
|
||||
representing the x axis deciles of the i-th row and j-th column. If
|
||||
`ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in
|
||||
`ax`. Elements that are None correspond to a nonexisting axes or an
|
||||
axes that does not include a PDP plot.
|
||||
.. versionadded:: 0.23
|
||||
deciles_hlines_ : ndarray of matplotlib LineCollection
|
||||
If `ax` is an axes or None, `vlines_[i, j]` is the line collection
|
||||
representing the y axis deciles of the i-th row and j-th column. If
|
||||
`ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in
|
||||
`ax`. Elements that are None correspond to a nonexisting axes or an
|
||||
axes that does not include a 2-way plot.
|
||||
.. versionadded:: 0.23
|
||||
|
||||
contours_ : ndarray of matplotlib Artists
|
||||
If `ax` is an axes or None, `contours_[i, j]` is the partial dependence
|
||||
plot on the i-th row and j-th column. If `ax` is a list of axes,
|
||||
`contours_[i]` is the partial dependence plot corresponding to the i-th
|
||||
item in `ax`. Elements that are None correspond to a nonexisting axes
|
||||
or an axes that does not include a contour plot.
|
||||
|
||||
figure_ : matplotlib Figure
|
||||
Figure containing partial dependence plots.
|
||||
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, pd_results, *, features, feature_names, target_idx,
|
||||
pdp_lim, deciles):
|
||||
self.pd_results = pd_results
|
||||
self.features = features
|
||||
self.feature_names = feature_names
|
||||
self.target_idx = target_idx
|
||||
self.pdp_lim = pdp_lim
|
||||
self.deciles = deciles
|
||||
|
||||
def plot(self, ax=None, n_cols=3, line_kw=None, contour_kw=None):
|
||||
"""Plot partial dependence plots.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
ax : Matplotlib axes or array-like of Matplotlib axes, default=None
|
||||
- If a single axis is passed in, it is treated as a bounding axes
|
||||
and a grid of partial dependence plots will be drawn within
|
||||
these bounds. The `n_cols` parameter controls the number of
|
||||
columns in the grid.
|
||||
- If an array-like of axes are passed in, the partial dependence
|
||||
plots will be drawn directly into these axes.
|
||||
- If `None`, a figure and a bounding axes is created and treated
|
||||
as the single axes case.
|
||||
|
||||
n_cols : int, default=3
|
||||
The maximum number of columns in the grid plot. Only active when
|
||||
`ax` is a single axes or `None`.
|
||||
|
||||
line_kw : dict, default=None
|
||||
Dict with keywords passed to the `matplotlib.pyplot.plot` call.
|
||||
For one-way partial dependence plots.
|
||||
|
||||
contour_kw : dict, default=None
|
||||
Dict with keywords passed to the `matplotlib.pyplot.contourf`
|
||||
call for two-way partial dependence plots.
|
||||
|
||||
Returns
|
||||
-------
|
||||
display: :class:`~sklearn.inspection.PartialDependenceDisplay`
|
||||
"""
|
||||
|
||||
check_matplotlib_support("plot_partial_dependence")
|
||||
import matplotlib.pyplot as plt # noqa
|
||||
from matplotlib import transforms # noqa
|
||||
from matplotlib.ticker import MaxNLocator # noqa
|
||||
from matplotlib.ticker import ScalarFormatter # noqa
|
||||
from matplotlib.gridspec import GridSpecFromSubplotSpec # noqa
|
||||
|
||||
if line_kw is None:
|
||||
line_kw = {}
|
||||
if contour_kw is None:
|
||||
contour_kw = {}
|
||||
|
||||
if ax is None:
|
||||
_, ax = plt.subplots()
|
||||
|
||||
default_contour_kws = {"alpha": 0.75}
|
||||
contour_kw = {**default_contour_kws, **contour_kw}
|
||||
|
||||
n_features = len(self.features)
|
||||
|
||||
if isinstance(ax, plt.Axes):
|
||||
# If ax was set off, it has most likely been set to off
|
||||
# by a previous call to plot.
|
||||
if not ax.axison:
|
||||
raise ValueError("The ax was already used in another plot "
|
||||
"function, please set ax=display.axes_ "
|
||||
"instead")
|
||||
|
||||
ax.set_axis_off()
|
||||
self.bounding_ax_ = ax
|
||||
self.figure_ = ax.figure
|
||||
|
||||
n_cols = min(n_cols, n_features)
|
||||
n_rows = int(np.ceil(n_features / float(n_cols)))
|
||||
|
||||
self.axes_ = np.empty((n_rows, n_cols), dtype=np.object)
|
||||
|
||||
axes_ravel = self.axes_.ravel()
|
||||
|
||||
gs = GridSpecFromSubplotSpec(n_rows, n_cols,
|
||||
subplot_spec=ax.get_subplotspec())
|
||||
for i, spec in zip(range(n_features), gs):
|
||||
axes_ravel[i] = self.figure_.add_subplot(spec)
|
||||
|
||||
else: # array-like
|
||||
ax = np.asarray(ax, dtype=object)
|
||||
if ax.size != n_features:
|
||||
raise ValueError("Expected ax to have {} axes, got {}"
|
||||
.format(n_features, ax.size))
|
||||
|
||||
if ax.ndim == 2:
|
||||
n_cols = ax.shape[1]
|
||||
else:
|
||||
n_cols = None
|
||||
|
||||
self.bounding_ax_ = None
|
||||
self.figure_ = ax.ravel()[0].figure
|
||||
self.axes_ = ax
|
||||
|
||||
# create contour levels for two-way plots
|
||||
if 2 in self.pdp_lim:
|
||||
Z_level = np.linspace(*self.pdp_lim[2], num=8)
|
||||
|
||||
self.lines_ = np.empty_like(self.axes_, dtype=np.object)
|
||||
self.contours_ = np.empty_like(self.axes_, dtype=np.object)
|
||||
self.deciles_vlines_ = np.empty_like(self.axes_, dtype=np.object)
|
||||
self.deciles_hlines_ = np.empty_like(self.axes_, dtype=np.object)
|
||||
# Create 1d views of these 2d arrays for easy indexing
|
||||
lines_ravel = self.lines_.ravel(order='C')
|
||||
contours_ravel = self.contours_.ravel(order='C')
|
||||
vlines_ravel = self.deciles_vlines_.ravel(order='C')
|
||||
hlines_ravel = self.deciles_hlines_.ravel(order='C')
|
||||
|
||||
for i, axi, fx, (avg_preds, values) in zip(count(),
|
||||
self.axes_.ravel(),
|
||||
self.features,
|
||||
self.pd_results):
|
||||
if len(values) == 1:
|
||||
lines_ravel[i] = axi.plot(values[0],
|
||||
avg_preds[self.target_idx].ravel(),
|
||||
**line_kw)[0]
|
||||
else:
|
||||
# contour plot
|
||||
XX, YY = np.meshgrid(values[0], values[1])
|
||||
Z = avg_preds[self.target_idx].T
|
||||
CS = axi.contour(XX, YY, Z, levels=Z_level, linewidths=0.5,
|
||||
colors='k')
|
||||
contours_ravel[i] = axi.contourf(XX, YY, Z, levels=Z_level,
|
||||
vmax=Z_level[-1],
|
||||
vmin=Z_level[0],
|
||||
**contour_kw)
|
||||
axi.clabel(CS, fmt='%2.2f', colors='k', fontsize=10,
|
||||
inline=True)
|
||||
|
||||
trans = transforms.blended_transform_factory(axi.transData,
|
||||
axi.transAxes)
|
||||
ylim = axi.get_ylim()
|
||||
vlines_ravel[i] = axi.vlines(self.deciles[fx[0]], 0, 0.05,
|
||||
transform=trans, color='k')
|
||||
axi.set_ylim(ylim)
|
||||
|
||||
# Set xlabel if it is not already set
|
||||
if not axi.get_xlabel():
|
||||
axi.set_xlabel(self.feature_names[fx[0]])
|
||||
|
||||
if len(values) == 1:
|
||||
if n_cols is None or i % n_cols == 0:
|
||||
axi.set_ylabel('Partial dependence')
|
||||
else:
|
||||
axi.set_yticklabels([])
|
||||
axi.set_ylim(self.pdp_lim[1])
|
||||
else:
|
||||
# contour plot
|
||||
trans = transforms.blended_transform_factory(axi.transAxes,
|
||||
axi.transData)
|
||||
xlim = axi.get_xlim()
|
||||
hlines_ravel[i] = axi.hlines(self.deciles[fx[1]], 0, 0.05,
|
||||
transform=trans, color='k')
|
||||
# hline erases xlim
|
||||
axi.set_ylabel(self.feature_names[fx[1]])
|
||||
axi.set_xlim(xlim)
|
||||
return self
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,474 @@
|
|||
import numpy as np
|
||||
from scipy.stats.mstats import mquantiles
|
||||
|
||||
import pytest
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from sklearn.datasets import load_boston
|
||||
from sklearn.datasets import load_iris
|
||||
from sklearn.datasets import make_classification, make_regression
|
||||
from sklearn.ensemble import GradientBoostingRegressor
|
||||
from sklearn.ensemble import GradientBoostingClassifier
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.utils._testing import _convert_container
|
||||
|
||||
from sklearn.inspection import plot_partial_dependence
|
||||
|
||||
|
||||
# TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
|
||||
pytestmark = pytest.mark.filterwarnings(
|
||||
"ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
|
||||
"matplotlib.*")
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def boston():
|
||||
return load_boston()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def clf_boston(boston):
|
||||
clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
|
||||
clf.fit(boston.data, boston.target)
|
||||
return clf
|
||||
|
||||
|
||||
@pytest.mark.parametrize("grid_resolution", [10, 20])
|
||||
def test_plot_partial_dependence(grid_resolution, pyplot, clf_boston, boston):
|
||||
# Test partial dependence plot function.
|
||||
feature_names = boston.feature_names
|
||||
disp = plot_partial_dependence(clf_boston, boston.data,
|
||||
[0, 1, (0, 1)],
|
||||
grid_resolution=grid_resolution,
|
||||
feature_names=feature_names,
|
||||
contour_kw={"cmap": "jet"})
|
||||
fig = pyplot.gcf()
|
||||
axs = fig.get_axes()
|
||||
assert disp.figure_ is fig
|
||||
assert len(axs) == 4
|
||||
|
||||
assert disp.bounding_ax_ is not None
|
||||
assert disp.axes_.shape == (1, 3)
|
||||
assert disp.lines_.shape == (1, 3)
|
||||
assert disp.contours_.shape == (1, 3)
|
||||
assert disp.deciles_vlines_.shape == (1, 3)
|
||||
assert disp.deciles_hlines_.shape == (1, 3)
|
||||
|
||||
assert disp.lines_[0, 2] is None
|
||||
assert disp.contours_[0, 0] is None
|
||||
assert disp.contours_[0, 1] is None
|
||||
|
||||
# deciles lines: always show on xaxis, only show on yaxis if 2-way PDP
|
||||
for i in range(3):
|
||||
assert disp.deciles_vlines_[0, i] is not None
|
||||
assert disp.deciles_hlines_[0, 0] is None
|
||||
assert disp.deciles_hlines_[0, 1] is None
|
||||
assert disp.deciles_hlines_[0, 2] is not None
|
||||
|
||||
assert disp.features == [(0, ), (1, ), (0, 1)]
|
||||
assert np.all(disp.feature_names == feature_names)
|
||||
assert len(disp.deciles) == 2
|
||||
for i in [0, 1]:
|
||||
assert_allclose(disp.deciles[i],
|
||||
mquantiles(boston.data[:, i],
|
||||
prob=np.arange(0.1, 1.0, 0.1)))
|
||||
|
||||
single_feature_positions = [(0, 0), (0, 1)]
|
||||
expected_ylabels = ["Partial dependence", ""]
|
||||
|
||||
for i, pos in enumerate(single_feature_positions):
|
||||
ax = disp.axes_[pos]
|
||||
assert ax.get_ylabel() == expected_ylabels[i]
|
||||
assert ax.get_xlabel() == boston.feature_names[i]
|
||||
assert_allclose(ax.get_ylim(), disp.pdp_lim[1])
|
||||
|
||||
line = disp.lines_[pos]
|
||||
|
||||
avg_preds, values = disp.pd_results[i]
|
||||
assert avg_preds.shape == (1, grid_resolution)
|
||||
target_idx = disp.target_idx
|
||||
|
||||
line_data = line.get_data()
|
||||
assert_allclose(line_data[0], values[0])
|
||||
assert_allclose(line_data[1], avg_preds[target_idx].ravel())
|
||||
|
||||
# two feature position
|
||||
ax = disp.axes_[0, 2]
|
||||
coutour = disp.contours_[0, 2]
|
||||
expected_levels = np.linspace(*disp.pdp_lim[2], num=8)
|
||||
assert_allclose(coutour.levels, expected_levels)
|
||||
assert coutour.get_cmap().name == "jet"
|
||||
assert ax.get_xlabel() == boston.feature_names[0]
|
||||
assert ax.get_ylabel() == boston.feature_names[1]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"input_type, feature_names_type",
|
||||
[('dataframe', None),
|
||||
('dataframe', 'list'), ('list', 'list'), ('array', 'list'),
|
||||
('dataframe', 'array'), ('list', 'array'), ('array', 'array'),
|
||||
('dataframe', 'series'), ('list', 'series'), ('array', 'series'),
|
||||
('dataframe', 'index'), ('list', 'index'), ('array', 'index')]
|
||||
)
|
||||
def test_plot_partial_dependence_str_features(pyplot, clf_boston, boston,
|
||||
input_type, feature_names_type):
|
||||
if input_type == 'dataframe':
|
||||
pd = pytest.importorskip("pandas")
|
||||
X = pd.DataFrame(boston.data, columns=boston.feature_names)
|
||||
elif input_type == 'list':
|
||||
X = boston.data.tolist()
|
||||
else:
|
||||
X = boston.data
|
||||
|
||||
if feature_names_type is None:
|
||||
feature_names = None
|
||||
else:
|
||||
feature_names = _convert_container(boston.feature_names,
|
||||
feature_names_type)
|
||||
|
||||
grid_resolution = 25
|
||||
# check with str features and array feature names and single column
|
||||
disp = plot_partial_dependence(clf_boston, X,
|
||||
[('CRIM', 'ZN'), 'ZN'],
|
||||
grid_resolution=grid_resolution,
|
||||
feature_names=feature_names,
|
||||
n_cols=1, line_kw={"alpha": 0.8})
|
||||
fig = pyplot.gcf()
|
||||
axs = fig.get_axes()
|
||||
assert len(axs) == 3
|
||||
|
||||
assert disp.figure_ is fig
|
||||
assert disp.axes_.shape == (2, 1)
|
||||
assert disp.lines_.shape == (2, 1)
|
||||
assert disp.contours_.shape == (2, 1)
|
||||
assert disp.deciles_vlines_.shape == (2, 1)
|
||||
assert disp.deciles_hlines_.shape == (2, 1)
|
||||
|
||||
assert disp.lines_[0, 0] is None
|
||||
assert disp.deciles_vlines_[0, 0] is not None
|
||||
assert disp.deciles_hlines_[0, 0] is not None
|
||||
assert disp.contours_[1, 0] is None
|
||||
assert disp.deciles_hlines_[1, 0] is None
|
||||
assert disp.deciles_vlines_[1, 0] is not None
|
||||
|
||||
# line
|
||||
ax = disp.axes_[1, 0]
|
||||
assert ax.get_xlabel() == "ZN"
|
||||
assert ax.get_ylabel() == "Partial dependence"
|
||||
|
||||
line = disp.lines_[1, 0]
|
||||
avg_preds, values = disp.pd_results[1]
|
||||
target_idx = disp.target_idx
|
||||
assert line.get_alpha() == 0.8
|
||||
|
||||
line_data = line.get_data()
|
||||
assert_allclose(line_data[0], values[0])
|
||||
assert_allclose(line_data[1], avg_preds[target_idx].ravel())
|
||||
|
||||
# contour
|
||||
ax = disp.axes_[0, 0]
|
||||
coutour = disp.contours_[0, 0]
|
||||
expect_levels = np.linspace(*disp.pdp_lim[2], num=8)
|
||||
assert_allclose(coutour.levels, expect_levels)
|
||||
assert ax.get_xlabel() == "CRIM"
|
||||
assert ax.get_ylabel() == "ZN"
|
||||
|
||||
|
||||
def test_plot_partial_dependence_custom_axes(pyplot, clf_boston, boston):
|
||||
grid_resolution = 25
|
||||
fig, (ax1, ax2) = pyplot.subplots(1, 2)
|
||||
feature_names = boston.feature_names.tolist()
|
||||
disp = plot_partial_dependence(clf_boston, boston.data,
|
||||
['CRIM', ('CRIM', 'ZN')],
|
||||
grid_resolution=grid_resolution,
|
||||
feature_names=feature_names, ax=[ax1, ax2])
|
||||
assert fig is disp.figure_
|
||||
assert disp.bounding_ax_ is None
|
||||
assert disp.axes_.shape == (2, )
|
||||
assert disp.axes_[0] is ax1
|
||||
assert disp.axes_[1] is ax2
|
||||
|
||||
ax = disp.axes_[0]
|
||||
assert ax.get_xlabel() == "CRIM"
|
||||
assert ax.get_ylabel() == "Partial dependence"
|
||||
|
||||
line = disp.lines_[0]
|
||||
avg_preds, values = disp.pd_results[0]
|
||||
target_idx = disp.target_idx
|
||||
|
||||
line_data = line.get_data()
|
||||
assert_allclose(line_data[0], values[0])
|
||||
assert_allclose(line_data[1], avg_preds[target_idx].ravel())
|
||||
|
||||
# contour
|
||||
ax = disp.axes_[1]
|
||||
coutour = disp.contours_[1]
|
||||
expect_levels = np.linspace(*disp.pdp_lim[2], num=8)
|
||||
assert_allclose(coutour.levels, expect_levels)
|
||||
assert ax.get_xlabel() == "CRIM"
|
||||
assert ax.get_ylabel() == "ZN"
|
||||
|
||||
|
||||
def test_plot_partial_dependence_passing_numpy_axes(pyplot, clf_boston,
|
||||
boston):
|
||||
grid_resolution = 25
|
||||
feature_names = boston.feature_names.tolist()
|
||||
disp1 = plot_partial_dependence(clf_boston, boston.data,
|
||||
['CRIM', 'ZN'],
|
||||
grid_resolution=grid_resolution,
|
||||
feature_names=feature_names)
|
||||
assert disp1.axes_.shape == (1, 2)
|
||||
assert disp1.axes_[0, 0].get_ylabel() == "Partial dependence"
|
||||
assert disp1.axes_[0, 1].get_ylabel() == ""
|
||||
assert len(disp1.axes_[0, 0].get_lines()) == 1
|
||||
assert len(disp1.axes_[0, 1].get_lines()) == 1
|
||||
|
||||
lr = LinearRegression()
|
||||
lr.fit(boston.data, boston.target)
|
||||
|
||||
disp2 = plot_partial_dependence(lr, boston.data,
|
||||
['CRIM', 'ZN'],
|
||||
grid_resolution=grid_resolution,
|
||||
feature_names=feature_names,
|
||||
ax=disp1.axes_)
|
||||
|
||||
assert np.all(disp1.axes_ == disp2.axes_)
|
||||
assert len(disp2.axes_[0, 0].get_lines()) == 2
|
||||
assert len(disp2.axes_[0, 1].get_lines()) == 2
|
||||
|
||||
|
||||
@pytest.mark.parametrize("nrows, ncols", [(2, 2), (3, 1)])
|
||||
def test_plot_partial_dependence_incorrent_num_axes(pyplot, clf_boston,
|
||||
boston, nrows, ncols):
|
||||
grid_resolution = 5
|
||||
fig, axes = pyplot.subplots(nrows, ncols)
|
||||
axes_formats = [list(axes.ravel()), tuple(axes.ravel()), axes]
|
||||
|
||||
msg = "Expected ax to have 2 axes, got {}".format(nrows * ncols)
|
||||
|
||||
disp = plot_partial_dependence(clf_boston, boston.data,
|
||||
['CRIM', 'ZN'],
|
||||
grid_resolution=grid_resolution,
|
||||
feature_names=boston.feature_names)
|
||||
|
||||
for ax_format in axes_formats:
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
plot_partial_dependence(clf_boston, boston.data,
|
||||
['CRIM', 'ZN'],
|
||||
grid_resolution=grid_resolution,
|
||||
feature_names=boston.feature_names,
|
||||
ax=ax_format)
|
||||
|
||||
# with axes object
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
disp.plot(ax=ax_format)
|
||||
|
||||
|
||||
def test_plot_partial_dependence_with_same_axes(pyplot, clf_boston, boston):
|
||||
# The first call to plot_partial_dependence will create two new axes to
|
||||
# place in the space of the passed in axes, which results in a total of
|
||||
# three axes in the figure.
|
||||
# Currently the API does not allow for the second call to
|
||||
# plot_partial_dependence to use the same axes again, because it will
|
||||
# create two new axes in the space resulting in five axes. To get the
|
||||
# expected behavior one needs to pass the generated axes into the second
|
||||
# call:
|
||||
# disp1 = plot_partial_dependence(...)
|
||||
# disp2 = plot_partial_dependence(..., ax=disp1.axes_)
|
||||
|
||||
grid_resolution = 25
|
||||
fig, ax = pyplot.subplots()
|
||||
plot_partial_dependence(clf_boston, boston.data, ['CRIM', 'ZN'],
|
||||
grid_resolution=grid_resolution,
|
||||
feature_names=boston.feature_names, ax=ax)
|
||||
|
||||
msg = ("The ax was already used in another plot function, please set "
|
||||
"ax=display.axes_ instead")
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
plot_partial_dependence(clf_boston, boston.data,
|
||||
['CRIM', 'ZN'],
|
||||
grid_resolution=grid_resolution,
|
||||
feature_names=boston.feature_names, ax=ax)
|
||||
|
||||
|
||||
def test_plot_partial_dependence_feature_name_reuse(pyplot, clf_boston,
|
||||
boston):
|
||||
# second call to plot does not change the feature names from the first
|
||||
# call
|
||||
|
||||
feature_names = boston.feature_names
|
||||
disp = plot_partial_dependence(clf_boston, boston.data,
|
||||
[0, 1],
|
||||
grid_resolution=10,
|
||||
feature_names=feature_names)
|
||||
|
||||
plot_partial_dependence(clf_boston, boston.data, [0, 1],
|
||||
grid_resolution=10, ax=disp.axes_)
|
||||
|
||||
for i, ax in enumerate(disp.axes_.ravel()):
|
||||
assert ax.get_xlabel() == feature_names[i]
|
||||
|
||||
|
||||
def test_plot_partial_dependence_multiclass(pyplot):
|
||||
grid_resolution = 25
|
||||
clf_int = GradientBoostingClassifier(n_estimators=10, random_state=1)
|
||||
iris = load_iris()
|
||||
|
||||
# Test partial dependence plot function on multi-class input.
|
||||
clf_int.fit(iris.data, iris.target)
|
||||
disp_target_0 = plot_partial_dependence(clf_int, iris.data, [0, 1],
|
||||
target=0,
|
||||
grid_resolution=grid_resolution)
|
||||
assert disp_target_0.figure_ is pyplot.gcf()
|
||||
assert disp_target_0.axes_.shape == (1, 2)
|
||||
assert disp_target_0.lines_.shape == (1, 2)
|
||||
assert disp_target_0.contours_.shape == (1, 2)
|
||||
assert disp_target_0.deciles_vlines_.shape == (1, 2)
|
||||
assert disp_target_0.deciles_hlines_.shape == (1, 2)
|
||||
assert all(c is None for c in disp_target_0.contours_.flat)
|
||||
assert disp_target_0.target_idx == 0
|
||||
|
||||
# now with symbol labels
|
||||
target = iris.target_names[iris.target]
|
||||
clf_symbol = GradientBoostingClassifier(n_estimators=10, random_state=1)
|
||||
clf_symbol.fit(iris.data, target)
|
||||
disp_symbol = plot_partial_dependence(clf_symbol, iris.data, [0, 1],
|
||||
target='setosa',
|
||||
grid_resolution=grid_resolution)
|
||||
assert disp_symbol.figure_ is pyplot.gcf()
|
||||
assert disp_symbol.axes_.shape == (1, 2)
|
||||
assert disp_symbol.lines_.shape == (1, 2)
|
||||
assert disp_symbol.contours_.shape == (1, 2)
|
||||
assert disp_symbol.deciles_vlines_.shape == (1, 2)
|
||||
assert disp_symbol.deciles_hlines_.shape == (1, 2)
|
||||
assert all(c is None for c in disp_symbol.contours_.flat)
|
||||
assert disp_symbol.target_idx == 0
|
||||
|
||||
for int_result, symbol_result in zip(disp_target_0.pd_results,
|
||||
disp_symbol.pd_results):
|
||||
avg_preds_int, values_int = int_result
|
||||
avg_preds_symbol, values_symbol = symbol_result
|
||||
assert_allclose(avg_preds_int, avg_preds_symbol)
|
||||
assert_allclose(values_int, values_symbol)
|
||||
|
||||
# check that the pd plots are different for another target
|
||||
disp_target_1 = plot_partial_dependence(clf_int, iris.data, [0, 1],
|
||||
target=1,
|
||||
grid_resolution=grid_resolution)
|
||||
target_0_data_y = disp_target_0.lines_[0, 0].get_data()[1]
|
||||
target_1_data_y = disp_target_1.lines_[0, 0].get_data()[1]
|
||||
assert any(target_0_data_y != target_1_data_y)
|
||||
|
||||
|
||||
multioutput_regression_data = make_regression(n_samples=50, n_targets=2,
|
||||
random_state=0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("target", [0, 1])
|
||||
def test_plot_partial_dependence_multioutput(pyplot, target):
|
||||
# Test partial dependence plot function on multi-output input.
|
||||
X, y = multioutput_regression_data
|
||||
clf = LinearRegression().fit(X, y)
|
||||
|
||||
grid_resolution = 25
|
||||
disp = plot_partial_dependence(clf, X, [0, 1], target=target,
|
||||
grid_resolution=grid_resolution)
|
||||
fig = pyplot.gcf()
|
||||
axs = fig.get_axes()
|
||||
assert len(axs) == 3
|
||||
assert disp.target_idx == target
|
||||
assert disp.bounding_ax_ is not None
|
||||
|
||||
positions = [(0, 0), (0, 1)]
|
||||
expected_label = ["Partial dependence", ""]
|
||||
|
||||
for i, pos in enumerate(positions):
|
||||
ax = disp.axes_[pos]
|
||||
assert ax.get_ylabel() == expected_label[i]
|
||||
assert ax.get_xlabel() == "{}".format(i)
|
||||
|
||||
|
||||
def test_plot_partial_dependence_dataframe(pyplot, clf_boston, boston):
|
||||
pd = pytest.importorskip('pandas')
|
||||
df = pd.DataFrame(boston.data, columns=boston.feature_names)
|
||||
|
||||
grid_resolution = 25
|
||||
|
||||
plot_partial_dependence(
|
||||
clf_boston, df, ['TAX', 'AGE'], grid_resolution=grid_resolution,
|
||||
feature_names=df.columns.tolist()
|
||||
)
|
||||
|
||||
|
||||
dummy_classification_data = make_classification(random_state=0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data, params, err_msg",
|
||||
[(multioutput_regression_data, {"target": None, 'features': [0]},
|
||||
"target must be specified for multi-output"),
|
||||
(multioutput_regression_data, {"target": -1, 'features': [0]},
|
||||
r'target must be in \[0, n_tasks\]'),
|
||||
(multioutput_regression_data, {"target": 100, 'features': [0]},
|
||||
r'target must be in \[0, n_tasks\]'),
|
||||
(dummy_classification_data,
|
||||
{'features': ['foobar'], 'feature_names': None},
|
||||
'Feature foobar not in feature_names'),
|
||||
(dummy_classification_data,
|
||||
{'features': ['foobar'], 'feature_names': ['abcd', 'def']},
|
||||
'Feature foobar not in feature_names'),
|
||||
(dummy_classification_data, {'features': [(1, 2, 3)]},
|
||||
'Each entry in features must be either an int, '),
|
||||
(dummy_classification_data, {'features': [1, {}]},
|
||||
'Each entry in features must be either an int, '),
|
||||
(dummy_classification_data, {'features': [tuple()]},
|
||||
'Each entry in features must be either an int, '),
|
||||
(dummy_classification_data,
|
||||
{'features': [123], 'feature_names': ['blahblah']},
|
||||
'All entries of features must be less than '),
|
||||
(dummy_classification_data,
|
||||
{'features': [0, 1, 2], 'feature_names': ['a', 'b', 'a']},
|
||||
'feature_names should not contain duplicates')]
|
||||
)
|
||||
def test_plot_partial_dependence_error(pyplot, data, params, err_msg):
|
||||
X, y = data
|
||||
estimator = LinearRegression().fit(X, y)
|
||||
|
||||
with pytest.raises(ValueError, match=err_msg):
|
||||
plot_partial_dependence(estimator, X, **params)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("params, err_msg", [
|
||||
({'target': 4, 'features': [0]},
|
||||
'target not in est.classes_, got 4'),
|
||||
({'target': None, 'features': [0]},
|
||||
'target must be specified for multi-class'),
|
||||
({'target': 1, 'features': [4.5]},
|
||||
'Each entry in features must be either an int,'),
|
||||
])
|
||||
def test_plot_partial_dependence_multiclass_error(pyplot, params, err_msg):
|
||||
iris = load_iris()
|
||||
clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
|
||||
clf.fit(iris.data, iris.target)
|
||||
|
||||
with pytest.raises(ValueError, match=err_msg):
|
||||
plot_partial_dependence(clf, iris.data, **params)
|
||||
|
||||
|
||||
def test_plot_partial_dependence_fig_deprecated(pyplot):
|
||||
# Make sure fig object is correctly used if not None
|
||||
X, y = make_regression(n_samples=50, random_state=0)
|
||||
clf = LinearRegression()
|
||||
clf.fit(X, y)
|
||||
|
||||
fig = pyplot.figure()
|
||||
grid_resolution = 25
|
||||
|
||||
msg = ("The fig parameter is deprecated in version 0.22 and will be "
|
||||
"removed in version 0.24")
|
||||
with pytest.warns(FutureWarning, match=msg):
|
||||
plot_partial_dependence(
|
||||
clf, X, [0, 1], target=0, grid_resolution=grid_resolution, fig=fig)
|
||||
|
||||
assert pyplot.gcf() is fig
|
Loading…
Add table
Add a link
Reference in a new issue