Uploaded Test files

Batuhan Berk Başoğlu 2020-11-12 11:05:57 -05:00
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions

@@ -0,0 +1,26 @@
"""The :mod:`sklearn.inspection` module includes tools for model inspection."""
# TODO: remove me in 0.24 (as well as the noqa markers) and
# import the partial_dependence func directly from the
# ._partial_dependence module instead.
# Pre-cache the import of the deprecated module so that import
# sklearn.inspection.partial_dependence returns the function as in
# 0.21, instead of the module
# https://github.com/scikit-learn/scikit-learn/issues/15842
import warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning)
from .partial_dependence import partial_dependence
from ._permutation_importance import permutation_importance # noqa
from ._plot.partial_dependence import plot_partial_dependence # noqa
from ._plot.partial_dependence import PartialDependenceDisplay # noqa
__all__ = [
'partial_dependence',
'plot_partial_dependence',
'permutation_importance',
'PartialDependenceDisplay'
]
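
A quick usage sketch of the public API re-exported above (illustrative only: the classifier and synthetic data are examples, not part of this commit):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import partial_dependence, permutation_importance

X, y = make_classification(n_samples=100, random_state=0)
clf = RandomForestClassifier(n_estimators=10, random_state=0).fit(X, y)

# Average model response over a grid of values for feature 0
avg_preds, values = partial_dependence(clf, X, features=[0])

# Mean drop in score over 3 shuffles of each feature
result = permutation_importance(clf, X, y, n_repeats=3, random_state=0)
print(avg_preds.shape, result.importances_mean.shape)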

@@ -0,0 +1,421 @@
"""Partial dependence plots for regression and classification models."""
# Authors: Peter Prettenhofer
# Trevor Stephens
# Nicolas Hug
# License: BSD 3 clause
from collections.abc import Iterable
import numpy as np
from scipy import sparse
from scipy.stats.mstats import mquantiles
from ..base import is_classifier, is_regressor
from ..pipeline import Pipeline
from ..utils.extmath import cartesian
from ..utils import check_array
from ..utils import check_matplotlib_support # noqa
from ..utils import _safe_indexing
from ..utils import _determine_key_type
from ..utils import _get_column_indices
from ..utils.validation import check_is_fitted
from ..utils.validation import _deprecate_positional_args
from ..tree import DecisionTreeRegressor
from ..ensemble import RandomForestRegressor
from ..exceptions import NotFittedError
from ..ensemble._gb import BaseGradientBoosting
from sklearn.ensemble._hist_gradient_boosting.gradient_boosting import (
BaseHistGradientBoosting)
__all__ = [
'partial_dependence',
]
def _grid_from_X(X, percentiles, grid_resolution):
"""Generate a grid of points based on the percentiles of X.
The grid is a cartesian product between the columns of ``values``. The
ith column of ``values`` consists of ``grid_resolution`` equally-spaced
points between the percentiles of the ith column of X.
If ``grid_resolution`` is bigger than the number of unique values in the
ith column of X, then those unique values will be used instead.
Parameters
----------
X : ndarray, shape (n_samples, n_target_features)
The data
percentiles : tuple of floats
The percentiles which are used to construct the extreme values of
the grid. Must be in [0, 1].
grid_resolution : int
The number of equally spaced points to be placed on the grid for each
feature.
Returns
-------
grid : ndarray, shape (n_points, n_target_features)
A value for each feature at each point in the grid. ``n_points`` is
always ``<= grid_resolution ** X.shape[1]``.
values : list of 1d ndarrays
The values with which the grid has been created. The size of each
array ``values[j]`` is either ``grid_resolution``, or the number of
unique values in ``X[:, j]``, whichever is smaller.
"""
if not isinstance(percentiles, Iterable) or len(percentiles) != 2:
raise ValueError("'percentiles' must be a sequence of 2 elements.")
if not all(0 <= x <= 1 for x in percentiles):
raise ValueError("'percentiles' values must be in [0, 1].")
if percentiles[0] >= percentiles[1]:
raise ValueError('percentiles[0] must be strictly less '
'than percentiles[1].')
if grid_resolution <= 1:
raise ValueError("'grid_resolution' must be strictly greater than 1.")
values = []
for feature in range(X.shape[1]):
uniques = np.unique(_safe_indexing(X, feature, axis=1))
if uniques.shape[0] < grid_resolution:
# feature has low resolution: use its unique values directly
axis = uniques
else:
# create axis based on percentiles and grid resolution
emp_percentiles = mquantiles(
_safe_indexing(X, feature, axis=1), prob=percentiles, axis=0
)
if np.allclose(emp_percentiles[0], emp_percentiles[1]):
raise ValueError(
'percentiles are too close to each other, '
'unable to build the grid. Please choose percentiles '
'that are further apart.')
axis = np.linspace(emp_percentiles[0],
emp_percentiles[1],
num=grid_resolution, endpoint=True)
values.append(axis)
return cartesian(values), values
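# Illustrative sketch (not part of the module): with 4 unique values and
# grid_resolution=3, the percentile branch above is taken and the axis is
# a linspace between the requested percentiles:
#
#     >>> import numpy as np
#     >>> X = np.array([[0.], [1.], [2.], [3.]])
#     >>> grid, values = _grid_from_X(X, percentiles=(0, 1), grid_resolution=3)
#     >>> values[0]
#     array([0. , 1.5, 3. ])
#     >>> grid.shape  # cartesian product over a single feature
#     (3, 1)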
def _partial_dependence_recursion(est, grid, features):
averaged_predictions = est._compute_partial_dependence_recursion(grid,
features)
if averaged_predictions.ndim == 1:
# reshape to (1, n_points) for consistency with
# _partial_dependence_brute
averaged_predictions = averaged_predictions.reshape(1, -1)
return averaged_predictions
def _partial_dependence_brute(est, grid, features, X, response_method):
averaged_predictions = []
# define the prediction_method (predict, predict_proba, decision_function).
if is_regressor(est):
prediction_method = est.predict
else:
predict_proba = getattr(est, 'predict_proba', None)
decision_function = getattr(est, 'decision_function', None)
if response_method == 'auto':
# try predict_proba, then decision_function if it doesn't exist
prediction_method = predict_proba or decision_function
else:
prediction_method = (predict_proba if response_method ==
'predict_proba' else decision_function)
if prediction_method is None:
if response_method == 'auto':
raise ValueError(
'The estimator has no predict_proba and no '
'decision_function method.'
)
elif response_method == 'predict_proba':
raise ValueError('The estimator has no predict_proba method.')
else:
raise ValueError(
'The estimator has no decision_function method.')
for new_values in grid:
X_eval = X.copy()
for i, variable in enumerate(features):
if hasattr(X_eval, 'iloc'):
X_eval.iloc[:, variable] = new_values[i]
else:
X_eval[:, variable] = new_values[i]
try:
predictions = prediction_method(X_eval)
except NotFittedError:
raise ValueError(
"'estimator' parameter must be a fitted estimator")
# Note: predictions is of shape
# (n_points,) for non-multioutput regressors
# (n_points, n_tasks) for multioutput regressors
# (n_points, 1) for the regressors in cross_decomposition (I think)
# (n_points, 2) for binary classification
# (n_points, n_classes) for multiclass classification
# average over samples
averaged_predictions.append(np.mean(predictions, axis=0))
# reshape to (n_targets, n_points) where n_targets is:
# - 1 for non-multioutput regression and binary classification (shape is
# already correct in those cases)
# - n_tasks for multi-output regression
# - n_classes for multiclass classification.
averaged_predictions = np.array(averaged_predictions).T
if is_regressor(est) and averaged_predictions.ndim == 1:
# non-multioutput regression, shape is (n_points,)
averaged_predictions = averaged_predictions.reshape(1, -1)
elif is_classifier(est) and averaged_predictions.shape[0] == 2:
# Binary classification, shape is (2, n_points).
# we output the effect of **positive** class
averaged_predictions = averaged_predictions[1]
averaged_predictions = averaged_predictions.reshape(1, -1)
return averaged_predictions
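# Illustrative summary (not part of the module): for target features S and a
# grid point v, the loop above computes the brute-force estimate
#
#     pd(v) = (1 / n_samples) * sum_i f(x_i with x_i[S] set to v)
#
# i.e. the features in S are overridden for every sample and the model's
# responses are averaged, one grid point at a time.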
@_deprecate_positional_args
def partial_dependence(estimator, X, features, *, response_method='auto',
percentiles=(0.05, 0.95), grid_resolution=100,
method='auto'):
"""Partial dependence of ``features``.
Partial dependence of a feature (or a set of features) corresponds to
the average response of an estimator for each possible value of the
feature.
Read more in the :ref:`User Guide <partial_dependence>`.
.. warning::
For :class:`~sklearn.ensemble.GradientBoostingClassifier` and
:class:`~sklearn.ensemble.GradientBoostingRegressor`, the
'recursion' method (used by default) will not account for the `init`
predictor of the boosting process. In practice, this will produce
the same values as 'brute' up to a constant offset in the target
response, provided that `init` is a constant estimator (which is the
default). However, if `init` is not a constant estimator, the
partial dependence values are incorrect for 'recursion' because the
offset will be sample-dependent. It is preferable to use the 'brute'
method. Note that this only applies to
:class:`~sklearn.ensemble.GradientBoostingClassifier` and
:class:`~sklearn.ensemble.GradientBoostingRegressor`, not to
:class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
:class:`~sklearn.ensemble.HistGradientBoostingRegressor`.
Parameters
----------
estimator : BaseEstimator
A fitted estimator object implementing :term:`predict`,
:term:`predict_proba`, or :term:`decision_function`.
Multioutput-multiclass classifiers are not supported.
X : {array-like or dataframe} of shape (n_samples, n_features)
``X`` is used to generate a grid of values for the target
``features`` (where the partial dependence will be evaluated), and
also to generate values for the complement features when the
`method` is 'brute'.
features : array-like of {int, str}
The feature (e.g. `[0]`) or pair of interacting features
(e.g. `[(0, 1)]`) for which the partial dependency should be computed.
response_method : 'auto', 'predict_proba' or 'decision_function', \
optional (default='auto')
Specifies whether to use :term:`predict_proba` or
:term:`decision_function` as the target response. For regressors
this parameter is ignored and the response is always the output of
:term:`predict`. By default, :term:`predict_proba` is tried first
and we revert to :term:`decision_function` if it doesn't exist. If
``method`` is 'recursion', the response is always the output of
:term:`decision_function`.
percentiles : tuple of float, optional (default=(0.05, 0.95))
The lower and upper percentile used to create the extreme values
for the grid. Must be in [0, 1].
grid_resolution : int, optional (default=100)
The number of equally spaced points on the grid, for each target
feature.
method : str, optional (default='auto')
The method used to calculate the averaged predictions:
- 'recursion' is only supported for some tree-based estimators (namely
:class:`~sklearn.ensemble.GradientBoostingClassifier`,
:class:`~sklearn.ensemble.GradientBoostingRegressor`,
:class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
:class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
:class:`~sklearn.tree.DecisionTreeRegressor`,
:class:`~sklearn.ensemble.RandomForestRegressor`)
but is more efficient in terms of speed.
With this method, the target response of a
classifier is always the decision function, not the predicted
probabilities.
- 'brute' is supported for any estimator, but is more
computationally intensive.
- 'auto': the 'recursion' is used for estimators that support it,
and 'brute' is used otherwise.
Please see :ref:`this note <pdp_method_differences>` for
differences between the 'brute' and 'recursion' method.
Returns
-------
averaged_predictions : ndarray, \
shape (n_outputs, len(values[0]), len(values[1]), ...)
The predictions for all the points in the grid, averaged over all
samples in X (or over the training data if ``method`` is
'recursion'). ``n_outputs`` corresponds to the number of classes in
a multi-class setting, or to the number of tasks for multi-output
regression. For classical regression and binary classification
``n_outputs==1``. ``n_values_feature_j`` corresponds to the size of
``values[j]``.
values : seq of 1d ndarrays
The values with which the grid has been created. The generated grid
is a cartesian product of the arrays in ``values``. ``len(values) ==
len(features)``. The size of each array ``values[j]`` is either
``grid_resolution``, or the number of unique values in ``X[:, j]``,
whichever is smaller.
Examples
--------
>>> X = [[0, 0, 2], [1, 0, 0]]
>>> y = [0, 1]
>>> from sklearn.ensemble import GradientBoostingClassifier
>>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)
>>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),
... grid_resolution=2) # doctest: +SKIP
(array([[-4.52..., 4.52...]]), [array([ 0., 1.])])
See also
--------
sklearn.inspection.plot_partial_dependence: Plot partial dependence
"""
if not (is_classifier(estimator) or is_regressor(estimator)):
raise ValueError(
"'estimator' must be a fitted regressor or classifier."
)
if isinstance(estimator, Pipeline):
# TODO: to be removed if/when Pipeline gets a `steps_` attribute
# assuming Pipeline is the only estimator that does not store a new
# attribute
for est in estimator:
# FIXME: remove the None option once it has been deprecated
if est not in (None, 'drop'):
check_is_fitted(est)
else:
check_is_fitted(estimator)
if (is_classifier(estimator) and
isinstance(estimator.classes_[0], np.ndarray)):
raise ValueError(
'Multiclass-multioutput estimators are not supported'
)
# Use check_array only on lists and other non-array-likes / sparse. Do not
# convert DataFrame into a NumPy array.
if not(hasattr(X, '__array__') or sparse.issparse(X)):
X = check_array(X, force_all_finite='allow-nan', dtype=np.object)
accepted_responses = ('auto', 'predict_proba', 'decision_function')
if response_method not in accepted_responses:
raise ValueError(
'response_method {} is invalid. Accepted response_method names '
'are {}.'.format(response_method, ', '.join(accepted_responses)))
if is_regressor(estimator) and response_method != 'auto':
raise ValueError(
"The response_method parameter is ignored for regressors and "
"must be 'auto'."
)
accepted_methods = ('brute', 'recursion', 'auto')
if method not in accepted_methods:
raise ValueError(
'method {} is invalid. Accepted method names are {}.'.format(
method, ', '.join(accepted_methods)))
if method == 'auto':
if (isinstance(estimator, BaseGradientBoosting) and
estimator.init is None):
method = 'recursion'
elif isinstance(estimator, (BaseHistGradientBoosting,
DecisionTreeRegressor,
RandomForestRegressor)):
method = 'recursion'
else:
method = 'brute'
if method == 'recursion':
if not isinstance(estimator,
(BaseGradientBoosting, BaseHistGradientBoosting,
DecisionTreeRegressor, RandomForestRegressor)):
supported_classes_recursion = (
'GradientBoostingClassifier',
'GradientBoostingRegressor',
'HistGradientBoostingClassifier',
'HistGradientBoostingRegressor',
'DecisionTreeRegressor',
'RandomForestRegressor',
)
raise ValueError(
"Only the following estimators support the 'recursion' "
"method: {}. Try using method='brute'."
.format(', '.join(supported_classes_recursion)))
if response_method == 'auto':
response_method = 'decision_function'
if response_method != 'decision_function':
raise ValueError(
"With the 'recursion' method, the response_method must be "
"'decision_function'. Got {}.".format(response_method)
)
if _determine_key_type(features, accept_slice=False) == 'int':
# _get_column_indices() supports negative indexing. Here, we limit
# the indexing to be positive. The upper bound will be checked
# by _get_column_indices()
if np.any(np.less(features, 0)):
raise ValueError(
'all features must be in [0, {}]'.format(X.shape[1] - 1)
)
features_indices = np.asarray(
_get_column_indices(X, features), dtype=np.int32, order='C'
).ravel()
grid, values = _grid_from_X(
_safe_indexing(X, features_indices, axis=1), percentiles,
grid_resolution
)
if method == 'brute':
averaged_predictions = _partial_dependence_brute(
estimator, grid, features_indices, X, response_method
)
else:
averaged_predictions = _partial_dependence_recursion(
estimator, grid, features_indices
)
# reshape averaged_predictions to
# (n_outputs, n_values_feature_0, n_values_feature_1, ...)
averaged_predictions = averaged_predictions.reshape(
-1, *[val.shape[0] for val in values])
return averaged_predictions, values
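
A short usage sketch of `partial_dependence` (illustrative; the estimator and data are examples, and the `(averaged_predictions, values)` return matches this version of the code):

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.inspection import partial_dependence

X, y = make_regression(n_samples=100, n_features=3, random_state=0)
est = GradientBoostingRegressor(random_state=0).fit(X, y)

# method='auto' resolves to 'recursion' here because init is None (default)
avg_rec, values = partial_dependence(est, X, [0])
avg_brute, _ = partial_dependence(est, X, [0], method='brute')
print(avg_rec.shape, avg_brute.shape)  # both (1, n_grid_points)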

@@ -0,0 +1,142 @@
"""Permutation importance for estimators"""
import numpy as np
from joblib import Parallel
from joblib import delayed
from ..metrics import check_scoring
from ..utils import Bunch
from ..utils import check_random_state
from ..utils import check_array
from ..utils.validation import _deprecate_positional_args
def _calculate_permutation_scores(estimator, X, y, col_idx, random_state,
n_repeats, scorer):
"""Calculate score when `col_idx` is permuted."""
random_state = check_random_state(random_state)
# Work on a copy of X to ensure thread-safety in case of threading-based
# parallelism. Furthermore, making a copy is also useful when the joblib
# backend is 'loky' (default) or the old 'multiprocessing': in those cases,
# if X is large it will automatically be backed by a read-only memory map
# (memmap). X.copy() on the other hand is always guaranteed to return a
# writable data structure whose columns can be shuffled in place.
X_permuted = X.copy()
scores = np.zeros(n_repeats)
shuffling_idx = np.arange(X.shape[0])
for n_round in range(n_repeats):
random_state.shuffle(shuffling_idx)
if hasattr(X_permuted, "iloc"):
col = X_permuted.iloc[shuffling_idx, col_idx]
col.index = X_permuted.index
X_permuted.iloc[:, col_idx] = col
else:
X_permuted[:, col_idx] = X_permuted[shuffling_idx, col_idx]
feature_score = scorer(estimator, X_permuted, y)
scores[n_round] = feature_score
return scores
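# Illustrative sketch (not part of the module): the core of one repeat for
# one column, in plain NumPy:
#
#     idx = rng.permutation(X.shape[0])
#     X_perm = X.copy()
#     X_perm[:, col_idx] = X_perm[idx, col_idx]  # break column/target link
#     score = scorer(estimator, X_perm, y)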
@_deprecate_positional_args
def permutation_importance(estimator, X, y, *, scoring=None, n_repeats=5,
n_jobs=None, random_state=None):
"""Permutation importance for feature evaluation [BRE]_.
The :term:`estimator` is required to be a fitted estimator. `X` can be the
data set used to train the estimator or a hold-out set. The permutation
importance of a feature is calculated as follows. First, a baseline metric,
defined by :term:`scoring`, is evaluated on a (potentially different)
dataset defined by `X`. Next, a feature column from the validation set
is permuted and the metric is evaluated again. The permutation importance
is defined to be the difference between the baseline metric and the metric
from permuting the feature column.
Read more in the :ref:`User Guide <permutation_importance>`.
Parameters
----------
estimator : object
An estimator that has already been :term:`fitted` and is compatible
with :term:`scorer`.
X : ndarray or DataFrame, shape (n_samples, n_features)
Data on which permutation importance will be computed.
y : array-like or None, shape (n_samples, ) or (n_samples, n_classes)
Targets for supervised or `None` for unsupervised.
scoring : string, callable or None, default=None
Scorer to use. It can be a single
string (see :ref:`scoring_parameter`) or a callable (see
:ref:`scoring`). If None, the estimator's default scorer is used.
n_repeats : int, default=5
Number of times to permute a feature.
n_jobs : int or None, default=None
The number of jobs to use for the computation.
`None` means 1 unless in a :obj:`joblib.parallel_backend` context.
`-1` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
random_state : int, RandomState instance, default=None
Pseudo-random number generator to control the permutations of each
feature.
Pass an int to get reproducible results across function calls.
See :term:`Glossary <random_state>`.
Returns
-------
result : :class:`~sklearn.utils.Bunch`
Dictionary-like object, with the following attributes.
importances_mean : ndarray, shape (n_features, )
Mean of feature importance over `n_repeats`.
importances_std : ndarray, shape (n_features, )
Standard deviation over `n_repeats`.
importances : ndarray, shape (n_features, n_repeats)
Raw permutation importance scores.
References
----------
.. [BRE] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32,
2001. https://doi.org/10.1023/A:1010933404324
Examples
--------
>>> from sklearn.linear_model import LogisticRegression
>>> from sklearn.inspection import permutation_importance
>>> X = [[1, 9, 9],[1, 9, 9],[1, 9, 9],
... [0, 9, 9],[0, 9, 9],[0, 9, 9]]
>>> y = [1, 1, 1, 0, 0, 0]
>>> clf = LogisticRegression().fit(X, y)
>>> result = permutation_importance(clf, X, y, n_repeats=10,
... random_state=0)
>>> result.importances_mean
array([0.4666..., 0. , 0. ])
>>> result.importances_std
array([0.2211..., 0. , 0. ])
"""
if not hasattr(X, "iloc"):
X = check_array(X, force_all_finite='allow-nan', dtype=None)
# Precompute random seed from the random state to be used
# to get a fresh independent RandomState instance for each
# parallel call to _calculate_permutation_scores, irrespective of
# the fact that variables are shared or not depending on the active
# joblib backend (sequential, thread-based or process-based).
random_state = check_random_state(random_state)
random_seed = random_state.randint(np.iinfo(np.int32).max + 1)
scorer = check_scoring(estimator, scoring=scoring)
baseline_score = scorer(estimator, X, y)
scores = Parallel(n_jobs=n_jobs)(delayed(_calculate_permutation_scores)(
estimator, X, y, col_idx, random_seed, n_repeats, scorer
) for col_idx in range(X.shape[1]))
importances = baseline_score - np.array(scores)
return Bunch(importances_mean=np.mean(importances, axis=1),
importances_std=np.std(importances, axis=1),
importances=importances)
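
A usage sketch of the returned `Bunch` (illustrative; the model, data and the two-sigma cutoff are examples, not part of this commit): rank features by mean importance and keep those clearly above zero:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.inspection import permutation_importance

X, y = make_classification(n_samples=200, n_features=5, random_state=0)
clf = LogisticRegression().fit(X, y)
r = permutation_importance(clf, X, y, n_repeats=10, random_state=0)
for i in r.importances_mean.argsort()[::-1]:
    if r.importances_mean[i] - 2 * r.importances_std[i] > 0:
        print("feature %d: %.3f +/- %.3f"
              % (i, r.importances_mean[i], r.importances_std[i]))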

@@ -0,0 +1,593 @@
import numbers
from itertools import chain
from itertools import count
import warnings
import numpy as np
from scipy import sparse
from scipy.stats.mstats import mquantiles
from joblib import Parallel, delayed
from .. import partial_dependence
from ...base import is_regressor
from ...utils import check_array
from ...utils import check_matplotlib_support # noqa
from ...utils import _safe_indexing
from ...utils.validation import _deprecate_positional_args
@_deprecate_positional_args
def plot_partial_dependence(estimator, X, features, *, feature_names=None,
target=None, response_method='auto', n_cols=3,
grid_resolution=100, percentiles=(0.05, 0.95),
method='auto', n_jobs=None, verbose=0, fig=None,
line_kw=None, contour_kw=None, ax=None):
"""Partial dependence plots.
The ``len(features)`` plots are arranged in a grid with ``n_cols``
columns. Two-way partial dependence plots are plotted as contour plots. The
deciles of the feature values will be shown with tick marks on the x-axes
for one-way plots, and on both axes for two-way plots.
Read more in the :ref:`User Guide <partial_dependence>`.
.. note::
:func:`plot_partial_dependence` does not support using the same axes
with multiple calls. To plot the partial dependence for multiple
estimators, please pass the axes created by the first call to the
second call::
>>> from sklearn.inspection import plot_partial_dependence
>>> from sklearn.datasets import make_friedman1
>>> from sklearn.linear_model import LinearRegression
>>> X, y = make_friedman1()
>>> est = LinearRegression().fit(X, y)
>>> disp1 = plot_partial_dependence(est, X) # doctest: +SKIP
>>> disp2 = plot_partial_dependence(est, X,
... ax=disp1.axes_) # doctest: +SKIP
.. warning::
For :class:`~sklearn.ensemble.GradientBoostingClassifier` and
:class:`~sklearn.ensemble.GradientBoostingRegressor`, the
'recursion' method (used by default) will not account for the `init`
predictor of the boosting process. In practice, this will produce
the same values as 'brute' up to a constant offset in the target
response, provided that `init` is a constant estimator (which is the
default). However, if `init` is not a constant estimator, the
partial dependence values are incorrect for 'recursion' because the
offset will be sample-dependent. It is preferable to use the 'brute'
method. Note that this only applies to
:class:`~sklearn.ensemble.GradientBoostingClassifier` and
:class:`~sklearn.ensemble.GradientBoostingRegressor`, not to
:class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
:class:`~sklearn.ensemble.HistGradientBoostingRegressor`.
Parameters
----------
estimator : BaseEstimator
A fitted estimator object implementing :term:`predict`,
:term:`predict_proba`, or :term:`decision_function`.
Multioutput-multiclass classifiers are not supported.
X : {array-like or dataframe} of shape (n_samples, n_features)
``X`` is used to generate a grid of values for the target
``features`` (where the partial dependence will be evaluated), and
also to generate values for the complement features when the
`method` is 'brute'.
features : list of {int, str, pair of int, pair of str}
The target features for which to create the PDPs.
If features[i] is an int or a string, a one-way PDP is created; if
features[i] is a tuple, a two-way PDP is created. Each tuple must be
of size 2.
If any entry is a string, then it must be in ``feature_names``.
feature_names : array-like of shape (n_features,), dtype=str, default=None
Name of each feature; feature_names[i] holds the name of the feature
with index i.
By default, the name of a feature corresponds to its numerical
index for a NumPy array and to its column name for a pandas dataframe.
target : int, optional (default=None)
- In a multiclass setting, specifies the class for which the PDPs
should be computed. Note that for binary classification, the
positive class (index 1) is always used.
- In a multioutput setting, specifies the task for which the PDPs
should be computed.
Ignored in binary classification or classical regression settings.
response_method : 'auto', 'predict_proba' or 'decision_function', \
optional (default='auto')
Specifies whether to use :term:`predict_proba` or
:term:`decision_function` as the target response. For regressors
this parameter is ignored and the response is always the output of
:term:`predict`. By default, :term:`predict_proba` is tried first
and we revert to :term:`decision_function` if it doesn't exist. If
``method`` is 'recursion', the response is always the output of
:term:`decision_function`.
n_cols : int, optional (default=3)
The maximum number of columns in the grid plot. Only active when `ax`
is a single axis or `None`.
grid_resolution : int, optional (default=100)
The number of equally spaced points on the axes of the plots, for each
target feature.
percentiles : tuple of float, optional (default=(0.05, 0.95))
The lower and upper percentile used to create the extreme values
for the PDP axes. Must be in [0, 1].
method : str, optional (default='auto')
The method used to calculate the averaged predictions:
- 'recursion' is only supported for some tree-based estimators (namely
:class:`~sklearn.ensemble.GradientBoostingClassifier`,
:class:`~sklearn.ensemble.GradientBoostingRegressor`,
:class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
:class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
:class:`~sklearn.tree.DecisionTreeRegressor`,
:class:`~sklearn.ensemble.RandomForestRegressor`)
but is more efficient in terms of speed.
With this method, the target response of a
classifier is always the decision function, not the predicted
probabilities.
- 'brute' is supported for any estimator, but is more
computationally intensive.
- 'auto': the 'recursion' is used for estimators that support it,
and 'brute' is used otherwise.
Please see :ref:`this note <pdp_method_differences>` for
differences between the 'brute' and 'recursion' method.
n_jobs : int, optional (default=None)
The number of CPUs to use to compute the partial dependences.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
verbose : int, optional (default=0)
Verbose output during PD computations.
fig : Matplotlib figure object, optional (default=None)
A figure object onto which the plots will be drawn, after the figure
has been cleared. By default, a new one is created.
.. deprecated:: 0.22
``fig`` will be removed in 0.24.
line_kw : dict, optional
Dict with keywords passed to the ``matplotlib.pyplot.plot`` call.
For one-way partial dependence plots.
contour_kw : dict, optional
Dict with keywords passed to the ``matplotlib.pyplot.contourf`` call.
For two-way partial dependence plots.
ax : Matplotlib axes or array-like of Matplotlib axes, default=None
- If a single axis is passed in, it is treated as a bounding axes
and a grid of partial dependence plots will be drawn within
these bounds. The `n_cols` parameter controls the number of
columns in the grid.
- If an array-like of axes are passed in, the partial dependence
plots will be drawn directly into these axes.
- If `None`, a figure and a bounding axes is created and treated
as the single axes case.
.. versionadded:: 0.22
Returns
-------
display: :class:`~sklearn.inspection.PartialDependenceDisplay`
Examples
--------
>>> from sklearn.datasets import make_friedman1
>>> from sklearn.ensemble import GradientBoostingRegressor
>>> X, y = make_friedman1()
>>> clf = GradientBoostingRegressor(n_estimators=10).fit(X, y)
>>> plot_partial_dependence(clf, X, [0, (0, 1)]) #doctest: +SKIP
See also
--------
sklearn.inspection.partial_dependence: Return raw partial
dependence values
"""
check_matplotlib_support('plot_partial_dependence') # noqa
import matplotlib.pyplot as plt # noqa
from matplotlib import transforms # noqa
from matplotlib.ticker import MaxNLocator # noqa
from matplotlib.ticker import ScalarFormatter # noqa
# set target_idx for multi-class estimators
if hasattr(estimator, 'classes_') and np.size(estimator.classes_) > 2:
if target is None:
raise ValueError('target must be specified for multi-class')
target_idx = np.searchsorted(estimator.classes_, target)
if (not (0 <= target_idx < len(estimator.classes_)) or
estimator.classes_[target_idx] != target):
raise ValueError('target not in est.classes_, got {}'.format(
target))
else:
# regression and binary classification
target_idx = 0
# Use check_array only on lists and other non-array-likes / sparse. Do not
# convert DataFrame into a NumPy array.
if not(hasattr(X, '__array__') or sparse.issparse(X)):
X = check_array(X, force_all_finite='allow-nan', dtype=np.object)
n_features = X.shape[1]
# convert feature_names to list
if feature_names is None:
if hasattr(X, "loc"):
# get the column names for a pandas dataframe
feature_names = X.columns.tolist()
else:
# define a list of numbered indices for a numpy array
feature_names = [str(i) for i in range(n_features)]
elif hasattr(feature_names, "tolist"):
# convert numpy array or pandas index to a list
feature_names = feature_names.tolist()
if len(set(feature_names)) != len(feature_names):
raise ValueError('feature_names should not contain duplicates.')
def convert_feature(fx):
if isinstance(fx, str):
try:
fx = feature_names.index(fx)
except ValueError:
raise ValueError('Feature %s not in feature_names' % fx)
return int(fx)
# convert features into a seq of int tuples
tmp_features = []
for fxs in features:
if isinstance(fxs, (numbers.Integral, str)):
fxs = (fxs,)
try:
fxs = tuple(convert_feature(fx) for fx in fxs)
except TypeError:
raise ValueError('Each entry in features must be either an int, '
'a string, or an iterable of size at most 2.')
if not 1 <= np.size(fxs) <= 2:
raise ValueError('Each entry in features must be either an int, '
'a string, or an iterable of size at most 2.')
tmp_features.append(fxs)
features = tmp_features
# Early exit if the axes does not have the correct number of axes
if ax is not None and not isinstance(ax, plt.Axes):
axes = np.asarray(ax, dtype=object)
if axes.size != len(features):
raise ValueError("Expected ax to have {} axes, got {}".format(
len(features), axes.size))
for i in chain.from_iterable(features):
if i >= len(feature_names):
raise ValueError('All entries of features must be less than '
'len(feature_names) = {0}, got {1}.'
.format(len(feature_names), i))
# compute averaged predictions
pd_results = Parallel(n_jobs=n_jobs, verbose=verbose)(
delayed(partial_dependence)(estimator, X, fxs,
response_method=response_method,
method=method,
grid_resolution=grid_resolution,
percentiles=percentiles)
for fxs in features)
# For multioutput regression, we can only check the validity of target
# now that we have the predictions.
# Also note: as multiclass-multioutput classifiers are not supported,
# multiclass and multioutput scenario are mutually exclusive. So there is
# no risk of overwriting target_idx here.
avg_preds, _ = pd_results[0] # checking the first result is enough
if is_regressor(estimator) and avg_preds.shape[0] > 1:
if target is None:
raise ValueError(
'target must be specified for multi-output regressors')
if not 0 <= target <= avg_preds.shape[0]:
raise ValueError(
'target must be in [0, n_tasks], got {}.'.format(target))
target_idx = target
# get global min and max average predictions of PD grouped by plot type
pdp_lim = {}
for avg_preds, values in pd_results:
min_pd = avg_preds[target_idx].min()
max_pd = avg_preds[target_idx].max()
n_fx = len(values)
old_min_pd, old_max_pd = pdp_lim.get(n_fx, (min_pd, max_pd))
min_pd = min(min_pd, old_min_pd)
max_pd = max(max_pd, old_max_pd)
pdp_lim[n_fx] = (min_pd, max_pd)
deciles = {}
for fx in chain.from_iterable(features):
if fx not in deciles:
X_col = _safe_indexing(X, fx, axis=1)
deciles[fx] = mquantiles(X_col, prob=np.arange(0.1, 1.0, 0.1))
if fig is not None:
warnings.warn("The fig parameter is deprecated in version "
"0.22 and will be removed in version 0.24",
FutureWarning)
fig.clear()
ax = fig.gca()
display = PartialDependenceDisplay(pd_results=pd_results,
features=features,
feature_names=feature_names,
target_idx=target_idx,
pdp_lim=pdp_lim,
deciles=deciles)
return display.plot(ax=ax, n_cols=n_cols, line_kw=line_kw,
contour_kw=contour_kw)
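# Usage sketch (illustrative; `est`, `other_est` and `X` stand for fitted
# estimators and data): draw the plots once, then reuse the created axes
# for a second estimator, as recommended in the docstring note above:
#
#     disp = plot_partial_dependence(est, X, [0, (0, 1)])
#     plot_partial_dependence(other_est, X, [0, (0, 1)], ax=disp.axes_)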
class PartialDependenceDisplay:
"""Partial Dependence Plot (PDP) visualization.
It is recommended to use
:func:`~sklearn.inspection.plot_partial_dependence` to create a
:class:`~sklearn.inspection.PartialDependenceDisplay`. All parameters are
stored as attributes.
Read more in
:ref:`sphx_glr_auto_examples_miscellaneous_plot_partial_dependence_visualization_api.py`
and the :ref:`User Guide <visualizations>`.
.. versionadded:: 0.22
Parameters
----------
pd_results : list of (ndarray, ndarray)
Results of :func:`~sklearn.inspection.partial_dependence` for
``features``. Each tuple corresponds to a (averaged_predictions, grid).
features : list of (int,) or list of (int, int)
Indices of features for a given plot. A tuple of one integer will plot
a partial dependence curve of one feature. A tuple of two integers will
plot a two-way partial dependence curve as a contour plot.
feature_names : list of str
Feature names corresponding to the indices in ``features``.
target_idx : int
- In a multiclass setting, specifies the class for which the PDPs
should be computed. Note that for binary classification, the
positive class (index 1) is always used.
- In a multioutput setting, specifies the task for which the PDPs
should be computed.
Ignored in binary classification or classical regression settings.
pdp_lim : dict
Global min and max average predictions, such that all plots will have
the same scale and y limits. `pdp_lim[1]` is the global min and max for
single partial dependence curves. `pdp_lim[2]` is the global min and
max for two-way partial dependence curves.
deciles : dict
Deciles for feature indices in ``features``.
Attributes
----------
bounding_ax_ : matplotlib Axes or None
If `ax` is an axes or None, the `bounding_ax_` is the axes where the
grid of partial dependence plots are drawn. If `ax` is a list of axes
or a numpy array of axes, `bounding_ax_` is None.
axes_ : ndarray of matplotlib Axes
If `ax` is an axes or None, `axes_[i, j]` is the axes on the i-th row
and j-th column. If `ax` is a list of axes, `axes_[i]` is the i-th item
in `ax`. Elements that are None correspond to a nonexisting axes in
that position.
lines_ : ndarray of matplotlib Artists
If `ax` is an axes or None, `lines_[i, j]` is the partial dependence
curve on the i-th row and j-th column. If `ax` is a list of axes,
`lines_[i]` is the partial dependence curve corresponding to the i-th
item in `ax`. Elements that are None correspond to a nonexisting axes
or an axes that does not include a line plot.
deciles_vlines_ : ndarray of matplotlib LineCollection
If `ax` is an axes or None, `vlines_[i, j]` is the line collection
representing the x axis deciles of the i-th row and j-th column. If
`ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in
`ax`. Elements that are None correspond to a nonexisting axes or an
axes that does not include a PDP plot.
.. versionadded:: 0.23
deciles_hlines_ : ndarray of matplotlib LineCollection
If `ax` is an axes or None, `vlines_[i, j]` is the line collection
representing the y axis deciles of the i-th row and j-th column. If
`ax` is a list of axes, `vlines_[i]` corresponds to the i-th item in
`ax`. Elements that are None correspond to a nonexisting axes or an
axes that does not include a 2-way plot.
.. versionadded:: 0.23
contours_ : ndarray of matplotlib Artists
If `ax` is an axes or None, `contours_[i, j]` is the partial dependence
plot on the i-th row and j-th column. If `ax` is a list of axes,
`contours_[i]` is the partial dependence plot corresponding to the i-th
item in `ax`. Elements that are None correspond to a nonexisting axes
or an axes that does not include a contour plot.
figure_ : matplotlib Figure
Figure containing partial dependence plots.
"""
@_deprecate_positional_args
def __init__(self, pd_results, *, features, feature_names, target_idx,
pdp_lim, deciles):
self.pd_results = pd_results
self.features = features
self.feature_names = feature_names
self.target_idx = target_idx
self.pdp_lim = pdp_lim
self.deciles = deciles
def plot(self, ax=None, n_cols=3, line_kw=None, contour_kw=None):
"""Plot partial dependence plots.
Parameters
----------
ax : Matplotlib axes or array-like of Matplotlib axes, default=None
- If a single axis is passed in, it is treated as a bounding axes
and a grid of partial dependence plots will be drawn within
these bounds. The `n_cols` parameter controls the number of
columns in the grid.
- If an array-like of axes are passed in, the partial dependence
plots will be drawn directly into these axes.
- If `None`, a figure and a bounding axes is created and treated
as the single axes case.
n_cols : int, default=3
The maximum number of columns in the grid plot. Only active when
`ax` is a single axes or `None`.
line_kw : dict, default=None
Dict with keywords passed to the `matplotlib.pyplot.plot` call.
For one-way partial dependence plots.
contour_kw : dict, default=None
Dict with keywords passed to the `matplotlib.pyplot.contourf`
call for two-way partial dependence plots.
Returns
-------
display: :class:`~sklearn.inspection.PartialDependenceDisplay`
"""
check_matplotlib_support("plot_partial_dependence")
import matplotlib.pyplot as plt # noqa
from matplotlib import transforms # noqa
from matplotlib.ticker import MaxNLocator # noqa
from matplotlib.ticker import ScalarFormatter # noqa
from matplotlib.gridspec import GridSpecFromSubplotSpec # noqa
if line_kw is None:
line_kw = {}
if contour_kw is None:
contour_kw = {}
if ax is None:
_, ax = plt.subplots()
default_contour_kws = {"alpha": 0.75}
contour_kw = {**default_contour_kws, **contour_kw}
n_features = len(self.features)
if isinstance(ax, plt.Axes):
# If the axes are already off, they have most likely been turned off
# by a previous call to plot.
if not ax.axison:
raise ValueError("The ax was already used in another plot "
"function, please set ax=display.axes_ "
"instead")
ax.set_axis_off()
self.bounding_ax_ = ax
self.figure_ = ax.figure
n_cols = min(n_cols, n_features)
n_rows = int(np.ceil(n_features / float(n_cols)))
self.axes_ = np.empty((n_rows, n_cols), dtype=np.object)
axes_ravel = self.axes_.ravel()
gs = GridSpecFromSubplotSpec(n_rows, n_cols,
subplot_spec=ax.get_subplotspec())
for i, spec in zip(range(n_features), gs):
axes_ravel[i] = self.figure_.add_subplot(spec)
else: # array-like
ax = np.asarray(ax, dtype=object)
if ax.size != n_features:
raise ValueError("Expected ax to have {} axes, got {}"
.format(n_features, ax.size))
if ax.ndim == 2:
n_cols = ax.shape[1]
else:
n_cols = None
self.bounding_ax_ = None
self.figure_ = ax.ravel()[0].figure
self.axes_ = ax
# create contour levels for two-way plots
if 2 in self.pdp_lim:
Z_level = np.linspace(*self.pdp_lim[2], num=8)
self.lines_ = np.empty_like(self.axes_, dtype=np.object)
self.contours_ = np.empty_like(self.axes_, dtype=np.object)
self.deciles_vlines_ = np.empty_like(self.axes_, dtype=np.object)
self.deciles_hlines_ = np.empty_like(self.axes_, dtype=np.object)
# Create 1d views of these 2d arrays for easy indexing
lines_ravel = self.lines_.ravel(order='C')
contours_ravel = self.contours_.ravel(order='C')
vlines_ravel = self.deciles_vlines_.ravel(order='C')
hlines_ravel = self.deciles_hlines_.ravel(order='C')
for i, axi, fx, (avg_preds, values) in zip(count(),
self.axes_.ravel(),
self.features,
self.pd_results):
if len(values) == 1:
lines_ravel[i] = axi.plot(values[0],
avg_preds[self.target_idx].ravel(),
**line_kw)[0]
else:
# contour plot
XX, YY = np.meshgrid(values[0], values[1])
Z = avg_preds[self.target_idx].T
CS = axi.contour(XX, YY, Z, levels=Z_level, linewidths=0.5,
colors='k')
contours_ravel[i] = axi.contourf(XX, YY, Z, levels=Z_level,
vmax=Z_level[-1],
vmin=Z_level[0],
**contour_kw)
axi.clabel(CS, fmt='%2.2f', colors='k', fontsize=10,
inline=True)
trans = transforms.blended_transform_factory(axi.transData,
axi.transAxes)
ylim = axi.get_ylim()
vlines_ravel[i] = axi.vlines(self.deciles[fx[0]], 0, 0.05,
transform=trans, color='k')
axi.set_ylim(ylim)
# Set xlabel if it is not already set
if not axi.get_xlabel():
axi.set_xlabel(self.feature_names[fx[0]])
if len(values) == 1:
if n_cols is None or i % n_cols == 0:
axi.set_ylabel('Partial dependence')
else:
axi.set_yticklabels([])
axi.set_ylim(self.pdp_lim[1])
else:
# contour plot
trans = transforms.blended_transform_factory(axi.transAxes,
axi.transData)
xlim = axi.get_xlim()
hlines_ravel[i] = axi.hlines(self.deciles[fx[1]], 0, 0.05,
transform=trans, color='k')
# hline erases xlim
axi.set_ylabel(self.feature_names[fx[1]])
axi.set_xlim(xlim)
return self
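
A brief sketch of the Display API above (illustrative, assuming matplotlib is available): the object returned by `plot_partial_dependence` stores the computed `pd_results`, so it can be re-rendered onto fresh axes without recomputing the partial dependences:

import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.inspection import plot_partial_dependence

X, y = make_regression(n_samples=100, n_features=2, random_state=0)
est = LinearRegression().fit(X, y)

disp = plot_partial_dependence(est, X, [0, 1])
fig, axs = plt.subplots(1, 2)
disp.plot(ax=axs, line_kw={"color": "red"})  # replot, no recomputation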

@@ -0,0 +1,474 @@
import numpy as np
from scipy.stats.mstats import mquantiles
import pytest
from numpy.testing import assert_allclose
from sklearn.datasets import load_boston
from sklearn.datasets import load_iris
from sklearn.datasets import make_classification, make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LinearRegression
from sklearn.utils._testing import _convert_container
from sklearn.inspection import plot_partial_dependence
# TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
pytestmark = pytest.mark.filterwarnings(
"ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:"
"matplotlib.*")
@pytest.fixture(scope="module")
def boston():
return load_boston()
@pytest.fixture(scope="module")
def clf_boston(boston):
clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
clf.fit(boston.data, boston.target)
return clf
@pytest.mark.parametrize("grid_resolution", [10, 20])
def test_plot_partial_dependence(grid_resolution, pyplot, clf_boston, boston):
# Test partial dependence plot function.
feature_names = boston.feature_names
disp = plot_partial_dependence(clf_boston, boston.data,
[0, 1, (0, 1)],
grid_resolution=grid_resolution,
feature_names=feature_names,
contour_kw={"cmap": "jet"})
fig = pyplot.gcf()
axs = fig.get_axes()
assert disp.figure_ is fig
assert len(axs) == 4
assert disp.bounding_ax_ is not None
assert disp.axes_.shape == (1, 3)
assert disp.lines_.shape == (1, 3)
assert disp.contours_.shape == (1, 3)
assert disp.deciles_vlines_.shape == (1, 3)
assert disp.deciles_hlines_.shape == (1, 3)
assert disp.lines_[0, 2] is None
assert disp.contours_[0, 0] is None
assert disp.contours_[0, 1] is None
# deciles lines: always show on xaxis, only show on yaxis if 2-way PDP
for i in range(3):
assert disp.deciles_vlines_[0, i] is not None
assert disp.deciles_hlines_[0, 0] is None
assert disp.deciles_hlines_[0, 1] is None
assert disp.deciles_hlines_[0, 2] is not None
assert disp.features == [(0, ), (1, ), (0, 1)]
assert np.all(disp.feature_names == feature_names)
assert len(disp.deciles) == 2
for i in [0, 1]:
assert_allclose(disp.deciles[i],
mquantiles(boston.data[:, i],
prob=np.arange(0.1, 1.0, 0.1)))
single_feature_positions = [(0, 0), (0, 1)]
expected_ylabels = ["Partial dependence", ""]
for i, pos in enumerate(single_feature_positions):
ax = disp.axes_[pos]
assert ax.get_ylabel() == expected_ylabels[i]
assert ax.get_xlabel() == boston.feature_names[i]
assert_allclose(ax.get_ylim(), disp.pdp_lim[1])
line = disp.lines_[pos]
avg_preds, values = disp.pd_results[i]
assert avg_preds.shape == (1, grid_resolution)
target_idx = disp.target_idx
line_data = line.get_data()
assert_allclose(line_data[0], values[0])
assert_allclose(line_data[1], avg_preds[target_idx].ravel())
# two feature position
ax = disp.axes_[0, 2]
contour = disp.contours_[0, 2]
expected_levels = np.linspace(*disp.pdp_lim[2], num=8)
assert_allclose(contour.levels, expected_levels)
assert contour.get_cmap().name == "jet"
assert ax.get_xlabel() == boston.feature_names[0]
assert ax.get_ylabel() == boston.feature_names[1]
@pytest.mark.parametrize(
"input_type, feature_names_type",
[('dataframe', None),
('dataframe', 'list'), ('list', 'list'), ('array', 'list'),
('dataframe', 'array'), ('list', 'array'), ('array', 'array'),
('dataframe', 'series'), ('list', 'series'), ('array', 'series'),
('dataframe', 'index'), ('list', 'index'), ('array', 'index')]
)
def test_plot_partial_dependence_str_features(pyplot, clf_boston, boston,
input_type, feature_names_type):
if input_type == 'dataframe':
pd = pytest.importorskip("pandas")
X = pd.DataFrame(boston.data, columns=boston.feature_names)
elif input_type == 'list':
X = boston.data.tolist()
else:
X = boston.data
if feature_names_type is None:
feature_names = None
else:
feature_names = _convert_container(boston.feature_names,
feature_names_type)
grid_resolution = 25
# check with str features and array feature names and single column
disp = plot_partial_dependence(clf_boston, X,
[('CRIM', 'ZN'), 'ZN'],
grid_resolution=grid_resolution,
feature_names=feature_names,
n_cols=1, line_kw={"alpha": 0.8})
fig = pyplot.gcf()
axs = fig.get_axes()
assert len(axs) == 3
assert disp.figure_ is fig
assert disp.axes_.shape == (2, 1)
assert disp.lines_.shape == (2, 1)
assert disp.contours_.shape == (2, 1)
assert disp.deciles_vlines_.shape == (2, 1)
assert disp.deciles_hlines_.shape == (2, 1)
assert disp.lines_[0, 0] is None
assert disp.deciles_vlines_[0, 0] is not None
assert disp.deciles_hlines_[0, 0] is not None
assert disp.contours_[1, 0] is None
assert disp.deciles_hlines_[1, 0] is None
assert disp.deciles_vlines_[1, 0] is not None
# line
ax = disp.axes_[1, 0]
assert ax.get_xlabel() == "ZN"
assert ax.get_ylabel() == "Partial dependence"
line = disp.lines_[1, 0]
avg_preds, values = disp.pd_results[1]
target_idx = disp.target_idx
assert line.get_alpha() == 0.8
line_data = line.get_data()
assert_allclose(line_data[0], values[0])
assert_allclose(line_data[1], avg_preds[target_idx].ravel())
# contour
ax = disp.axes_[0, 0]
contour = disp.contours_[0, 0]
expected_levels = np.linspace(*disp.pdp_lim[2], num=8)
assert_allclose(contour.levels, expected_levels)
assert ax.get_xlabel() == "CRIM"
assert ax.get_ylabel() == "ZN"
def test_plot_partial_dependence_custom_axes(pyplot, clf_boston, boston):
grid_resolution = 25
fig, (ax1, ax2) = pyplot.subplots(1, 2)
feature_names = boston.feature_names.tolist()
disp = plot_partial_dependence(clf_boston, boston.data,
['CRIM', ('CRIM', 'ZN')],
grid_resolution=grid_resolution,
feature_names=feature_names, ax=[ax1, ax2])
assert fig is disp.figure_
assert disp.bounding_ax_ is None
assert disp.axes_.shape == (2, )
assert disp.axes_[0] is ax1
assert disp.axes_[1] is ax2
ax = disp.axes_[0]
assert ax.get_xlabel() == "CRIM"
assert ax.get_ylabel() == "Partial dependence"
line = disp.lines_[0]
avg_preds, values = disp.pd_results[0]
target_idx = disp.target_idx
line_data = line.get_data()
assert_allclose(line_data[0], values[0])
assert_allclose(line_data[1], avg_preds[target_idx].ravel())
# contour
ax = disp.axes_[1]
contour = disp.contours_[1]
expected_levels = np.linspace(*disp.pdp_lim[2], num=8)
assert_allclose(contour.levels, expected_levels)
assert ax.get_xlabel() == "CRIM"
assert ax.get_ylabel() == "ZN"
def test_plot_partial_dependence_passing_numpy_axes(pyplot, clf_boston,
boston):
grid_resolution = 25
feature_names = boston.feature_names.tolist()
disp1 = plot_partial_dependence(clf_boston, boston.data,
['CRIM', 'ZN'],
grid_resolution=grid_resolution,
feature_names=feature_names)
assert disp1.axes_.shape == (1, 2)
assert disp1.axes_[0, 0].get_ylabel() == "Partial dependence"
assert disp1.axes_[0, 1].get_ylabel() == ""
assert len(disp1.axes_[0, 0].get_lines()) == 1
assert len(disp1.axes_[0, 1].get_lines()) == 1
lr = LinearRegression()
lr.fit(boston.data, boston.target)
disp2 = plot_partial_dependence(lr, boston.data,
['CRIM', 'ZN'],
grid_resolution=grid_resolution,
feature_names=feature_names,
ax=disp1.axes_)
assert np.all(disp1.axes_ == disp2.axes_)
assert len(disp2.axes_[0, 0].get_lines()) == 2
assert len(disp2.axes_[0, 1].get_lines()) == 2
@pytest.mark.parametrize("nrows, ncols", [(2, 2), (3, 1)])
def test_plot_partial_dependence_incorrect_num_axes(pyplot, clf_boston,
boston, nrows, ncols):
grid_resolution = 5
fig, axes = pyplot.subplots(nrows, ncols)
axes_formats = [list(axes.ravel()), tuple(axes.ravel()), axes]
msg = "Expected ax to have 2 axes, got {}".format(nrows * ncols)
disp = plot_partial_dependence(clf_boston, boston.data,
['CRIM', 'ZN'],
grid_resolution=grid_resolution,
feature_names=boston.feature_names)
for ax_format in axes_formats:
with pytest.raises(ValueError, match=msg):
plot_partial_dependence(clf_boston, boston.data,
['CRIM', 'ZN'],
grid_resolution=grid_resolution,
feature_names=boston.feature_names,
ax=ax_format)
# with axes object
with pytest.raises(ValueError, match=msg):
disp.plot(ax=ax_format)
def test_plot_partial_dependence_with_same_axes(pyplot, clf_boston, boston):
# The first call to plot_partial_dependence will create two new axes to
# place in the space of the passed in axes, which results in a total of
# three axes in the figure.
# Currently the API does not allow for the second call to
# plot_partial_dependence to use the same axes again, because it will
# create two new axes in the space resulting in five axes. To get the
# expected behavior one needs to pass the generated axes into the second
# call:
# disp1 = plot_partial_dependence(...)
# disp2 = plot_partial_dependence(..., ax=disp1.axes_)
grid_resolution = 25
fig, ax = pyplot.subplots()
plot_partial_dependence(clf_boston, boston.data, ['CRIM', 'ZN'],
grid_resolution=grid_resolution,
feature_names=boston.feature_names, ax=ax)
msg = ("The ax was already used in another plot function, please set "
"ax=display.axes_ instead")
with pytest.raises(ValueError, match=msg):
plot_partial_dependence(clf_boston, boston.data,
['CRIM', 'ZN'],
grid_resolution=grid_resolution,
feature_names=boston.feature_names, ax=ax)
def test_plot_partial_dependence_feature_name_reuse(pyplot, clf_boston,
boston):
# second call to plot does not change the feature names from the first
# call
feature_names = boston.feature_names
disp = plot_partial_dependence(clf_boston, boston.data,
[0, 1],
grid_resolution=10,
feature_names=feature_names)
plot_partial_dependence(clf_boston, boston.data, [0, 1],
grid_resolution=10, ax=disp.axes_)
for i, ax in enumerate(disp.axes_.ravel()):
assert ax.get_xlabel() == feature_names[i]
def test_plot_partial_dependence_multiclass(pyplot):
grid_resolution = 25
clf_int = GradientBoostingClassifier(n_estimators=10, random_state=1)
iris = load_iris()
# Test partial dependence plot function on multi-class input.
clf_int.fit(iris.data, iris.target)
disp_target_0 = plot_partial_dependence(clf_int, iris.data, [0, 1],
target=0,
grid_resolution=grid_resolution)
assert disp_target_0.figure_ is pyplot.gcf()
assert disp_target_0.axes_.shape == (1, 2)
assert disp_target_0.lines_.shape == (1, 2)
assert disp_target_0.contours_.shape == (1, 2)
assert disp_target_0.deciles_vlines_.shape == (1, 2)
assert disp_target_0.deciles_hlines_.shape == (1, 2)
assert all(c is None for c in disp_target_0.contours_.flat)
assert disp_target_0.target_idx == 0
# now with symbol labels
target = iris.target_names[iris.target]
clf_symbol = GradientBoostingClassifier(n_estimators=10, random_state=1)
clf_symbol.fit(iris.data, target)
disp_symbol = plot_partial_dependence(clf_symbol, iris.data, [0, 1],
target='setosa',
grid_resolution=grid_resolution)
assert disp_symbol.figure_ is pyplot.gcf()
assert disp_symbol.axes_.shape == (1, 2)
assert disp_symbol.lines_.shape == (1, 2)
assert disp_symbol.contours_.shape == (1, 2)
assert disp_symbol.deciles_vlines_.shape == (1, 2)
assert disp_symbol.deciles_hlines_.shape == (1, 2)
assert all(c is None for c in disp_symbol.contours_.flat)
assert disp_symbol.target_idx == 0
for int_result, symbol_result in zip(disp_target_0.pd_results,
disp_symbol.pd_results):
avg_preds_int, values_int = int_result
avg_preds_symbol, values_symbol = symbol_result
assert_allclose(avg_preds_int, avg_preds_symbol)
assert_allclose(values_int, values_symbol)
# check that the pd plots are different for another target
disp_target_1 = plot_partial_dependence(clf_int, iris.data, [0, 1],
target=1,
grid_resolution=grid_resolution)
target_0_data_y = disp_target_0.lines_[0, 0].get_data()[1]
target_1_data_y = disp_target_1.lines_[0, 0].get_data()[1]
assert any(target_0_data_y != target_1_data_y)
multioutput_regression_data = make_regression(n_samples=50, n_targets=2,
random_state=0)
@pytest.mark.parametrize("target", [0, 1])
def test_plot_partial_dependence_multioutput(pyplot, target):
# Test partial dependence plot function on multi-output input.
X, y = multioutput_regression_data
clf = LinearRegression().fit(X, y)
grid_resolution = 25
disp = plot_partial_dependence(clf, X, [0, 1], target=target,
grid_resolution=grid_resolution)
fig = pyplot.gcf()
axs = fig.get_axes()
assert len(axs) == 3
assert disp.target_idx == target
assert disp.bounding_ax_ is not None
positions = [(0, 0), (0, 1)]
expected_label = ["Partial dependence", ""]
for i, pos in enumerate(positions):
ax = disp.axes_[pos]
assert ax.get_ylabel() == expected_label[i]
assert ax.get_xlabel() == "{}".format(i)
def test_plot_partial_dependence_dataframe(pyplot, clf_boston, boston):
pd = pytest.importorskip('pandas')
df = pd.DataFrame(boston.data, columns=boston.feature_names)
grid_resolution = 25
plot_partial_dependence(
clf_boston, df, ['TAX', 'AGE'], grid_resolution=grid_resolution,
feature_names=df.columns.tolist()
)
dummy_classification_data = make_classification(random_state=0)
@pytest.mark.parametrize(
"data, params, err_msg",
[(multioutput_regression_data, {"target": None, 'features': [0]},
"target must be specified for multi-output"),
(multioutput_regression_data, {"target": -1, 'features': [0]},
r'target must be in \[0, n_tasks\]'),
(multioutput_regression_data, {"target": 100, 'features': [0]},
r'target must be in \[0, n_tasks\]'),
(dummy_classification_data,
{'features': ['foobar'], 'feature_names': None},
'Feature foobar not in feature_names'),
(dummy_classification_data,
{'features': ['foobar'], 'feature_names': ['abcd', 'def']},
'Feature foobar not in feature_names'),
(dummy_classification_data, {'features': [(1, 2, 3)]},
'Each entry in features must be either an int, '),
(dummy_classification_data, {'features': [1, {}]},
'Each entry in features must be either an int, '),
(dummy_classification_data, {'features': [tuple()]},
'Each entry in features must be either an int, '),
(dummy_classification_data,
{'features': [123], 'feature_names': ['blahblah']},
'All entries of features must be less than '),
(dummy_classification_data,
{'features': [0, 1, 2], 'feature_names': ['a', 'b', 'a']},
'feature_names should not contain duplicates')]
)
def test_plot_partial_dependence_error(pyplot, data, params, err_msg):
X, y = data
estimator = LinearRegression().fit(X, y)
with pytest.raises(ValueError, match=err_msg):
plot_partial_dependence(estimator, X, **params)
@pytest.mark.parametrize("params, err_msg", [
({'target': 4, 'features': [0]},
'target not in est.classes_, got 4'),
({'target': None, 'features': [0]},
'target must be specified for multi-class'),
({'target': 1, 'features': [4.5]},
'Each entry in features must be either an int,'),
])
def test_plot_partial_dependence_multiclass_error(pyplot, params, err_msg):
iris = load_iris()
clf = GradientBoostingClassifier(n_estimators=10, random_state=1)
clf.fit(iris.data, iris.target)
with pytest.raises(ValueError, match=err_msg):
plot_partial_dependence(clf, iris.data, **params)
def test_plot_partial_dependence_fig_deprecated(pyplot):
# Make sure fig object is correctly used if not None
X, y = make_regression(n_samples=50, random_state=0)
clf = LinearRegression()
clf.fit(X, y)
fig = pyplot.figure()
grid_resolution = 25
msg = ("The fig parameter is deprecated in version 0.22 and will be "
"removed in version 0.24")
with pytest.warns(FutureWarning, match=msg):
plot_partial_dependence(
clf, X, [0, 1], target=0, grid_resolution=grid_resolution, fig=fig)
assert pyplot.gcf() is fig

View file

@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _partial_dependence # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.inspection.partial_dependence'
correct_import_path = 'sklearn.inspection'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_partial_dependence, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)
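# Added note: on Python >= 3.7, PEP 562 gives modules native __getattr__
# support, so the Pep562 wrapper above is only needed to backport that
# behavior to older interpreters.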

View file

@@ -0,0 +1,17 @@
from numpy.distutils.misc_util import Configuration
def configuration(parent_package="", top_path=None):
config = Configuration("inspection", parent_package, top_path)
config.add_subpackage('_plot')
config.add_subpackage('_plot.tests')
config.add_subpackage('tests')
return config
if __name__ == "__main__":
from numpy.distutils.core import setup
setup(**configuration().todict())

View file

@@ -0,0 +1,663 @@
"""
Testing for the partial dependence module.
"""
import numpy as np
import pytest
import sklearn
from sklearn.inspection import partial_dependence
from sklearn.inspection._partial_dependence import (
_grid_from_X,
_partial_dependence_brute,
_partial_dependence_recursion
)
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import MultiTaskLasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import load_iris
from sklearn.datasets import make_classification, make_regression
from sklearn.cluster import KMeans
from sklearn.compose import make_column_transformer
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import make_pipeline
from sklearn.dummy import DummyClassifier
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.exceptions import NotFittedError
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import ignore_warnings
from sklearn.utils import _IS_32BIT
from sklearn.utils.validation import check_random_state
from sklearn.tree.tests.test_tree import assert_is_subtree
# toy sample
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
y = [-1, -1, -1, 1, 1, 1]
# (X, y), n_targets <-- as expected in the output of partial_dep()
binary_classification_data = (make_classification(n_samples=50,
random_state=0), 1)
multiclass_classification_data = (make_classification(n_samples=50,
n_classes=3,
n_clusters_per_class=1,
random_state=0), 3)
regression_data = (make_regression(n_samples=50, random_state=0), 1)
multioutput_regression_data = (make_regression(n_samples=50, n_targets=2,
random_state=0), 2)
# iris
iris = load_iris()
@pytest.mark.parametrize('Estimator, method, data', [
(GradientBoostingClassifier, 'recursion', binary_classification_data),
(GradientBoostingClassifier, 'recursion', multiclass_classification_data),
(GradientBoostingClassifier, 'brute', binary_classification_data),
(GradientBoostingClassifier, 'brute', multiclass_classification_data),
(GradientBoostingRegressor, 'recursion', regression_data),
(GradientBoostingRegressor, 'brute', regression_data),
(DecisionTreeRegressor, 'brute', regression_data),
(LinearRegression, 'brute', regression_data),
(LinearRegression, 'brute', multioutput_regression_data),
(LogisticRegression, 'brute', binary_classification_data),
(LogisticRegression, 'brute', multiclass_classification_data),
(MultiTaskLasso, 'brute', multioutput_regression_data),
])
@pytest.mark.parametrize('grid_resolution', (5, 10))
@pytest.mark.parametrize('features', ([1], [1, 2]))
def test_output_shape(Estimator, method, data, grid_resolution,
features):
# Check that partial_dependence has consistent output shape for different
# kinds of estimators:
# - classifiers with binary and multiclass settings
# - regressors
# - multi-task regressors
est = Estimator()
    # n_targets corresponds to the number of classes (1 for binary
    # classification) or the number of tasks / outputs in multi-task
    # settings. It's equal to 1 for classical regression data.
(X, y), n_targets = data
est.fit(X, y)
pdp, axes = partial_dependence(est, X=X, features=features,
method=method,
grid_resolution=grid_resolution)
expected_pdp_shape = (n_targets, *[grid_resolution
for _ in range(len(features))])
expected_axes_shape = (len(features), grid_resolution)
assert pdp.shape == expected_pdp_shape
assert axes is not None
assert np.asarray(axes).shape == expected_axes_shape
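# Shape convention illustrated (added note; the numbers below are an
# example only): for a 3-class classifier with features=[1, 2] and
# grid_resolution=5, partial_dependence returns a pdp of shape (3, 5, 5)
# plus a list of two 1d arrays of grid values, each of length 5.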
def test_grid_from_X():
# tests for _grid_from_X: sanity check for output, and for shapes.
# Make sure that the grid is a cartesian product of the input (it will use
# the unique values instead of the percentiles)
percentiles = (.05, .95)
grid_resolution = 100
X = np.asarray([[1, 2],
[3, 4]])
grid, axes = _grid_from_X(X, percentiles, grid_resolution)
assert_array_equal(grid, [[1, 2],
[1, 4],
[3, 2],
[3, 4]])
assert_array_equal(axes, X.T)
# test shapes of returned objects depending on the number of unique values
# for a feature.
rng = np.random.RandomState(0)
grid_resolution = 15
# n_unique_values > grid_resolution
X = rng.normal(size=(20, 2))
grid, axes = _grid_from_X(X, percentiles, grid_resolution=grid_resolution)
assert grid.shape == (grid_resolution * grid_resolution, X.shape[1])
assert np.asarray(axes).shape == (2, grid_resolution)
# n_unique_values < grid_resolution, will use actual values
n_unique_values = 12
X[n_unique_values - 1:, 0] = 12345
rng.shuffle(X) # just to make sure the order is irrelevant
grid, axes = _grid_from_X(X, percentiles, grid_resolution=grid_resolution)
assert grid.shape == (n_unique_values * grid_resolution, X.shape[1])
# axes is a list of arrays of different shapes
assert axes[0].shape == (n_unique_values,)
assert axes[1].shape == (grid_resolution,)
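# Illustrative sketch (added; the helper name is hypothetical and not part
# of the original suite): when every column has fewer unique values than
# grid_resolution, the grid returned by _grid_from_X is just the cartesian
# product of the per-column unique values, as np.meshgrid reproduces here.
def _demo_grid_is_cartesian_product():
    X_demo = np.asarray([[1, 2], [3, 4]])
    cols = [np.unique(X_demo[:, j]) for j in range(X_demo.shape[1])]
    mesh = np.meshgrid(*cols, indexing='ij')
    expected = np.column_stack([m.ravel() for m in mesh])
    grid, _ = _grid_from_X(X_demo, (.05, .95), grid_resolution=100)
    assert_array_equal(grid, expected)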
@pytest.mark.parametrize(
"grid_resolution, percentiles, err_msg",
[(2, (0, 0.0001), "percentiles are too close"),
(100, (1, 2, 3, 4), "'percentiles' must be a sequence of 2 elements"),
(100, 12345, "'percentiles' must be a sequence of 2 elements"),
(100, (-1, .95), r"'percentiles' values must be in \[0, 1\]"),
(100, (.05, 2), r"'percentiles' values must be in \[0, 1\]"),
(100, (.9, .1), r"percentiles\[0\] must be strictly less than"),
(1, (0.05, 0.95), "'grid_resolution' must be strictly greater than 1")]
)
def test_grid_from_X_error(grid_resolution, percentiles, err_msg):
X = np.asarray([[1, 2], [3, 4]])
with pytest.raises(ValueError, match=err_msg):
_grid_from_X(
X, grid_resolution=grid_resolution, percentiles=percentiles
)
@pytest.mark.parametrize('target_feature', range(5))
@pytest.mark.parametrize('est, method', [
(LinearRegression(), 'brute'),
(GradientBoostingRegressor(random_state=0), 'brute'),
(GradientBoostingRegressor(random_state=0), 'recursion'),
(HistGradientBoostingRegressor(random_state=0), 'brute'),
(HistGradientBoostingRegressor(random_state=0), 'recursion')]
)
def test_partial_dependence_helpers(est, method, target_feature):
# Check that what is returned by _partial_dependence_brute or
# _partial_dependence_recursion is equivalent to manually setting a target
# feature to a given value, and computing the average prediction over all
# samples.
# This also checks that the brute and recursion methods give the same
# output.
# Note that even on the trainset, the brute and the recursion methods
    # aren't always strictly equivalent, in particular when the slow (brute)
    # method generates unrealistic samples that have low mass in the joint
# distribution of the input features, and when some of the features are
# dependent. Hence the high tolerance on the checks.
X, y = make_regression(random_state=0, n_features=5, n_informative=5)
# The 'init' estimator for GBDT (here the average prediction) isn't taken
# into account with the recursion method, for technical reasons. We set
    # the mean to 0 so that this 'bug' doesn't have any effect.
y = y - y.mean()
est.fit(X, y)
# target feature will be set to .5 and then to 123
features = np.array([target_feature], dtype=np.int32)
grid = np.array([[.5],
[123]])
if method == 'brute':
pdp = _partial_dependence_brute(est, grid, features, X,
response_method='auto')
else:
pdp = _partial_dependence_recursion(est, grid, features)
mean_predictions = []
for val in (.5, 123):
X_ = X.copy()
X_[:, target_feature] = val
mean_predictions.append(est.predict(X_).mean())
pdp = pdp[0] # (shape is (1, 2) so make it (2,))
# allow for greater margin for error with recursion method
rtol = 1e-1 if method == 'recursion' else 1e-3
assert np.allclose(pdp, mean_predictions, rtol=rtol)
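# A minimal sketch (added; the helper name is hypothetical) of the
# brute-force definition checked above: the partial dependence of feature j
# at grid value v is the mean prediction over the samples after overwriting
# column j with v.
def _demo_brute_partial_dependence(est, X, feature_idx, value):
    X_mod = X.copy()
    X_mod[:, feature_idx] = value
    return est.predict(X_mod).mean()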
@pytest.mark.parametrize('seed', range(1))
def test_recursion_decision_tree_vs_forest_and_gbdt(seed):
# Make sure that the recursion method gives the same results on a
# DecisionTreeRegressor and a GradientBoostingRegressor or a
# RandomForestRegressor with 1 tree and equivalent parameters.
rng = np.random.RandomState(seed)
# Purely random dataset to avoid correlated features
n_samples = 1000
n_features = 5
X = rng.randn(n_samples, n_features)
y = rng.randn(n_samples) * 10
# The 'init' estimator for GBDT (here the average prediction) isn't taken
# into account with the recursion method, for technical reasons. We set
    # the mean to 0 so that this 'bug' doesn't have any effect.
y = y - y.mean()
# set max_depth not too high to avoid splits with same gain but different
# features
max_depth = 5
tree_seed = 0
forest = RandomForestRegressor(n_estimators=1, max_features=None,
bootstrap=False, max_depth=max_depth,
random_state=tree_seed)
# The forest will use ensemble.base._set_random_states to set the
# random_state of the tree sub-estimator. We simulate this here to have
# equivalent estimators.
equiv_random_state = check_random_state(tree_seed).randint(
np.iinfo(np.int32).max)
gbdt = GradientBoostingRegressor(n_estimators=1, learning_rate=1,
criterion='mse', max_depth=max_depth,
random_state=equiv_random_state)
tree = DecisionTreeRegressor(max_depth=max_depth,
random_state=equiv_random_state)
forest.fit(X, y)
gbdt.fit(X, y)
tree.fit(X, y)
# sanity check: if the trees aren't the same, the PD values won't be equal
try:
assert_is_subtree(tree.tree_, gbdt[0, 0].tree_)
assert_is_subtree(tree.tree_, forest[0].tree_)
except AssertionError:
# For some reason the trees aren't exactly equal on 32bits, so the PDs
# cannot be equal either. See
# https://github.com/scikit-learn/scikit-learn/issues/8853
assert _IS_32BIT, "this should only fail on 32 bit platforms"
return
grid = rng.randn(50).reshape(-1, 1)
for f in range(n_features):
features = np.array([f], dtype=np.int32)
pdp_forest = _partial_dependence_recursion(forest, grid, features)
pdp_gbdt = _partial_dependence_recursion(gbdt, grid, features)
pdp_tree = _partial_dependence_recursion(tree, grid, features)
np.testing.assert_allclose(pdp_gbdt, pdp_tree)
np.testing.assert_allclose(pdp_forest, pdp_tree)
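# Context note (added): with n_estimators=1, learning_rate=1 and centered
# targets (the constant init then predicts 0), the GBDT reduces to a single
# regression tree, and a 1-tree forest without bootstrap is likewise a
# single tree, which is why all three recursion PDs should coincide.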
@pytest.mark.parametrize('est', (
GradientBoostingClassifier(random_state=0),
HistGradientBoostingClassifier(random_state=0),
))
@pytest.mark.parametrize('target_feature', (0, 1, 2, 3, 4, 5))
def test_recursion_decision_function(est, target_feature):
    # Make sure the recursion method (which implicitly uses
    # decision_function) gives the same result as the brute method with
    # response_method='decision_function'
X, y = make_classification(n_classes=2, n_clusters_per_class=1,
random_state=1)
assert np.mean(y) == .5 # make sure the init estimator predicts 0 anyway
est.fit(X, y)
preds_1, _ = partial_dependence(est, X, [target_feature],
response_method='decision_function',
method='recursion')
preds_2, _ = partial_dependence(est, X, [target_feature],
response_method='decision_function',
method='brute')
assert_allclose(preds_1, preds_2, atol=1e-7)
@pytest.mark.parametrize('est', (
LinearRegression(),
GradientBoostingRegressor(random_state=0),
HistGradientBoostingRegressor(random_state=0, min_samples_leaf=1,
max_leaf_nodes=None, max_iter=1),
DecisionTreeRegressor(random_state=0),
))
@pytest.mark.parametrize('power', (1, 2))
def test_partial_dependence_easy_target(est, power):
# If the target y only depends on one feature in an obvious way (linear or
# quadratic) then the partial dependence for that feature should reflect
# it.
    # Here we fit a linear regression model (with polynomial features if
    # needed) and compute r_squared to check that the partial dependence
    # correctly reflects the target.
rng = np.random.RandomState(0)
n_samples = 200
target_variable = 2
X = rng.normal(size=(n_samples, 5))
y = X[:, target_variable]**power
est.fit(X, y)
averaged_predictions, values = partial_dependence(
est, features=[target_variable], X=X, grid_resolution=1000)
new_X = values[0].reshape(-1, 1)
new_y = averaged_predictions[0]
# add polynomial features if needed
new_X = PolynomialFeatures(degree=power).fit_transform(new_X)
lr = LinearRegression().fit(new_X, new_y)
r2 = r2_score(new_y, lr.predict(new_X))
assert r2 > .99
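# Reasoning note (added): with independent standard-normal features and
# y = X[:, j] ** power, the ideal partial dependence of feature j is
# E[y | x_j = v] = v ** power, so regressing the PD curve on polynomial
# features of the grid values should give an r2 close to 1.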
@pytest.mark.parametrize('Estimator',
(sklearn.tree.DecisionTreeClassifier,
sklearn.tree.ExtraTreeClassifier,
sklearn.ensemble.ExtraTreesClassifier,
sklearn.neighbors.KNeighborsClassifier,
sklearn.neighbors.RadiusNeighborsClassifier,
sklearn.ensemble.RandomForestClassifier))
def test_multiclass_multioutput(Estimator):
# Make sure error is raised for multiclass-multioutput classifiers
# make multiclass-multioutput dataset
X, y = make_classification(n_classes=3, n_clusters_per_class=1,
random_state=0)
y = np.array([y, y]).T
est = Estimator()
est.fit(X, y)
with pytest.raises(
ValueError,
match="Multiclass-multioutput estimators are not supported"):
partial_dependence(est, X, [0])
class NoPredictProbaNoDecisionFunction(ClassifierMixin, BaseEstimator):
def fit(self, X, y):
# simulate that we have some classes
self.classes_ = [0, 1]
return self
@pytest.mark.parametrize(
"estimator, params, err_msg",
[(KMeans(),
{'features': [0]},
"'estimator' must be a fitted regressor or classifier"),
(LinearRegression(),
{'features': [0], 'response_method': 'predict_proba'},
'The response_method parameter is ignored for regressors'),
(GradientBoostingClassifier(random_state=0),
{'features': [0], 'response_method': 'predict_proba',
'method': 'recursion'},
"'recursion' method, the response_method must be 'decision_function'"),
(GradientBoostingClassifier(random_state=0),
{'features': [0], 'response_method': 'predict_proba', 'method': 'auto'},
"'recursion' method, the response_method must be 'decision_function'"),
(GradientBoostingClassifier(random_state=0),
{'features': [0], 'response_method': 'blahblah'},
'response_method blahblah is invalid. Accepted response_method'),
(NoPredictProbaNoDecisionFunction(),
{'features': [0], 'response_method': 'auto'},
'The estimator has no predict_proba and no decision_function method'),
(NoPredictProbaNoDecisionFunction(),
{'features': [0], 'response_method': 'predict_proba'},
'The estimator has no predict_proba method.'),
(NoPredictProbaNoDecisionFunction(),
{'features': [0], 'response_method': 'decision_function'},
'The estimator has no decision_function method.'),
(LinearRegression(),
{'features': [0], 'method': 'blahblah'},
'blahblah is invalid. Accepted method names are brute, recursion, auto'),
(LinearRegression(),
{'features': [0], 'method': 'recursion'},
"Only the following estimators support the 'recursion' method:")]
)
def test_partial_dependence_error(estimator, params, err_msg):
X, y = make_classification(random_state=0)
estimator.fit(X, y)
with pytest.raises(ValueError, match=err_msg):
partial_dependence(estimator, X, **params)
@pytest.mark.parametrize(
"with_dataframe, err_msg",
[(True, "Only array-like or scalar are supported"),
(False, "Only array-like or scalar are supported")]
)
def test_partial_dependence_slice_error(with_dataframe, err_msg):
X, y = make_classification(random_state=0)
if with_dataframe:
pd = pytest.importorskip('pandas')
X = pd.DataFrame(X)
estimator = LogisticRegression().fit(X, y)
with pytest.raises(TypeError, match=err_msg):
partial_dependence(estimator, X, features=slice(0, 2, 1))
@pytest.mark.parametrize(
'estimator',
[LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
@pytest.mark.parametrize('features', [-1, 10000])
def test_partial_dependence_unknown_feature_indices(estimator, features):
X, y = make_classification(random_state=0)
estimator.fit(X, y)
err_msg = 'all features must be in'
with pytest.raises(ValueError, match=err_msg):
partial_dependence(estimator, X, [features])
@pytest.mark.parametrize(
'estimator',
[LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
def test_partial_dependence_unknown_feature_string(estimator):
pd = pytest.importorskip("pandas")
X, y = make_classification(random_state=0)
df = pd.DataFrame(X)
estimator.fit(df, y)
features = ['random']
err_msg = 'A given column is not a column of the dataframe'
with pytest.raises(ValueError, match=err_msg):
partial_dependence(estimator, df, features)
@pytest.mark.parametrize(
'estimator',
[LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
def test_partial_dependence_X_list(estimator):
# check that array-like objects are accepted
X, y = make_classification(random_state=0)
estimator.fit(X, y)
partial_dependence(estimator, list(X), [0])
# TODO: Remove in 0.24 when DummyClassifier's `strategy` default updates
@ignore_warnings(category=FutureWarning)
def test_warning_recursion_non_constant_init():
    # make sure that passing a non-constant init estimator to a GBDT and
    # using the recursion method yields a warning.
gbc = GradientBoostingClassifier(init=DummyClassifier(), random_state=0)
gbc.fit(X, y)
with pytest.warns(
UserWarning,
match='Using recursion method with a non-constant init predictor'):
partial_dependence(gbc, X, [0], method='recursion')
with pytest.warns(
UserWarning,
match='Using recursion method with a non-constant init predictor'):
partial_dependence(gbc, X, [0], method='recursion')
def test_partial_dependence_sample_weight():
    # Test near-perfect correlation between partial dependence and the
    # diagonal when sample weights emphasize the y = x data points
# non-regression test for #13193
# TODO: extend to HistGradientBoosting once sample_weight is supported
N = 1000
rng = np.random.RandomState(123456)
mask = rng.randint(2, size=N, dtype=bool)
x = rng.rand(N)
# set y = x on mask and y = -x outside
y = x.copy()
y[~mask] = -y[~mask]
X = np.c_[mask, x]
# sample weights to emphasize data points where y = x
sample_weight = np.ones(N)
sample_weight[mask] = 1000.
clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
clf.fit(X, y, sample_weight=sample_weight)
pdp, values = partial_dependence(clf, X, features=[1])
assert np.corrcoef(pdp, values)[0, 1] > 0.99
def test_hist_gbdt_sw_not_supported():
# TODO: remove/fix when PDP supports HGBT with sample weights
clf = HistGradientBoostingRegressor(random_state=1)
clf.fit(X, y, sample_weight=np.ones(len(X)))
with pytest.raises(NotImplementedError,
match="does not support partial dependence"):
partial_dependence(clf, X, features=[1])
# TODO: Remove in 0.24 when DummyClassifier's `strategy` default updates
@ignore_warnings(category=FutureWarning)
def test_partial_dependence_pipeline():
    # check that partial_dependence supports pipelines
iris = load_iris()
scaler = StandardScaler()
clf = DummyClassifier(random_state=42)
pipe = make_pipeline(scaler, clf)
clf.fit(scaler.fit_transform(iris.data), iris.target)
pipe.fit(iris.data, iris.target)
features = 0
pdp_pipe, values_pipe = partial_dependence(
pipe, iris.data, features=[features], grid_resolution=10
)
pdp_clf, values_clf = partial_dependence(
clf, scaler.transform(iris.data), features=[features],
grid_resolution=10
)
assert_allclose(pdp_pipe, pdp_clf)
assert_allclose(
values_pipe[0],
values_clf[0] * scaler.scale_[features] + scaler.mean_[features]
)
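# Side note (added): the rescaling asserted above is StandardScaler's
# inverse mapping: the scaler computes z = (x - mean_) / scale_, so grid
# values in the scaled space map back via x = z * scale_[j] + mean_[j].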
@pytest.mark.parametrize(
"estimator",
[LogisticRegression(max_iter=1000, random_state=0),
GradientBoostingClassifier(random_state=0, n_estimators=5)],
ids=['estimator-brute', 'estimator-recursion']
)
@pytest.mark.parametrize(
"preprocessor",
[None,
make_column_transformer(
(StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
(RobustScaler(), [iris.feature_names[i] for i in (1, 3)])),
make_column_transformer(
(StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
remainder='passthrough')],
ids=['None', 'column-transformer', 'column-transformer-passthrough']
)
@pytest.mark.parametrize(
"features",
[[0, 2], [iris.feature_names[i] for i in (0, 2)]],
ids=['features-integer', 'features-string']
)
def test_partial_dependence_dataframe(estimator, preprocessor, features):
    # check that partial_dependence supports dataframes and pipelines
    # that include a column transformer
pd = pytest.importorskip("pandas")
df = pd.DataFrame(iris.data, columns=iris.feature_names)
pipe = make_pipeline(preprocessor, estimator)
pipe.fit(df, iris.target)
pdp_pipe, values_pipe = partial_dependence(
pipe, df, features=features, grid_resolution=10
)
    # the column transformer will reorder the columns when transforming;
    # we mix the indices to be sure that we are computing the partial
    # dependence of the right columns
if preprocessor is not None:
X_proc = clone(preprocessor).fit_transform(df)
features_clf = [0, 1]
else:
X_proc = df
features_clf = [0, 2]
clf = clone(estimator).fit(X_proc, iris.target)
pdp_clf, values_clf = partial_dependence(
clf, X_proc, features=features_clf, method='brute', grid_resolution=10
)
assert_allclose(pdp_pipe, pdp_clf)
if preprocessor is not None:
scaler = preprocessor.named_transformers_['standardscaler']
assert_allclose(
values_pipe[1],
values_clf[1] * scaler.scale_[1] + scaler.mean_[1]
)
else:
assert_allclose(values_pipe[1], values_clf[1])
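# Clarifying note (added): the StandardScaler block is listed first in the
# column transformers above, so its two inputs (raw features 0 and 2) come
# out as transformed columns 0 and 1, hence features_clf = [0, 1] when a
# preprocessor is used.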
@pytest.mark.parametrize(
"features, expected_pd_shape",
[(0, (3, 10)),
(iris.feature_names[0], (3, 10)),
([0, 2], (3, 10, 10)),
([iris.feature_names[i] for i in (0, 2)], (3, 10, 10)),
([True, False, True, False], (3, 10, 10))],
ids=['scalar-int', 'scalar-str', 'list-int', 'list-str', 'mask']
)
def test_partial_dependence_feature_type(features, expected_pd_shape):
# check all possible features type supported in PDP
pd = pytest.importorskip("pandas")
df = pd.DataFrame(iris.data, columns=iris.feature_names)
preprocessor = make_column_transformer(
(StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
(RobustScaler(), [iris.feature_names[i] for i in (1, 3)])
)
pipe = make_pipeline(
preprocessor, LogisticRegression(max_iter=1000, random_state=0)
)
pipe.fit(df, iris.target)
pdp_pipe, values_pipe = partial_dependence(
pipe, df, features=features, grid_resolution=10
)
assert pdp_pipe.shape == expected_pd_shape
assert len(values_pipe) == len(pdp_pipe.shape) - 1
@pytest.mark.parametrize(
"estimator", [LinearRegression(), LogisticRegression(),
GradientBoostingRegressor(), GradientBoostingClassifier()]
)
def test_partial_dependence_unfitted(estimator):
X = iris.data
preprocessor = make_column_transformer(
(StandardScaler(), [0, 2]), (RobustScaler(), [1, 3])
)
pipe = make_pipeline(preprocessor, estimator)
with pytest.raises(NotFittedError, match="is not fitted yet"):
partial_dependence(pipe, X, features=[0, 2], grid_resolution=10)
with pytest.raises(NotFittedError, match="is not fitted yet"):
partial_dependence(estimator, X, features=[0, 2], grid_resolution=10)

View file

@@ -0,0 +1,353 @@
import pytest
import numpy as np
from numpy.testing import assert_allclose
from sklearn.compose import ColumnTransformer
from sklearn.datasets import load_diabetes
from sklearn.datasets import load_iris
from sklearn.datasets import make_classification
from sklearn.datasets import make_regression
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale
from sklearn.utils import parallel_backend
from sklearn.utils._testing import _convert_container
@pytest.mark.parametrize("n_jobs", [1, 2])
def test_permutation_importance_correlated_feature_regression(n_jobs):
    # Make sure that a feature highly correlated with the target has a
    # higher importance
rng = np.random.RandomState(42)
n_repeats = 5
X, y = load_diabetes(return_X_y=True)
y_with_little_noise = (
y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)
X = np.hstack([X, y_with_little_noise])
clf = RandomForestRegressor(n_estimators=10, random_state=42)
clf.fit(X, y)
result = permutation_importance(clf, X, y, n_repeats=n_repeats,
random_state=rng, n_jobs=n_jobs)
assert result.importances.shape == (X.shape[1], n_repeats)
    # the feature correlated with y was added as the last column and should
# have the highest importance
assert np.all(result.importances_mean[-1] >
result.importances_mean[:-1])
@pytest.mark.parametrize("n_jobs", [1, 2])
def test_permutation_importance_correlated_feature_regression_pandas(n_jobs):
pd = pytest.importorskip("pandas")
    # Make sure that a feature highly correlated with the target has a
    # higher importance
rng = np.random.RandomState(42)
n_repeats = 5
dataset = load_iris()
X, y = dataset.data, dataset.target
y_with_little_noise = (
y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)
    # Add a feature correlated with y as the last column
X = pd.DataFrame(X, columns=dataset.feature_names)
X['correlated_feature'] = y_with_little_noise
clf = RandomForestClassifier(n_estimators=10, random_state=42)
clf.fit(X, y)
result = permutation_importance(clf, X, y, n_repeats=n_repeats,
random_state=rng, n_jobs=n_jobs)
assert result.importances.shape == (X.shape[1], n_repeats)
    # the feature correlated with y was added as the last column and should
# have the highest importance
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
@pytest.mark.parametrize("n_jobs", [1, 2])
def test_robustness_to_high_cardinality_noisy_feature(n_jobs, seed=42):
# Permutation variable importance should not be affected by the high
# cardinality bias of traditional feature importances, especially when
# computed on a held-out test set:
rng = np.random.RandomState(seed)
n_repeats = 5
n_samples = 1000
n_classes = 5
n_informative_features = 2
n_noise_features = 1
n_features = n_informative_features + n_noise_features
# Generate a multiclass classification dataset and a set of informative
# binary features that can be used to predict some classes of y exactly
# while leaving some classes unexplained to make the problem harder.
classes = np.arange(n_classes)
y = rng.choice(classes, size=n_samples)
X = np.hstack([(y == c).reshape(-1, 1)
for c in classes[:n_informative_features]])
X = X.astype(np.float32)
# Not all target classes are explained by the binary class indicator
# features:
assert n_informative_features < n_classes
    # Add n_noise_features other noisy features with high cardinality
    # (numerical) values that can be used to overfit the training data.
X = np.concatenate([X, rng.randn(n_samples, n_noise_features)], axis=1)
assert X.shape == (n_samples, n_features)
# Split the dataset to be able to evaluate on a held-out test set. The
    # test size should be large enough for importance measurements to be
# stable:
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.5, random_state=rng)
clf = RandomForestClassifier(n_estimators=5, random_state=rng)
clf.fit(X_train, y_train)
# Variable importances computed by impurity decrease on the tree node
    # splits often use the noisy features in splits. This can give the
    # misleading impression that high cardinality noisy variables are the
    # most important:
tree_importances = clf.feature_importances_
informative_tree_importances = tree_importances[:n_informative_features]
noisy_tree_importances = tree_importances[n_informative_features:]
assert informative_tree_importances.max() < noisy_tree_importances.min()
# Let's check that permutation-based feature importances do not have this
# problem.
r = permutation_importance(clf, X_test, y_test, n_repeats=n_repeats,
random_state=rng, n_jobs=n_jobs)
assert r.importances.shape == (X.shape[1], n_repeats)
# Split the importances between informative and noisy features
informative_importances = r.importances_mean[:n_informative_features]
noisy_importances = r.importances_mean[n_informative_features:]
    # Because we do not have a binary variable explaining each target class,
    # the RF model will have to use the random variable to make some
    # (overfitting) splits (as max_depth is not set). Therefore the noisy
    # variables will have non-zero importances, with small values oscillating
    # around zero:
assert max(np.abs(noisy_importances)) > 1e-7
assert noisy_importances.max() < 0.05
# The binary features correlated with y should have a higher importance
# than the high cardinality noisy features.
# The maximum test accuracy is 2 / 5 == 0.4, each informative feature
    # contributing a bit more than 0.2 of accuracy.
assert informative_importances.min() > 0.15
def test_permutation_importance_mixed_types():
rng = np.random.RandomState(42)
n_repeats = 4
# Last column is correlated with y
X = np.array([[1.0, 2.0, 3.0, np.nan], [2, 1, 2, 1]]).T
y = np.array([0, 1, 0, 1])
clf = make_pipeline(SimpleImputer(), LogisticRegression(solver='lbfgs'))
clf.fit(X, y)
result = permutation_importance(clf, X, y, n_repeats=n_repeats,
random_state=rng)
assert result.importances.shape == (X.shape[1], n_repeats)
    # the feature correlated with y is the last column and should
# have the highest importance
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
# use another random state
rng = np.random.RandomState(0)
result2 = permutation_importance(clf, X, y, n_repeats=n_repeats,
random_state=rng)
assert result2.importances.shape == (X.shape[1], n_repeats)
assert not np.allclose(result.importances, result2.importances)
    # the feature correlated with y is the last column and should
# have the highest importance
assert np.all(result2.importances_mean[-1] > result2.importances_mean[:-1])
def test_permutation_importance_mixed_types_pandas():
pd = pytest.importorskip("pandas")
rng = np.random.RandomState(42)
n_repeats = 5
# Last column is correlated with y
X = pd.DataFrame({'col1': [1.0, 2.0, 3.0, np.nan],
'col2': ['a', 'b', 'a', 'b']})
y = np.array([0, 1, 0, 1])
num_preprocess = make_pipeline(SimpleImputer(), StandardScaler())
preprocess = ColumnTransformer([
('num', num_preprocess, ['col1']),
('cat', OneHotEncoder(), ['col2'])
])
clf = make_pipeline(preprocess, LogisticRegression(solver='lbfgs'))
clf.fit(X, y)
result = permutation_importance(clf, X, y, n_repeats=n_repeats,
random_state=rng)
assert result.importances.shape == (X.shape[1], n_repeats)
    # the feature correlated with y is the last column and should
# have the highest importance
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
def test_permutation_importance_linear_regression():
X, y = make_regression(n_samples=500, n_features=10, random_state=0)
X = scale(X)
y = scale(y)
lr = LinearRegression().fit(X, y)
# this relationship can be computed in closed form
expected_importances = 2 * lr.coef_**2
results = permutation_importance(lr, X, y,
n_repeats=50,
scoring='neg_mean_squared_error')
assert_allclose(expected_importances, results.importances_mean,
rtol=1e-1, atol=1e-6)
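# Derivation of the closed form above (added note): with X and y
# standardized and y_hat = sum_j beta_j * x_j, permuting column j replaces
# x_j with an independent copy x'_j having the same distribution, so the
# MSE increases by E[(beta_j * (x_j - x'_j)) ** 2] = 2 * beta_j ** 2 *
# Var(x_j) = 2 * beta_j ** 2 (Var(x_j) = 1 after scaling). Permutation
# importance with scoring='neg_mean_squared_error' reports exactly this
# MSE increase, hence expected_importances = 2 * lr.coef_ ** 2.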
def test_permutation_importance_equivalence_sequential_parallel():
# regression test to make sure that sequential and parallel calls will
# output the same results.
X, y = make_regression(n_samples=500, n_features=10, random_state=0)
lr = LinearRegression().fit(X, y)
importance_sequential = permutation_importance(
lr, X, y, n_repeats=5, random_state=0, n_jobs=1
)
# First check that the problem is structured enough and that the model is
# complex enough to not yield trivial, constant importances:
imp_min = importance_sequential['importances'].min()
imp_max = importance_sequential['importances'].max()
assert imp_max - imp_min > 0.3
    # Then actually check that parallelism does not impact the results,
    # either with shared memory (threading) or with isolated memory
    # via process-based parallelism using the default backend
    # ('loky' or 'multiprocessing') depending on the joblib version:
# process-based parallelism (by default):
importance_processes = permutation_importance(
lr, X, y, n_repeats=5, random_state=0, n_jobs=2)
assert_allclose(
importance_processes['importances'],
importance_sequential['importances']
)
# thread-based parallelism:
with parallel_backend("threading"):
importance_threading = permutation_importance(
lr, X, y, n_repeats=5, random_state=0, n_jobs=2
)
assert_allclose(
importance_threading['importances'],
importance_sequential['importances']
)
@pytest.mark.parametrize("n_jobs", [None, 1, 2])
def test_permutation_importance_equivalence_array_dataframe(n_jobs):
# This test checks that the column shuffling logic has the same behavior
    # on both a dataframe and a plain numpy array.
pd = pytest.importorskip('pandas')
# regression test to make sure that sequential and parallel calls will
# output the same results.
X, y = make_regression(n_samples=100, n_features=5, random_state=0)
X_df = pd.DataFrame(X)
# Add a categorical feature that is statistically linked to y:
binner = KBinsDiscretizer(n_bins=3, encode="ordinal")
cat_column = binner.fit_transform(y.reshape(-1, 1))
# Concatenate the extra column to the numpy array: integers will be
# cast to float values
X = np.hstack([X, cat_column])
assert X.dtype.kind == "f"
# Insert extra column as a non-numpy-native dtype (while keeping backward
# compat for old pandas versions):
if hasattr(pd, "Categorical"):
cat_column = pd.Categorical(cat_column.ravel())
else:
cat_column = cat_column.ravel()
new_col_idx = len(X_df.columns)
X_df[new_col_idx] = cat_column
assert X_df[new_col_idx].dtype == cat_column.dtype
    # Stitch an arbitrary index to the dataframe:
X_df.index = np.arange(len(X_df)).astype(str)
rf = RandomForestRegressor(n_estimators=5, max_depth=3, random_state=0)
rf.fit(X, y)
n_repeats = 3
importance_array = permutation_importance(
rf, X, y, n_repeats=n_repeats, random_state=0, n_jobs=n_jobs
)
# First check that the problem is structured enough and that the model is
# complex enough to not yield trivial, constant importances:
imp_min = importance_array['importances'].min()
imp_max = importance_array['importances'].max()
assert imp_max - imp_min > 0.3
    # Now check that importances computed on the dataframe match the values
    # computed on the array with the same data.
importance_dataframe = permutation_importance(
rf, X_df, y, n_repeats=n_repeats, random_state=0, n_jobs=n_jobs
)
assert_allclose(
importance_array['importances'],
importance_dataframe['importances']
)
@pytest.mark.parametrize("input_type", ["array", "dataframe"])
def test_permutation_importance_large_memmaped_data(input_type):
# Smoke, non-regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/15810
n_samples, n_features = int(5e4), 4
X, y = make_classification(n_samples=n_samples, n_features=n_features,
random_state=0)
    assert X.nbytes > 1e6  # trigger joblib memmapping
X = _convert_container(X, input_type)
clf = DummyClassifier(strategy='prior').fit(X, y)
# Actual smoke test: should not raise any error:
n_repeats = 5
r = permutation_importance(clf, X, y, n_repeats=n_repeats, n_jobs=2)
# Auxiliary check: DummyClassifier is feature independent:
    # permuting features should not change the predictions
expected_importances = np.zeros((n_features, n_repeats))
assert_allclose(expected_importances, r.importances)
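# Usage note (added): permutation_importance returns a Bunch with three
# fields, importances (shape (n_features, n_repeats)), importances_mean and
# importances_std, which these tests access both attribute-style
# (r.importances) and dict-style (r['importances']).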