Uploaded Test files

Batuhan Berk Başoğlu 2020-11-12 11:05:57 -05:00
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions


@@ -0,0 +1,663 @@
"""
Testing for the partial dependence module.
"""
import numpy as np
import pytest
import sklearn
from sklearn.inspection import partial_dependence
from sklearn.inspection._partial_dependence import (
_grid_from_X,
_partial_dependence_brute,
_partial_dependence_recursion
)
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import MultiTaskLasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import load_iris
from sklearn.datasets import make_classification, make_regression
from sklearn.cluster import KMeans
from sklearn.compose import make_column_transformer
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import make_pipeline
from sklearn.dummy import DummyClassifier
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.exceptions import NotFittedError
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import ignore_warnings
from sklearn.utils import _IS_32BIT
from sklearn.utils.validation import check_random_state
from sklearn.tree.tests.test_tree import assert_is_subtree
# toy sample
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
y = [-1, -1, -1, 1, 1, 1]
# (X, y), n_targets <-- as expected in the output of partial_dep()
binary_classification_data = (make_classification(n_samples=50,
random_state=0), 1)
multiclass_classification_data = (make_classification(n_samples=50,
n_classes=3,
n_clusters_per_class=1,
random_state=0), 3)
regression_data = (make_regression(n_samples=50, random_state=0), 1)
multioutput_regression_data = (make_regression(n_samples=50, n_targets=2,
random_state=0), 2)
# iris
iris = load_iris()
@pytest.mark.parametrize('Estimator, method, data', [
(GradientBoostingClassifier, 'recursion', binary_classification_data),
(GradientBoostingClassifier, 'recursion', multiclass_classification_data),
(GradientBoostingClassifier, 'brute', binary_classification_data),
(GradientBoostingClassifier, 'brute', multiclass_classification_data),
(GradientBoostingRegressor, 'recursion', regression_data),
(GradientBoostingRegressor, 'brute', regression_data),
(DecisionTreeRegressor, 'brute', regression_data),
(LinearRegression, 'brute', regression_data),
(LinearRegression, 'brute', multioutput_regression_data),
(LogisticRegression, 'brute', binary_classification_data),
(LogisticRegression, 'brute', multiclass_classification_data),
(MultiTaskLasso, 'brute', multioutput_regression_data),
])
@pytest.mark.parametrize('grid_resolution', (5, 10))
@pytest.mark.parametrize('features', ([1], [1, 2]))
def test_output_shape(Estimator, method, data, grid_resolution,
features):
# Check that partial_dependence has consistent output shape for different
# kinds of estimators:
# - classifiers with binary and multiclass settings
# - regressors
# - multi-task regressors
est = Estimator()
# n_targets corresponds to the number of classes (1 for binary
# classification) or the number of tasks / outputs in multi-task settings.
# It's equal to 1 for classical regression_data.
(X, y), n_targets = data
est.fit(X, y)
pdp, axes = partial_dependence(est, X=X, features=features,
method=method,
grid_resolution=grid_resolution)
expected_pdp_shape = (n_targets, *[grid_resolution
for _ in range(len(features))])
expected_axes_shape = (len(features), grid_resolution)
assert pdp.shape == expected_pdp_shape
assert axes is not None
assert np.asarray(axes).shape == expected_axes_shape
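# Illustrative sketch (hypothetical helper, not part of the original suite):
# for the multiclass data above, features=[1, 2] and grid_resolution=5, the
# averaged predictions carry one block per class and one axis per requested
# feature, i.e. shape (3, 5, 5), while the returned grid values hold one 1d
# array of 5 points per feature.
def _output_shape_sketch():
    (X_mc, y_mc), n_targets = multiclass_classification_data
    clf = LogisticRegression(max_iter=1000).fit(X_mc, y_mc)
    pdp_sketch, values_sketch = partial_dependence(
        clf, X_mc, features=[1, 2], method='brute', grid_resolution=5)
    assert pdp_sketch.shape == (n_targets, 5, 5)
    assert [v.shape for v in values_sketch] == [(5,), (5,)]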
def test_grid_from_X():
# tests for _grid_from_X: sanity check for output, and for shapes.
# Make sure that the grid is a cartesian product of the input (it will use
# the unique values instead of the percentiles)
percentiles = (.05, .95)
grid_resolution = 100
X = np.asarray([[1, 2],
[3, 4]])
grid, axes = _grid_from_X(X, percentiles, grid_resolution)
assert_array_equal(grid, [[1, 2],
[1, 4],
[3, 2],
[3, 4]])
assert_array_equal(axes, X.T)
# test shapes of returned objects depending on the number of unique values
# for a feature.
rng = np.random.RandomState(0)
grid_resolution = 15
# n_unique_values > grid_resolution
X = rng.normal(size=(20, 2))
grid, axes = _grid_from_X(X, percentiles, grid_resolution=grid_resolution)
assert grid.shape == (grid_resolution * grid_resolution, X.shape[1])
assert np.asarray(axes).shape == (2, grid_resolution)
# n_unique_values < grid_resolution, will use actual values
n_unique_values = 12
X[n_unique_values - 1:, 0] = 12345
rng.shuffle(X) # just to make sure the order is irrelevant
grid, axes = _grid_from_X(X, percentiles, grid_resolution=grid_resolution)
assert grid.shape == (n_unique_values * grid_resolution, X.shape[1])
# axes is a list of arrays of different shapes
assert axes[0].shape == (n_unique_values,)
assert axes[1].shape == (grid_resolution,)
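# Illustrative sketch (hypothetical helper; not the actual _grid_from_X
# implementation): the cartesian-product grid checked above can be rebuilt
# from the per-feature axes with np.meshgrid, which is all the expected
# values encode.
def _cartesian_grid_sketch():
    X_small = np.asarray([[1, 2], [3, 4]])
    axes_sketch = [np.unique(X_small[:, j]) for j in range(X_small.shape[1])]
    mesh = np.meshgrid(*axes_sketch, indexing='ij')
    grid_sketch = np.column_stack([m.ravel() for m in mesh])
    # grid_sketch == [[1, 2], [1, 4], [3, 2], [3, 4]], as asserted above
    assert_array_equal(grid_sketch, [[1, 2], [1, 4], [3, 2], [3, 4]])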
@pytest.mark.parametrize(
"grid_resolution, percentiles, err_msg",
[(2, (0, 0.0001), "percentiles are too close"),
(100, (1, 2, 3, 4), "'percentiles' must be a sequence of 2 elements"),
(100, 12345, "'percentiles' must be a sequence of 2 elements"),
(100, (-1, .95), r"'percentiles' values must be in \[0, 1\]"),
(100, (.05, 2), r"'percentiles' values must be in \[0, 1\]"),
(100, (.9, .1), r"percentiles\[0\] must be strictly less than"),
(1, (0.05, 0.95), "'grid_resolution' must be strictly greater than 1")]
)
def test_grid_from_X_error(grid_resolution, percentiles, err_msg):
X = np.asarray([[1, 2], [3, 4]])
with pytest.raises(ValueError, match=err_msg):
_grid_from_X(
X, grid_resolution=grid_resolution, percentiles=percentiles
)
@pytest.mark.parametrize('target_feature', range(5))
@pytest.mark.parametrize('est, method', [
(LinearRegression(), 'brute'),
(GradientBoostingRegressor(random_state=0), 'brute'),
(GradientBoostingRegressor(random_state=0), 'recursion'),
(HistGradientBoostingRegressor(random_state=0), 'brute'),
(HistGradientBoostingRegressor(random_state=0), 'recursion')]
)
def test_partial_dependence_helpers(est, method, target_feature):
# Check that what is returned by _partial_dependence_brute or
# _partial_dependence_recursion is equivalent to manually setting a target
# feature to a given value, and computing the average prediction over all
# samples.
# This also checks that the brute and recursion methods give the same
# output.
# Note that even on the trainset, the brute and the recursion methods
# aren't always strictly equivalent, in particular when the slow method
# generates unrealistic samples that have low mass in the joint
# distribution of the input features, and when some of the features are
# dependent. Hence the high tolerance on the checks.
X, y = make_regression(random_state=0, n_features=5, n_informative=5)
# The 'init' estimator for GBDT (here the average prediction) isn't taken
# into account with the recursion method, for technical reasons. We set
# the mean to 0 so that this 'bug' doesn't have any effect.
y = y - y.mean()
est.fit(X, y)
# target feature will be set to .5 and then to 123
features = np.array([target_feature], dtype=np.int32)
grid = np.array([[.5],
[123]])
if method == 'brute':
pdp = _partial_dependence_brute(est, grid, features, X,
response_method='auto')
else:
pdp = _partial_dependence_recursion(est, grid, features)
mean_predictions = []
for val in (.5, 123):
X_ = X.copy()
X_[:, target_feature] = val
mean_predictions.append(est.predict(X_).mean())
pdp = pdp[0] # (shape is (1, 2) so make it (2,))
# allow for greater margin for error with recursion method
rtol = 1e-1 if method == 'recursion' else 1e-3
assert np.allclose(pdp, mean_predictions, rtol=rtol)
@pytest.mark.parametrize('seed', range(1))
def test_recursion_decision_tree_vs_forest_and_gbdt(seed):
# Make sure that the recursion method gives the same results on a
# DecisionTreeRegressor and a GradientBoostingRegressor or a
# RandomForestRegressor with 1 tree and equivalent parameters.
rng = np.random.RandomState(seed)
# Purely random dataset to avoid correlated features
n_samples = 1000
n_features = 5
X = rng.randn(n_samples, n_features)
y = rng.randn(n_samples) * 10
# The 'init' estimator for GBDT (here the average prediction) isn't taken
# into account with the recursion method, for technical reasons. We set
# the mean to 0 so that this 'bug' doesn't have any effect.
y = y - y.mean()
# set max_depth not too high to avoid splits with same gain but different
# features
max_depth = 5
tree_seed = 0
forest = RandomForestRegressor(n_estimators=1, max_features=None,
bootstrap=False, max_depth=max_depth,
random_state=tree_seed)
# The forest will use ensemble.base._set_random_states to set the
# random_state of the tree sub-estimator. We simulate this here to have
# equivalent estimators.
equiv_random_state = check_random_state(tree_seed).randint(
np.iinfo(np.int32).max)
gbdt = GradientBoostingRegressor(n_estimators=1, learning_rate=1,
criterion='mse', max_depth=max_depth,
random_state=equiv_random_state)
tree = DecisionTreeRegressor(max_depth=max_depth,
random_state=equiv_random_state)
forest.fit(X, y)
gbdt.fit(X, y)
tree.fit(X, y)
# sanity check: if the trees aren't the same, the PD values won't be equal
try:
assert_is_subtree(tree.tree_, gbdt[0, 0].tree_)
assert_is_subtree(tree.tree_, forest[0].tree_)
except AssertionError:
# For some reason the trees aren't exactly equal on 32bits, so the PDs
# cannot be equal either. See
# https://github.com/scikit-learn/scikit-learn/issues/8853
assert _IS_32BIT, "this should only fail on 32 bit platforms"
return
grid = rng.randn(50).reshape(-1, 1)
for f in range(n_features):
features = np.array([f], dtype=np.int32)
pdp_forest = _partial_dependence_recursion(forest, grid, features)
pdp_gbdt = _partial_dependence_recursion(gbdt, grid, features)
pdp_tree = _partial_dependence_recursion(tree, grid, features)
np.testing.assert_allclose(pdp_gbdt, pdp_tree)
np.testing.assert_allclose(pdp_forest, pdp_tree)
@pytest.mark.parametrize('est', (
GradientBoostingClassifier(random_state=0),
HistGradientBoostingClassifier(random_state=0),
))
@pytest.mark.parametrize('target_feature', (0, 1, 2, 3, 4, 5))
def test_recursion_decision_function(est, target_feature):
# Make sure the recursion method (implicitly uses decision_function) has
# the same result as using brute method with
# response_method=decision_function
X, y = make_classification(n_classes=2, n_clusters_per_class=1,
random_state=1)
assert np.mean(y) == .5 # make sure the init estimator predicts 0 anyway
est.fit(X, y)
preds_1, _ = partial_dependence(est, X, [target_feature],
response_method='decision_function',
method='recursion')
preds_2, _ = partial_dependence(est, X, [target_feature],
response_method='decision_function',
method='brute')
assert_allclose(preds_1, preds_2, atol=1e-7)
@pytest.mark.parametrize('est', (
LinearRegression(),
GradientBoostingRegressor(random_state=0),
HistGradientBoostingRegressor(random_state=0, min_samples_leaf=1,
max_leaf_nodes=None, max_iter=1),
DecisionTreeRegressor(random_state=0),
))
@pytest.mark.parametrize('power', (1, 2))
def test_partial_dependence_easy_target(est, power):
# If the target y only depends on one feature in an obvious way (linear or
# quadratic) then the partial dependence for that feature should reflect
# it.
# We here fit a linear regression model (with polynomial features if
# needed) and compute r_squared to check that the partial dependence
# correctly reflects the target.
rng = np.random.RandomState(0)
n_samples = 200
target_variable = 2
X = rng.normal(size=(n_samples, 5))
y = X[:, target_variable]**power
est.fit(X, y)
averaged_predictions, values = partial_dependence(
est, features=[target_variable], X=X, grid_resolution=1000)
new_X = values[0].reshape(-1, 1)
new_y = averaged_predictions[0]
# add polynomial features if needed
new_X = PolynomialFeatures(degree=power).fit_transform(new_X)
lr = LinearRegression().fit(new_X, new_y)
r2 = r2_score(new_y, lr.predict(new_X))
assert r2 > .99
@pytest.mark.parametrize('Estimator',
(sklearn.tree.DecisionTreeClassifier,
sklearn.tree.ExtraTreeClassifier,
sklearn.ensemble.ExtraTreesClassifier,
sklearn.neighbors.KNeighborsClassifier,
sklearn.neighbors.RadiusNeighborsClassifier,
sklearn.ensemble.RandomForestClassifier))
def test_multiclass_multioutput(Estimator):
# Make sure error is raised for multiclass-multioutput classifiers
# make multiclass-multioutput dataset
X, y = make_classification(n_classes=3, n_clusters_per_class=1,
random_state=0)
y = np.array([y, y]).T
est = Estimator()
est.fit(X, y)
with pytest.raises(
ValueError,
match="Multiclass-multioutput estimators are not supported"):
partial_dependence(est, X, [0])
class NoPredictProbaNoDecisionFunction(ClassifierMixin, BaseEstimator):
def fit(self, X, y):
# simulate that we have some classes
self.classes_ = [0, 1]
return self
@pytest.mark.parametrize(
"estimator, params, err_msg",
[(KMeans(),
{'features': [0]},
"'estimator' must be a fitted regressor or classifier"),
(LinearRegression(),
{'features': [0], 'response_method': 'predict_proba'},
'The response_method parameter is ignored for regressors'),
(GradientBoostingClassifier(random_state=0),
{'features': [0], 'response_method': 'predict_proba',
'method': 'recursion'},
"'recursion' method, the response_method must be 'decision_function'"),
(GradientBoostingClassifier(random_state=0),
{'features': [0], 'response_method': 'predict_proba', 'method': 'auto'},
"'recursion' method, the response_method must be 'decision_function'"),
(GradientBoostingClassifier(random_state=0),
{'features': [0], 'response_method': 'blahblah'},
'response_method blahblah is invalid. Accepted response_method'),
(NoPredictProbaNoDecisionFunction(),
{'features': [0], 'response_method': 'auto'},
'The estimator has no predict_proba and no decision_function method'),
(NoPredictProbaNoDecisionFunction(),
{'features': [0], 'response_method': 'predict_proba'},
'The estimator has no predict_proba method.'),
(NoPredictProbaNoDecisionFunction(),
{'features': [0], 'response_method': 'decision_function'},
'The estimator has no decision_function method.'),
(LinearRegression(),
{'features': [0], 'method': 'blahblah'},
'blahblah is invalid. Accepted method names are brute, recursion, auto'),
(LinearRegression(),
{'features': [0], 'method': 'recursion'},
"Only the following estimators support the 'recursion' method:")]
)
def test_partial_dependence_error(estimator, params, err_msg):
X, y = make_classification(random_state=0)
estimator.fit(X, y)
with pytest.raises(ValueError, match=err_msg):
partial_dependence(estimator, X, **params)
@pytest.mark.parametrize(
"with_dataframe, err_msg",
[(True, "Only array-like or scalar are supported"),
(False, "Only array-like or scalar are supported")]
)
def test_partial_dependence_slice_error(with_dataframe, err_msg):
X, y = make_classification(random_state=0)
if with_dataframe:
pd = pytest.importorskip('pandas')
X = pd.DataFrame(X)
estimator = LogisticRegression().fit(X, y)
with pytest.raises(TypeError, match=err_msg):
partial_dependence(estimator, X, features=slice(0, 2, 1))
@pytest.mark.parametrize(
'estimator',
[LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
@pytest.mark.parametrize('features', [-1, 10000])
def test_partial_dependence_unknown_feature_indices(estimator, features):
X, y = make_classification(random_state=0)
estimator.fit(X, y)
err_msg = 'all features must be in'
with pytest.raises(ValueError, match=err_msg):
partial_dependence(estimator, X, [features])
@pytest.mark.parametrize(
'estimator',
[LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
def test_partial_dependence_unknown_feature_string(estimator):
pd = pytest.importorskip("pandas")
X, y = make_classification(random_state=0)
df = pd.DataFrame(X)
estimator.fit(df, y)
features = ['random']
err_msg = 'A given column is not a column of the dataframe'
with pytest.raises(ValueError, match=err_msg):
partial_dependence(estimator, df, features)
@pytest.mark.parametrize(
'estimator',
[LinearRegression(), GradientBoostingClassifier(random_state=0)]
)
def test_partial_dependence_X_list(estimator):
# check that array-like objects are accepted
X, y = make_classification(random_state=0)
estimator.fit(X, y)
partial_dependence(estimator, list(X), [0])
# TODO: Remove in 0.24 when DummyClassifier's `strategy` default updates
@ignore_warnings(category=FutureWarning)
def test_warning_recursion_non_constant_init():
# make sure that passing a non-constant init parameter to a GBDT and using
# recursion method yields a warning.
gbc = GradientBoostingClassifier(init=DummyClassifier(), random_state=0)
gbc.fit(X, y)
with pytest.warns(
UserWarning,
match='Using recursion method with a non-constant init predictor'):
partial_dependence(gbc, X, [0], method='recursion')
with pytest.warns(
UserWarning,
match='Using recursion method with a non-constant init predictor'):
partial_dependence(gbc, X, [0], method='recursion')
def test_partial_dependence_sample_weight():
# Test near perfect correlation between partial dependence and diagonal
# when sample weights emphasize y = x predictions
# non-regression test for #13193
# TODO: extend to HistGradientBoosting once sample_weight is supported
N = 1000
rng = np.random.RandomState(123456)
mask = rng.randint(2, size=N, dtype=bool)
x = rng.rand(N)
# set y = x on mask and y = -x outside
y = x.copy()
y[~mask] = -y[~mask]
X = np.c_[mask, x]
# sample weights to emphasize data points where y = x
sample_weight = np.ones(N)
sample_weight[mask] = 1000.
clf = GradientBoostingRegressor(n_estimators=10, random_state=1)
clf.fit(X, y, sample_weight=sample_weight)
pdp, values = partial_dependence(clf, X, features=[1])
assert np.corrcoef(pdp, values)[0, 1] > 0.99
def test_hist_gbdt_sw_not_supported():
# TODO: remove/fix when PDP supports HGBT with sample weights
clf = HistGradientBoostingRegressor(random_state=1)
clf.fit(X, y, sample_weight=np.ones(len(X)))
with pytest.raises(NotImplementedError,
match="does not support partial dependence"):
partial_dependence(clf, X, features=[1])
# TODO: Remove in 0.24 when DummyClassifier's `strategy` default updates
@ignore_warnings(category=FutureWarning)
def test_partial_dependence_pipeline():
# check that partial dependence supports pipelines
iris = load_iris()
scaler = StandardScaler()
clf = DummyClassifier(random_state=42)
pipe = make_pipeline(scaler, clf)
clf.fit(scaler.fit_transform(iris.data), iris.target)
pipe.fit(iris.data, iris.target)
features = 0
pdp_pipe, values_pipe = partial_dependence(
pipe, iris.data, features=[features], grid_resolution=10
)
pdp_clf, values_clf = partial_dependence(
clf, scaler.transform(iris.data), features=[features],
grid_resolution=10
)
assert_allclose(pdp_pipe, pdp_clf)
assert_allclose(
values_pipe[0],
values_clf[0] * scaler.scale_[features] + scaler.mean_[features]
)
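# Illustrative sketch (hypothetical helper, assuming `scaler` is a fitted
# StandardScaler): the assertion above simply undoes the standardization
# x_scaled = (x - mean_) / scale_ for the selected column, mapping the grid
# of the estimator fitted on scaled data back to the original feature units.
def _unscale_grid_sketch(grid_scaled, scaler, column):
    # equivalent to reading column `column` of scaler.inverse_transform(...)
    return grid_scaled * scaler.scale_[column] + scaler.mean_[column]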
@pytest.mark.parametrize(
"estimator",
[LogisticRegression(max_iter=1000, random_state=0),
GradientBoostingClassifier(random_state=0, n_estimators=5)],
ids=['estimator-brute', 'estimator-recursion']
)
@pytest.mark.parametrize(
"preprocessor",
[None,
make_column_transformer(
(StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
(RobustScaler(), [iris.feature_names[i] for i in (1, 3)])),
make_column_transformer(
(StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
remainder='passthrough')],
ids=['None', 'column-transformer', 'column-transformer-passthrough']
)
@pytest.mark.parametrize(
"features",
[[0, 2], [iris.feature_names[i] for i in (0, 2)]],
ids=['features-integer', 'features-string']
)
def test_partial_dependence_dataframe(estimator, preprocessor, features):
# check that partial dependence supports dataframes and pipelines
# including a column transformer
pd = pytest.importorskip("pandas")
df = pd.DataFrame(iris.data, columns=iris.feature_names)
pipe = make_pipeline(preprocessor, estimator)
pipe.fit(df, iris.target)
pdp_pipe, values_pipe = partial_dependence(
pipe, df, features=features, grid_resolution=10
)
# the column transformer reorders the columns when transforming, so we remap
# the feature indices below to be sure that we are computing the partial
# dependence of the right columns (see the column-order sketch after this
# test)
if preprocessor is not None:
X_proc = clone(preprocessor).fit_transform(df)
features_clf = [0, 1]
else:
X_proc = df
features_clf = [0, 2]
clf = clone(estimator).fit(X_proc, iris.target)
pdp_clf, values_clf = partial_dependence(
clf, X_proc, features=features_clf, method='brute', grid_resolution=10
)
assert_allclose(pdp_pipe, pdp_clf)
if preprocessor is not None:
scaler = preprocessor.named_transformers_['standardscaler']
assert_allclose(
values_pipe[1],
values_clf[1] * scaler.scale_[1] + scaler.mean_[1]
)
else:
assert_allclose(values_pipe[1], values_clf[1])
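# Illustrative sketch (hypothetical helper): make_column_transformer stacks
# the outputs of its transformers in the order they are listed, so with
# (StandardScaler(), columns 0 and 2) followed by (RobustScaler(), columns 1
# and 3) the transformed matrix is ordered [0, 2, 1, 3]. That is why the raw
# features [0, 2] map to columns [0, 1] of X_proc in the test above.
def _column_order_sketch():
    ct = make_column_transformer(
        (StandardScaler(), [0, 2]),
        (RobustScaler(), [1, 3]))
    X_t = ct.fit_transform(iris.data)
    # column 0 of the output is the standardized version of raw feature 0
    assert_allclose(
        X_t[:, 0], StandardScaler().fit_transform(iris.data[:, [0]]).ravel())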
@pytest.mark.parametrize(
"features, expected_pd_shape",
[(0, (3, 10)),
(iris.feature_names[0], (3, 10)),
([0, 2], (3, 10, 10)),
([iris.feature_names[i] for i in (0, 2)], (3, 10, 10)),
([True, False, True, False], (3, 10, 10))],
ids=['scalar-int', 'scalar-str', 'list-int', 'list-str', 'mask']
)
def test_partial_dependence_feature_type(features, expected_pd_shape):
# check all possible feature types supported in PDP
pd = pytest.importorskip("pandas")
df = pd.DataFrame(iris.data, columns=iris.feature_names)
preprocessor = make_column_transformer(
(StandardScaler(), [iris.feature_names[i] for i in (0, 2)]),
(RobustScaler(), [iris.feature_names[i] for i in (1, 3)])
)
pipe = make_pipeline(
preprocessor, LogisticRegression(max_iter=1000, random_state=0)
)
pipe.fit(df, iris.target)
pdp_pipe, values_pipe = partial_dependence(
pipe, df, features=features, grid_resolution=10
)
assert pdp_pipe.shape == expected_pd_shape
assert len(values_pipe) == len(pdp_pipe.shape) - 1
@pytest.mark.parametrize(
"estimator", [LinearRegression(), LogisticRegression(),
GradientBoostingRegressor(), GradientBoostingClassifier()]
)
def test_partial_dependence_unfitted(estimator):
X = iris.data
preprocessor = make_column_transformer(
(StandardScaler(), [0, 2]), (RobustScaler(), [1, 3])
)
pipe = make_pipeline(preprocessor, estimator)
with pytest.raises(NotFittedError, match="is not fitted yet"):
partial_dependence(pipe, X, features=[0, 2], grid_resolution=10)
with pytest.raises(NotFittedError, match="is not fitted yet"):
partial_dependence(estimator, X, features=[0, 2], grid_resolution=10)


@@ -0,0 +1,353 @@
import pytest
import numpy as np
from numpy.testing import assert_allclose
from sklearn.compose import ColumnTransformer
from sklearn.datasets import load_diabetes
from sklearn.datasets import load_iris
from sklearn.datasets import make_classification
from sklearn.datasets import make_regression
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.impute import SimpleImputer
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import scale
from sklearn.utils import parallel_backend
from sklearn.utils._testing import _convert_container
@pytest.mark.parametrize("n_jobs", [1, 2])
def test_permutation_importance_correlated_feature_regression(n_jobs):
# Make sure that a feature highly correlated with the target has a higher
# importance
rng = np.random.RandomState(42)
n_repeats = 5
X, y = load_diabetes(return_X_y=True)
y_with_little_noise = (
y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)
X = np.hstack([X, y_with_little_noise])
clf = RandomForestRegressor(n_estimators=10, random_state=42)
clf.fit(X, y)
result = permutation_importance(clf, X, y, n_repeats=n_repeats,
random_state=rng, n_jobs=n_jobs)
assert result.importances.shape == (X.shape[1], n_repeats)
# the feature correlated with y was added as the last column and should
# have the highest importance
assert np.all(result.importances_mean[-1] >
result.importances_mean[:-1])
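# Illustrative sketch (hypothetical helper; not the sklearn implementation):
# the permutation importance of one column is the drop in the model score
# observed when that column is shuffled, averaged over n_repeats shuffles.
def _single_column_permutation_sketch(model, X_sketch, y_sketch, col,
                                      n_repeats=5, seed=0):
    rng_sketch = np.random.RandomState(seed)
    baseline = model.score(X_sketch, y_sketch)
    drops = []
    for _ in range(n_repeats):
        X_perm = X_sketch.copy()
        rng_sketch.shuffle(X_perm[:, col])  # shuffle a single column in place
        drops.append(baseline - model.score(X_perm, y_sketch))
    return np.mean(drops)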
@pytest.mark.parametrize("n_jobs", [1, 2])
def test_permutation_importance_correlated_feature_regression_pandas(n_jobs):
pd = pytest.importorskip("pandas")
# Make sure that a feature highly correlated with the target has a higher
# importance
rng = np.random.RandomState(42)
n_repeats = 5
dataset = load_iris()
X, y = dataset.data, dataset.target
y_with_little_noise = (
y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)
# Adds feature correlated with y as the last column
X = pd.DataFrame(X, columns=dataset.feature_names)
X['correlated_feature'] = y_with_little_noise
clf = RandomForestClassifier(n_estimators=10, random_state=42)
clf.fit(X, y)
result = permutation_importance(clf, X, y, n_repeats=n_repeats,
random_state=rng, n_jobs=n_jobs)
assert result.importances.shape == (X.shape[1], n_repeats)
# the feature correlated with y was added as the last column and should
# have the highest importance
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
@pytest.mark.parametrize("n_jobs", [1, 2])
def test_robustness_to_high_cardinality_noisy_feature(n_jobs, seed=42):
# Permutation variable importance should not be affected by the high
# cardinality bias of traditional feature importances, especially when
# computed on a held-out test set:
rng = np.random.RandomState(seed)
n_repeats = 5
n_samples = 1000
n_classes = 5
n_informative_features = 2
n_noise_features = 1
n_features = n_informative_features + n_noise_features
# Generate a multiclass classification dataset and a set of informative
# binary features that can be used to predict some classes of y exactly
# while leaving some classes unexplained to make the problem harder.
classes = np.arange(n_classes)
y = rng.choice(classes, size=n_samples)
X = np.hstack([(y == c).reshape(-1, 1)
for c in classes[:n_informative_features]])
X = X.astype(np.float32)
# Not all target classes are explained by the binary class indicator
# features:
assert n_informative_features < n_classes
# Add n_noise_features additional noisy features with high cardinality
# (numerical) values that can be used to overfit the training data.
X = np.concatenate([X, rng.randn(n_samples, n_noise_features)], axis=1)
assert X.shape == (n_samples, n_features)
# Split the dataset to be able to evaluate on a held-out test set. The
# test size should be large enough for importance measurements to be
# stable:
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.5, random_state=rng)
clf = RandomForestClassifier(n_estimators=5, random_state=rng)
clf.fit(X_train, y_train)
# Variable importances computed from the impurity decrease at tree node
# splits often favour the noisy features. This can give the misleading
# impression that high cardinality noisy variables are the most important:
tree_importances = clf.feature_importances_
informative_tree_importances = tree_importances[:n_informative_features]
noisy_tree_importances = tree_importances[n_informative_features:]
assert informative_tree_importances.max() < noisy_tree_importances.min()
# Let's check that permutation-based feature importances do not have this
# problem.
r = permutation_importance(clf, X_test, y_test, n_repeats=n_repeats,
random_state=rng, n_jobs=n_jobs)
assert r.importances.shape == (X.shape[1], n_repeats)
# Split the importances between informative and noisy features
informative_importances = r.importances_mean[:n_informative_features]
noisy_importances = r.importances_mean[n_informative_features:]
# Because we do not have a binary variable explaining each target class,
# the RF model will have to use the random variable to make some
# (overfitting) splits (as max_depth is not set). Therefore the importances
# of the noisy variables will be non-zero, but with small values oscillating
# around zero:
assert max(np.abs(noisy_importances)) > 1e-7
assert noisy_importances.max() < 0.05
# The binary features correlated with y should have a higher importance
# than the high cardinality noisy features.
# The maximum test accuracy is 2 / 5 == 0.4, with each informative feature
# contributing a bit more than 0.2 of accuracy.
assert informative_importances.min() > 0.15
def test_permutation_importance_mixed_types():
rng = np.random.RandomState(42)
n_repeats = 4
# Last column is correlated with y
X = np.array([[1.0, 2.0, 3.0, np.nan], [2, 1, 2, 1]]).T
y = np.array([0, 1, 0, 1])
clf = make_pipeline(SimpleImputer(), LogisticRegression(solver='lbfgs'))
clf.fit(X, y)
result = permutation_importance(clf, X, y, n_repeats=n_repeats,
random_state=rng)
assert result.importances.shape == (X.shape[1], n_repeats)
# the feature correlated with y is the last column and should
# have the highest importance
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
# use another random state
rng = np.random.RandomState(0)
result2 = permutation_importance(clf, X, y, n_repeats=n_repeats,
random_state=rng)
assert result2.importances.shape == (X.shape[1], n_repeats)
assert not np.allclose(result.importances, result2.importances)
# the feature correlated with y is the last column and should
# have the highest importance
assert np.all(result2.importances_mean[-1] > result2.importances_mean[:-1])
def test_permutation_importance_mixed_types_pandas():
pd = pytest.importorskip("pandas")
rng = np.random.RandomState(42)
n_repeats = 5
# Last column is correlated with y
X = pd.DataFrame({'col1': [1.0, 2.0, 3.0, np.nan],
'col2': ['a', 'b', 'a', 'b']})
y = np.array([0, 1, 0, 1])
num_preprocess = make_pipeline(SimpleImputer(), StandardScaler())
preprocess = ColumnTransformer([
('num', num_preprocess, ['col1']),
('cat', OneHotEncoder(), ['col2'])
])
clf = make_pipeline(preprocess, LogisticRegression(solver='lbfgs'))
clf.fit(X, y)
result = permutation_importance(clf, X, y, n_repeats=n_repeats,
random_state=rng)
assert result.importances.shape == (X.shape[1], n_repeats)
# the feature correlated with y is the last column and should
# have the highest importance
assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
def test_permutation_importance_linear_regresssion():
X, y = make_regression(n_samples=500, n_features=10, random_state=0)
X = scale(X)
y = scale(y)
lr = LinearRegression().fit(X, y)
# this relationship can be computed in closed form
expected_importances = 2 * lr.coef_**2
results = permutation_importance(lr, X, y,
n_repeats=50,
scoring='neg_mean_squared_error')
assert_allclose(expected_importances, results.importances_mean,
rtol=1e-1, atol=1e-6)
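# Worked sketch of the closed form above (assumptions: the columns of X are
# standardized, approximately uncorrelated, and y is scaled, as in the test).
# The fitted model predicts y_hat ~= sum_i coef_i * x_i. Permuting column j
# replaces x_j by an independent copy x'_j, so the residual gains the term
# coef_j * (x_j - x'_j), whose variance is 2 * coef_j**2 because
# Var(x_j - x'_j) = Var(x_j) + Var(x'_j) = 2 for unit-variance, independent
# copies. With scoring='neg_mean_squared_error' the importance is the
# increase in MSE, hence approximately 2 * coef_j**2. The helper below is a
# hypothetical numerical illustration of that single step.
def _permuted_mse_increase_sketch(coef_j=1.5, n=100000, seed=0):
    rng_sketch = np.random.RandomState(seed)
    x_j = rng_sketch.randn(n)
    x_j_permuted = rng_sketch.permutation(x_j)
    # empirical E[(coef_j * (x_j - x'_j)) ** 2], close to 2 * coef_j**2 == 4.5
    return np.mean((coef_j * (x_j - x_j_permuted)) ** 2)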
def test_permutation_importance_equivalence_sequential_parallel():
# regression test to make sure that sequential and parallel calls will
# output the same results.
X, y = make_regression(n_samples=500, n_features=10, random_state=0)
lr = LinearRegression().fit(X, y)
importance_sequential = permutation_importance(
lr, X, y, n_repeats=5, random_state=0, n_jobs=1
)
# First check that the problem is structured enough and that the model is
# complex enough to not yield trivial, constant importances:
imp_min = importance_sequential['importances'].min()
imp_max = importance_sequential['importances'].max()
assert imp_max - imp_min > 0.3
# Then actually check that parallelism does not impact the results,
# either with shared memory (thread-based parallelism) or with isolated
# memory via process-based parallelism, using the default backend
# ('loky' or 'multiprocessing') depending on the joblib version:
# process-based parallelism (by default):
importance_processes = permutation_importance(
lr, X, y, n_repeats=5, random_state=0, n_jobs=2)
assert_allclose(
importance_processes['importances'],
importance_sequential['importances']
)
# thread-based parallelism:
with parallel_backend("threading"):
importance_threading = permutation_importance(
lr, X, y, n_repeats=5, random_state=0, n_jobs=2
)
assert_allclose(
importance_threading['importances'],
importance_sequential['importances']
)
@pytest.mark.parametrize("n_jobs", [None, 1, 2])
def test_permutation_importance_equivalence_array_dataframe(n_jobs):
# This test checks that the column shuffling logic has the same behavior
# on both a dataframe and a plain numpy array.
pd = pytest.importorskip('pandas')
# regression test to make sure that sequential and parallel calls will
# output the same results.
X, y = make_regression(n_samples=100, n_features=5, random_state=0)
X_df = pd.DataFrame(X)
# Add a categorical feature that is statistically linked to y:
binner = KBinsDiscretizer(n_bins=3, encode="ordinal")
cat_column = binner.fit_transform(y.reshape(-1, 1))
# Concatenate the extra column to the numpy array: integers will be
# cast to float values
X = np.hstack([X, cat_column])
assert X.dtype.kind == "f"
# Insert extra column as a non-numpy-native dtype (while keeping backward
# compat for old pandas versions):
if hasattr(pd, "Categorical"):
cat_column = pd.Categorical(cat_column.ravel())
else:
cat_column = cat_column.ravel()
new_col_idx = len(X_df.columns)
X_df[new_col_idx] = cat_column
assert X_df[new_col_idx].dtype == cat_column.dtype
# Attach an arbitrary index to the dataframe:
X_df.index = np.arange(len(X_df)).astype(str)
rf = RandomForestRegressor(n_estimators=5, max_depth=3, random_state=0)
rf.fit(X, y)
n_repeats = 3
importance_array = permutation_importance(
rf, X, y, n_repeats=n_repeats, random_state=0, n_jobs=n_jobs
)
# First check that the problem is structured enough and that the model is
# complex enough to not yield trivial, constant importances:
imp_min = importance_array['importances'].min()
imp_max = importance_array['importances'].max()
assert imp_max - imp_min > 0.3
# Now check that importances computed on the dataframe match the values
# computed on the array with the same data.
importance_dataframe = permutation_importance(
rf, X_df, y, n_repeats=n_repeats, random_state=0, n_jobs=n_jobs
)
assert_allclose(
importance_array['importances'],
importance_dataframe['importances']
)
@pytest.mark.parametrize("input_type", ["array", "dataframe"])
def test_permutation_importance_large_memmaped_data(input_type):
# Smoke, non-regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/15810
n_samples, n_features = int(5e4), 4
X, y = make_classification(n_samples=n_samples, n_features=n_features,
random_state=0)
assert X.nbytes > 1e6  # trigger joblib memmapping
X = _convert_container(X, input_type)
clf = DummyClassifier(strategy='prior').fit(X, y)
# Actual smoke test: should not raise any error:
n_repeats = 5
r = permutation_importance(clf, X, y, n_repeats=n_repeats, n_jobs=2)
# Auxiliary check: DummyClassifier is feature independent:
# permuting a feature should not change the predictions
expected_importances = np.zeros((n_features, n_repeats))
assert_allclose(expected_importances, r.importances)