Uploaded Test files

2020-11-12 11:05:57 -05:00 · 2020-11-12 11:05:57 -05:00 · 2e81cb7d99
commit 2e81cb7d99
parent f584ad9d97
16627 changed files with 2065359 additions and 102444 deletions
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/__init__.py
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/__init__.py
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/__init__.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/__init__.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/test_base.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/test_base.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/test_chi2.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/test_chi2.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/test_feature_select.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/test_feature_select.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/test_from_model.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/test_from_model.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/test_mutual_info.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/test_mutual_info.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/test_rfe.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/test_rfe.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/test_variance_threshold.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/__pycache__/test_variance_threshold.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/test_base.py
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/test_base.py
@@ -0,0 +1,119 @@
+import numpy as np
+import pytest
+from scipy import sparse as sp
+
+from numpy.testing import assert_array_equal
+
+from sklearn.base import BaseEstimator
+from sklearn.feature_selection._base import SelectorMixin
+from sklearn.utils import check_array
+
+
+class StepSelector(SelectorMixin, BaseEstimator):
+    """Toy selector keeping every ``step``-th feature, starting at index 0."""
+
+    def __init__(self, step=2):
+        self.step = step
+
+    def fit(self, X, y=None):
+        """Validate ``X`` and remember how many input columns it has."""
+        validated = check_array(X, accept_sparse='csc')
+        self.n_input_feats = validated.shape[1]
+        return self
+
+    def _get_support_mask(self):
+        """Return a boolean mask that is True at indices 0, step, 2*step, ..."""
+        keep = np.zeros(self.n_input_feats, dtype=bool)
+        keep[::self.step] = True
+        return keep
+
+
+# Shared fixtures: a 2x10 integer matrix plus the results expected from
+# StepSelector(step=2), which keeps the even-numbered columns.
+X = np.arange(20).reshape(2, 10)
+y = [0, 1]
+support = [True, False] * 5
+support_inds = [idx for idx, kept in enumerate(support) if kept]
+Xt = np.arange(0, 20, 2).reshape(2, 5)
+# Xt mapped back to the original width: dropped columns are zero-filled.
+Xinv = np.where(support, X, 0)
+feature_names = list('ABCDEFGHIJ')
+feature_names_t = feature_names[::2]
+# Dropped feature names become empty strings.
+feature_names_inv = np.array(feature_names)
+feature_names_inv[1::2] = ''
+
+
+def test_transform_dense():
+    """transform/fit_transform on dense input keep the right columns."""
+    sel = StepSelector()
+    via_fit_then_transform = sel.fit(X, y).transform(X)
+    via_fit_transform = StepSelector().fit_transform(X, y)
+    assert_array_equal(Xt, via_fit_then_transform)
+    assert_array_equal(Xt, via_fit_transform)
+
+    # The output dtype must follow the input dtype.
+    for dtype in (np.int32, np.float32):
+        assert dtype == sel.transform(X.astype(dtype)).dtype
+
+    # A nested list of strings (non-numeric dtype) is accepted as well.
+    selected_names = sel.transform([feature_names])
+    assert_array_equal(feature_names_t, selected_names.ravel())
+
+    # Wrong-shaped input (one column instead of ten) must raise.
+    with pytest.raises(ValueError):
+        sel.transform(np.array([[1], [2]]))
+
+
+def test_transform_sparse():
+    """transform/fit_transform on CSC input keep the right columns."""
+    to_csc = sp.csc_matrix
+    sel = StepSelector()
+    via_fit_then_transform = sel.fit(to_csc(X)).transform(to_csc(X))
+    via_fit_transform = sel.fit_transform(to_csc(X))
+    assert_array_equal(Xt, via_fit_then_transform.toarray())
+    assert_array_equal(Xt, via_fit_transform.toarray())
+
+    # The output dtype must follow the input dtype.
+    for dtype in (np.int32, np.float32):
+        assert dtype == sel.transform(to_csc(X).astype(dtype)).dtype
+
+    # Wrong-shaped input (one column instead of ten) must raise.
+    with pytest.raises(ValueError):
+        sel.transform(np.array([[1], [2]]))
+
+
+def test_inverse_transform_dense():
+    """inverse_transform on dense input restores the width and keeps dtype."""
+    sel = StepSelector()
+    restored = sel.fit(X, y).inverse_transform(Xt)
+    assert_array_equal(Xinv, restored)
+
+    # The output dtype must follow the input dtype.
+    for dtype in (np.int32, np.float32):
+        assert dtype == sel.inverse_transform(Xt.astype(dtype)).dtype
+
+    # A nested list of strings (non-numeric dtype) is accepted as well.
+    restored_names = sel.inverse_transform([feature_names_t])
+    assert_array_equal(feature_names_inv, restored_names.ravel())
+
+    # Wrong-shaped input (one column instead of five) must raise.
+    with pytest.raises(ValueError):
+        sel.inverse_transform(np.array([[1], [2]]))
+
+
+def test_inverse_transform_sparse():
+    """inverse_transform on CSC input restores the width and keeps dtype."""
+    to_csc = sp.csc_matrix
+    sel = StepSelector()
+    restored = sel.fit(to_csc(X)).inverse_transform(to_csc(Xt))
+    assert_array_equal(Xinv, restored.toarray())
+
+    # The output dtype must follow the input dtype.
+    for dtype in (np.int32, np.float32):
+        out = sel.inverse_transform(to_csc(Xt).astype(dtype))
+        assert dtype == out.dtype
+
+    # Wrong-shaped input (one column instead of five) must raise.
+    with pytest.raises(ValueError):
+        sel.inverse_transform(np.array([[1], [2]]))
+
+
+def test_get_support():
+    """get_support returns the boolean mask, or indices with indices=True."""
+    sel = StepSelector().fit(X, y)
+    mask = sel.get_support()
+    indices = sel.get_support(indices=True)
+    assert_array_equal(support, mask)
+    assert_array_equal(support_inds, indices)
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/test_chi2.py
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/test_chi2.py
@@ -0,0 +1,96 @@
+"""
+Tests for chi2, currently the only feature selection function designed
+specifically to work with sparse matrices.
+"""
+
+import warnings
+
+import numpy as np
+import pytest
+from scipy.sparse import coo_matrix, csr_matrix
+import scipy.stats
+
+from sklearn.feature_selection import SelectKBest, chi2
+from sklearn.feature_selection._univariate_selection import _chisquare
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_array_equal
+
+# Shared toy data set for the tests below: each row of X is one sample,
+# each column one feature, and y holds the class label of each row.
+# Feature 0 is highly informative for class 1;
+# feature 1 is the same everywhere;
+# feature 2 is a bit informative for class 2.
+X = [[2, 1, 2],
+     [9, 1, 1],
+     [6, 1, 2],
+     [0, 1, 2]]
+y = [0, 1, 2, 2]
+
+
+def mkchi2(k):
+    """Build an unfitted SelectKBest that scores with chi2 and keeps ``k``."""
+    selector = SelectKBest(chi2, k=k)
+    return selector
+
+
+def test_chi2():
+    """Check chi2-based k-best feature selection on dense and CSR input.
+
+    The local is named ``selector`` rather than ``chi2`` so that it does not
+    shadow the imported ``chi2`` score function, and the selector is fitted
+    once per configuration (the original fitted the k=1 selector twice).
+    """
+    # With k=1 only feature 0 must be kept.
+    selector = mkchi2(k=1).fit(X, y)
+    assert_array_equal(selector.get_support(indices=True), [0])
+    assert_array_equal(selector.transform(X), np.array(X)[:, [0]])
+
+    # With k=2 features 0 and 2 must be kept (feature 1 is constant).
+    selector = mkchi2(k=2).fit(X, y)
+    assert_array_equal(sorted(selector.get_support(indices=True)), [0, 2])
+
+    # Same selection on a sparse CSR matrix.
+    Xsp = csr_matrix(X, dtype=np.float64)
+    selector = mkchi2(k=2).fit(Xsp, y)
+    assert_array_equal(sorted(selector.get_support(indices=True)), [0, 2])
+    Xtrans = selector.transform(Xsp)
+    assert_array_equal(Xtrans.shape, [Xsp.shape[0], 2])
+
+    # == doesn't work on scipy.sparse matrices, so compare dense copies of
+    # fit(...).transform(...) and fit_transform(...).
+    Xtrans = Xtrans.toarray()
+    Xtrans2 = mkchi2(k=2).fit_transform(Xsp, y).toarray()
+    assert_array_almost_equal(Xtrans, Xtrans2)
+
+
+def test_chi2_coo():
+    """Smoke test: chi2 selection accepts a COO matrix without raising."""
+    # COO is the format returned by CountVectorizer and DictVectorizer.
+    coo_input = coo_matrix(X)
+    # Reaching the end of this call without an exception is the whole test.
+    mkchi2(k=2).fit_transform(coo_input, y)
+
+
+def test_chi2_negative():
+    """chi2 must raise ValueError when X contains a negative value."""
+    rows, labels = [[0, 1], [-1e-20, 1]], [0, 1]
+    # The same data as a nested list, an ndarray and a CSR matrix.
+    variants = (rows, np.array(rows), csr_matrix(rows))
+    for data in variants:
+        with pytest.raises(ValueError):
+            chi2(data, labels)
+
+
+def test_chi2_unused_feature():
+    """An all-zero feature scores NaN and triggers no divide-by-zero warning."""
+    with warnings.catch_warnings(record=True) as warned:
+        warnings.simplefilter('always')
+        chi, p = chi2([[1, 0], [0, 0]], [1, 0])
+
+    # Inspect the recorded warnings once the call has finished.
+    for caught in warned:
+        if 'divide by zero' in repr(caught):
+            raise AssertionError('Found unexpected warning %s' % caught)
+
+    assert_array_equal(chi, [1, np.nan])
+    assert_array_equal(p[1], np.nan)
+
+
+def test_chisquare():
+    """The private _chisquare must agree with scipy.stats.chisquare."""
+    observed = np.array([[2., 2.],
+                         [1., 1.]])
+    expected = np.array([[1.5, 1.5],
+                         [1.5, 1.5]])
+    # SciPy has to run first because our version overwrites ``observed``.
+    stat_ref, p_ref = scipy.stats.chisquare(observed, expected)
+    stat_ours, p_ours = _chisquare(observed, expected)
+
+    assert_array_almost_equal(stat_ref, stat_ours)
+    assert_array_almost_equal(p_ref, p_ours)
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/test_feature_select.py
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/test_feature_select.py
@@ -0,0 +1,669 @@
+"""
+Todo: cross-check the F-value with stats model
+"""
+import itertools
+import warnings
+import numpy as np
+from scipy import stats, sparse
+
+import pytest
+
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_warns
+from sklearn.utils._testing import ignore_warnings
+from sklearn.utils._testing import assert_warns_message
+from sklearn.utils import safe_mask
+
+from sklearn.datasets import make_classification, make_regression
+from sklearn.feature_selection import (
+    chi2, f_classif, f_oneway, f_regression, mutual_info_classif,
+    mutual_info_regression, SelectPercentile, SelectKBest, SelectFpr,
+    SelectFdr, SelectFwe, GenericUnivariateSelect)
+
+
+##############################################################################
+# Test the score functions
+
+def test_f_oneway_vs_scipy_stats():
+    """sklearn's f_oneway must match scipy.stats.f_oneway on random data."""
+    rng = np.random.RandomState(0)
+    group_a = rng.randn(10, 3)
+    group_b = 1 + rng.randn(10, 3)
+    f_ref, p_ref = stats.f_oneway(group_a, group_b)
+    f_ours, p_ours = f_oneway(group_a, group_b)
+    assert np.allclose(f_ref, f_ours)
+    assert np.allclose(p_ref, p_ours)
+
+
+def test_f_oneway_ints():
+    """Smoke test: f_oneway accepts integer input and matches the float result.
+
+    Checks that f_oneway does not raise casting errors on integer arrays
+    with recent NumPy versions.
+    """
+    rng = np.random.RandomState(0)
+    X = rng.randint(10, size=(10, 10))
+    y = np.arange(10)
+    fint, pint = f_oneway(X, y)
+
+    # Test that it gives the same result as with float input.  The builtin
+    # ``float`` is used because the ``np.float`` alias was deprecated in
+    # NumPy 1.20 and removed in 1.24.
+    f, p = f_oneway(X.astype(float), y)
+    assert_array_almost_equal(f, fint, decimal=4)
+    assert_array_almost_equal(p, pint, decimal=4)
+
+
+def test_f_classif():
+    """f_classif gives sensible scores, identical for dense and sparse X."""
+    X, y = make_classification(
+        n_samples=200, n_features=20, n_informative=3, n_redundant=2,
+        n_repeated=0, n_classes=8, n_clusters_per_class=1, flip_y=0.0,
+        class_sep=10, shuffle=False, random_state=0)
+
+    F, pv = f_classif(X, y)
+    F_sparse, pv_sparse = f_classif(sparse.csr_matrix(X), y)
+
+    # Scores are positive and p-values lie strictly inside (0, 1).
+    assert np.all(F > 0)
+    assert np.all(pv > 0)
+    assert np.all(pv < 1)
+    # The first five features are significant, the remaining ones are not.
+    assert np.all(pv[:5] < 0.05)
+    assert np.all(pv[5:] > 1.e-4)
+
+    assert_array_almost_equal(F_sparse, F)
+    assert_array_almost_equal(pv_sparse, pv)
+
+
+def test_f_regression():
+    """f_regression gives sensible scores, identical for dense and sparse X."""
+    X, y = make_regression(n_samples=200, n_features=20, n_informative=5,
+                           shuffle=False, random_state=0)
+
+    F, pv = f_regression(X, y)
+    # Scores are positive and p-values lie strictly inside (0, 1).
+    assert np.all(F > 0)
+    assert np.all(pv > 0)
+    assert np.all(pv < 1)
+    # The first five features are significant, the remaining ones are not.
+    assert np.all(pv[:5] < 0.05)
+    assert np.all(pv[5:] > 1.e-4)
+
+    # Dense and sparse input must agree, first with centering, then without.
+    for center in (True, False):
+        F, pv = f_regression(X, y, center=center)
+        F_sparse, pv_sparse = f_regression(sparse.csr_matrix(X), y,
+                                           center=center)
+        assert_array_almost_equal(F_sparse, F)
+        assert_array_almost_equal(pv_sparse, pv)
+
+
+def test_f_regression_input_dtype():
+    """f_regression must return the same values for integer and float ``y``."""
+    rng = np.random.RandomState(0)
+    X = rng.rand(10, 20)
+    # Builtin ``int``/``float`` replace the ``np.int``/``np.float`` aliases,
+    # which were deprecated in NumPy 1.20 and removed in 1.24.
+    y = np.arange(10).astype(int)
+
+    F1, pv1 = f_regression(X, y)
+    F2, pv2 = f_regression(X, y.astype(float))
+    assert_array_almost_equal(F1, F2, 5)
+    assert_array_almost_equal(pv1, pv2, 5)
+
+
+def test_f_regression_center():
+    """f_regression's degrees of freedom must follow the ``center`` argument.
+
+    Both variates of the toy example are already centered, so the F-scores
+    with and without centering differ only by (n - 1) / (n - 2).
+    """
+    features = np.arange(-5, 6).reshape(-1, 1)  # symmetric, so zero mean
+    n_samples = features.size
+    target = np.ones(n_samples)
+    target[::2] *= -1.
+    target[0] = 0.  # leaves five +1 and five -1, so the mean is zero
+
+    f_centered, _ = f_regression(features, target, center=True)
+    f_uncentered, _ = f_regression(features, target, center=False)
+    assert_array_almost_equal(
+        f_centered * (n_samples - 1.) / (n_samples - 2.), f_uncentered)
+    # Reference value from statsmodels OLS.
+    assert_almost_equal(f_uncentered[0], 0.232558139)
+
+
+def test_f_classif_multi_class():
+    """f_classif gives sensible scores on a simulated 8-class problem."""
+    X, y = make_classification(
+        n_samples=200, n_features=20, n_informative=3, n_redundant=2,
+        n_repeated=0, n_classes=8, n_clusters_per_class=1, flip_y=0.0,
+        class_sep=10, shuffle=False, random_state=0)
+
+    F, pv = f_classif(X, y)
+    # Scores are positive and p-values lie strictly inside (0, 1).
+    assert np.all(F > 0)
+    assert np.all(pv > 0)
+    assert np.all(pv < 1)
+    # The first five features are significant, the remaining ones are not.
+    assert np.all(pv[:5] < 0.05)
+    assert np.all(pv[5:] > 1.e-4)
+
+
+def test_select_percentile_classif():
+    """SelectPercentile(f_classif, 25) keeps the first 5 of 20 features."""
+    X, y = make_classification(
+        n_samples=200, n_features=20, n_informative=3, n_redundant=2,
+        n_repeated=0, n_classes=8, n_clusters_per_class=1, flip_y=0.0,
+        class_sep=10, shuffle=False, random_state=0)
+
+    univariate_filter = SelectPercentile(f_classif, percentile=25)
+    X_r = univariate_filter.fit(X, y).transform(X)
+    # The generic selector in 'percentile' mode must give the same output.
+    generic = GenericUnivariateSelect(f_classif, mode='percentile', param=25)
+    X_r2 = generic.fit(X, y).transform(X)
+    assert_array_equal(X_r, X_r2)
+
+    expected_support = np.concatenate([np.ones(5), np.zeros(15)])
+    assert_array_equal(univariate_filter.get_support(), expected_support)
+
+
+def test_select_percentile_classif_sparse():
+    """Percentile selection and its inverse work on CSR input."""
+    X, y = make_classification(
+        n_samples=200, n_features=20, n_informative=3, n_redundant=2,
+        n_repeated=0, n_classes=8, n_clusters_per_class=1, flip_y=0.0,
+        class_sep=10, shuffle=False, random_state=0)
+    X = sparse.csr_matrix(X)
+
+    univariate_filter = SelectPercentile(f_classif, percentile=25)
+    X_r = univariate_filter.fit(X, y).transform(X)
+    generic = GenericUnivariateSelect(f_classif, mode='percentile', param=25)
+    X_r2 = generic.fit(X, y).transform(X)
+    assert_array_equal(X_r.toarray(), X_r2.toarray())
+
+    support = univariate_filter.get_support()
+    expected_support = np.concatenate([np.ones(5), np.zeros(15)])
+    assert_array_equal(support, expected_support)
+
+    # The inverse transform stays sparse and has the original shape.
+    X_r2inv = univariate_filter.inverse_transform(X_r2)
+    assert sparse.issparse(X_r2inv)
+    assert X_r2inv.shape == X.shape
+    # The selected columns carry the reduced data ...
+    support_mask = safe_mask(X_r2inv, support)
+    assert_array_equal(X_r2inv[:, support_mask].toarray(), X_r.toarray())
+    # ... and the other columns are empty.
+    assert X_r2inv.getnnz() == X_r.getnnz()
+
+
+##############################################################################
+# Test univariate selection in classification settings
+
+def test_select_kbest_classif():
+    """SelectKBest(f_classif, k=5) keeps the first 5 of 20 features."""
+    X, y = make_classification(
+        n_samples=200, n_features=20, n_informative=3, n_redundant=2,
+        n_repeated=0, n_classes=8, n_clusters_per_class=1, flip_y=0.0,
+        class_sep=10, shuffle=False, random_state=0)
+
+    univariate_filter = SelectKBest(f_classif, k=5)
+    X_r = univariate_filter.fit(X, y).transform(X)
+    # The generic selector in 'k_best' mode must give the same output.
+    generic = GenericUnivariateSelect(f_classif, mode='k_best', param=5)
+    X_r2 = generic.fit(X, y).transform(X)
+    assert_array_equal(X_r, X_r2)
+
+    expected_support = np.concatenate([np.ones(5), np.zeros(15)])
+    assert_array_equal(univariate_filter.get_support(), expected_support)
+
+
+def test_select_kbest_all():
+    """SelectKBest with k='all' returns the input unchanged."""
+    X, y = make_classification(n_samples=20, n_features=10,
+                               shuffle=False, random_state=0)
+
+    keep_all = SelectKBest(f_classif, k='all')
+    X_r = keep_all.fit(X, y).transform(X)
+    assert_array_equal(X, X_r)
+
+
+def test_select_kbest_zero():
+    """SelectKBest with k=0 selects nothing and warns on transform."""
+    X, y = make_classification(n_samples=20, n_features=10,
+                               shuffle=False, random_state=0)
+
+    univariate_filter = SelectKBest(f_classif, k=0)
+    univariate_filter.fit(X, y)
+    assert_array_equal(univariate_filter.get_support(),
+                       np.zeros(10, dtype=bool))
+
+    # transform must warn and hand back an array with zero columns.
+    X_selected = assert_warns_message(UserWarning, 'No features were selected',
+                                      univariate_filter.transform, X)
+    assert X_selected.shape == (20, 0)
+
+
+def test_select_heuristics_classif():
+    """The fdr, fpr and fwe heuristics all keep the first 5 of 20 features."""
+    X, y = make_classification(
+        n_samples=200, n_features=20, n_informative=3, n_redundant=2,
+        n_repeated=0, n_classes=8, n_clusters_per_class=1, flip_y=0.0,
+        class_sep=10, shuffle=False, random_state=0)
+
+    # SelectFwe is the reference that every generic mode is compared with.
+    univariate_filter = SelectFwe(f_classif, alpha=0.01)
+    X_r = univariate_filter.fit(X, y).transform(X)
+    expected_support = np.concatenate([np.ones(5), np.zeros(15)])
+
+    for mode in ('fdr', 'fpr', 'fwe'):
+        generic = GenericUnivariateSelect(f_classif, mode=mode, param=0.01)
+        X_r2 = generic.fit(X, y).transform(X)
+        assert_array_equal(X_r, X_r2)
+        support = univariate_filter.get_support()
+        assert_array_almost_equal(support, expected_support)
+
+
+##############################################################################
+# Test univariate selection in regression settings
+
+
+def assert_best_scores_kept(score_filter):
+    """Assert that the selected features carry the highest scores.
+
+    Compares the sorted scores of the features flagged by
+    ``score_filter.get_support()`` with the last ``support.sum()`` entries
+    of all sorted ``score_filter.scores_``.
+    """
+    all_scores = score_filter.scores_
+    mask = score_filter.get_support()
+    kept_scores = np.sort(all_scores[mask])
+    top_scores = np.sort(all_scores)[-mask.sum():]
+    assert_array_almost_equal(kept_scores, top_scores)
+
+
+def test_select_percentile_regression():
+    """SelectPercentile(f_regression, 25) keeps the first 5 of 20 features."""
+    X, y = make_regression(n_samples=200, n_features=20,
+                           n_informative=5, shuffle=False, random_state=0)
+
+    univariate_filter = SelectPercentile(f_regression, percentile=25)
+    X_r = univariate_filter.fit(X, y).transform(X)
+    assert_best_scores_kept(univariate_filter)
+    generic = GenericUnivariateSelect(
+        f_regression, mode='percentile', param=25)
+    X_r2 = generic.fit(X, y).transform(X)
+    assert_array_equal(X_r, X_r2)
+
+    support = univariate_filter.get_support()
+    expected_support = np.concatenate([np.ones(5), np.zeros(15)])
+    assert_array_equal(support, expected_support)
+
+    # inverse_transform puts the kept columns back and zero-fills the rest.
+    X_zeroed = X.copy()
+    X_zeroed[:, ~support] = 0
+    assert_array_equal(X_zeroed, univariate_filter.inverse_transform(X_r))
+    # Check inverse_transform respects dtype
+    assert_array_equal(X_zeroed.astype(bool),
+                       univariate_filter.inverse_transform(X_r.astype(bool)))
+
+
+def test_select_percentile_regression_full():
+    """Asking for the 100th percentile selects every feature."""
+    X, y = make_regression(n_samples=200, n_features=20,
+                           n_informative=5, shuffle=False, random_state=0)
+
+    univariate_filter = SelectPercentile(f_regression, percentile=100)
+    X_r = univariate_filter.fit(X, y).transform(X)
+    assert_best_scores_kept(univariate_filter)
+    generic = GenericUnivariateSelect(
+        f_regression, mode='percentile', param=100)
+    X_r2 = generic.fit(X, y).transform(X)
+    assert_array_equal(X_r, X_r2)
+
+    assert_array_equal(univariate_filter.get_support(), np.ones(20))
+
+
+def test_invalid_percentile():
+    """Percentiles outside [0, 100] make fit raise ValueError."""
+    X, y = make_regression(n_samples=10, n_features=20,
+                           n_informative=2, shuffle=False, random_state=0)
+
+    for bad_percentile in (-1, 101):
+        with pytest.raises(ValueError):
+            SelectPercentile(percentile=bad_percentile).fit(X, y)
+        with pytest.raises(ValueError):
+            GenericUnivariateSelect(
+                mode='percentile', param=bad_percentile).fit(X, y)
+
+
+def test_select_kbest_regression():
+    """SelectKBest(f_regression, k=5) keeps the first 5 of 20 features."""
+    X, y = make_regression(n_samples=200, n_features=20, n_informative=5,
+                           shuffle=False, random_state=0, noise=10)
+
+    univariate_filter = SelectKBest(f_regression, k=5)
+    X_r = univariate_filter.fit(X, y).transform(X)
+    assert_best_scores_kept(univariate_filter)
+    generic = GenericUnivariateSelect(f_regression, mode='k_best', param=5)
+    X_r2 = generic.fit(X, y).transform(X)
+    assert_array_equal(X_r, X_r2)
+
+    expected_support = np.concatenate([np.ones(5), np.zeros(15)])
+    assert_array_equal(univariate_filter.get_support(), expected_support)
+
+
+def test_select_heuristics_regression():
+    """The fpr, fdr and fwe heuristics keep the first five features.
+
+    SelectFpr is the reference; every GenericUnivariateSelect mode must
+    return the same columns.  At most two of the remaining fifteen features
+    may be selected as well.
+    """
+    X, y = make_regression(n_samples=200, n_features=20, n_informative=5,
+                           shuffle=False, random_state=0, noise=10)
+
+    univariate_filter = SelectFpr(f_regression, alpha=0.01)
+    X_r = univariate_filter.fit(X, y).transform(X)
+    for mode in ['fdr', 'fpr', 'fwe']:
+        X_r2 = GenericUnivariateSelect(
+            f_regression, mode=mode, param=0.01).fit(X, y).transform(X)
+        assert_array_equal(X_r, X_r2)
+        support = univariate_filter.get_support()
+        # Builtin ``bool`` replaces the ``np.bool`` alias, which was
+        # deprecated in NumPy 1.20 and removed in 1.24.
+        assert_array_equal(support[:5], np.ones((5, ), dtype=bool))
+        assert np.sum(support[5:] == 1) < 3
+
+
+def test_boundary_case_ch2():
+    """Every chi2-based selector must pick only feature 0 in a boundary case."""
+    X = np.array([[10, 20], [20, 20], [20, 30]])
+    y = np.array([[1], [0], [0]])
+    scores, pvalues = chi2(X, y)
+    assert_array_almost_equal(scores, np.array([4., 0.71428571]))
+    assert_array_almost_equal(pvalues, np.array([0.04550026, 0.39802472]))
+
+    # Each selector is configured so that exactly one feature should pass.
+    expected_support = np.array([True, False])
+    selectors = (
+        SelectFdr(chi2, alpha=0.1),
+        SelectKBest(chi2, k=1),
+        SelectPercentile(chi2, percentile=50),
+        SelectFpr(chi2, alpha=0.1),
+        SelectFwe(chi2, alpha=0.1),
+    )
+    for selector in selectors:
+        selector.fit(X, y)
+        assert_array_equal(selector.get_support(), expected_support)
+
+
+@pytest.mark.parametrize("alpha", [0.001, 0.01, 0.1])
+@pytest.mark.parametrize("n_informative", [1, 5, 10])
+def test_select_fdr_regression(alpha, n_informative):
+    """Check that SelectFdr keeps the empirical false discovery rate low.
+
+    The rate is averaged over 100 regression problems (random_state 0..99)
+    for every combination of ``alpha`` and ``n_informative``.
+    """
+    # Test that fdr heuristic actually has low FDR.
+    def single_fdr(alpha, n_informative, random_state):
+        """Return the false discovery rate observed on one random problem."""
+        X, y = make_regression(n_samples=150, n_features=20,
+                               n_informative=n_informative, shuffle=False,
+                               random_state=random_state, noise=10)
+
+        with warnings.catch_warnings(record=True):
+            # Warnings can be raised when no features are selected
+            # (low alpha or very noisy data)
+            univariate_filter = SelectFdr(f_regression, alpha=alpha)
+            X_r = univariate_filter.fit(X, y).transform(X)
+            X_r2 = GenericUnivariateSelect(
+                f_regression, mode='fdr', param=alpha).fit(X, y).transform(X)
+
+        assert_array_equal(X_r, X_r2)
+        support = univariate_filter.get_support()
+        # The first n_informative columns count as true positives when
+        # selected; any selected column after them is a false positive.
+        num_false_positives = np.sum(support[n_informative:] == 1)
+        num_true_positives = np.sum(support[:n_informative] == 1)
+
+        # Early exit also avoids 0 / 0 when nothing at all was selected.
+        if num_false_positives == 0:
+            return 0.
+        false_discovery_rate = (num_false_positives /
+                                (num_true_positives + num_false_positives))
+        return false_discovery_rate
+
+    # As per Benjamini-Hochberg, the expected false discovery rate
+    # should be lower than alpha:
+    # FDR = E(FP / (TP + FP)) <= alpha
+    false_discovery_rate = np.mean([single_fdr(alpha, n_informative,
+                                               random_state) for
+                                    random_state in range(100)])
+    assert alpha >= false_discovery_rate
+
+    # Make sure that the empirical false discovery rate increases
+    # with alpha:
+    if false_discovery_rate != 0:
+        assert false_discovery_rate > alpha / 10
+
+
+def test_select_fwe_regression():
+    """SelectFwe(f_regression) keeps the first five features.
+
+    The generic selector in 'fwe' mode must return the same columns, and at
+    most one of the remaining fifteen features may be selected as well.
+    """
+    X, y = make_regression(n_samples=200, n_features=20,
+                           n_informative=5, shuffle=False, random_state=0)
+
+    univariate_filter = SelectFwe(f_regression, alpha=0.01)
+    X_r = univariate_filter.fit(X, y).transform(X)
+    X_r2 = GenericUnivariateSelect(
+        f_regression, mode='fwe', param=0.01).fit(X, y).transform(X)
+    assert_array_equal(X_r, X_r2)
+    support = univariate_filter.get_support()
+    # Builtin ``bool`` replaces the ``np.bool`` alias, which was deprecated
+    # in NumPy 1.20 and removed in 1.24.
+    assert_array_equal(support[:5], np.ones((5, ), dtype=bool))
+    assert np.sum(support[5:] == 1) < 2
+
+
+def test_selectkbest_tiebreaking():
+    """SelectKBest must return exactly k features even when scores tie."""
+    # Prior to 0.11, SelectKBest would return more features than requested.
+    Xs = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]]
+    y = [1]
+
+    def dummy_score(X, y):
+        # The single sample row doubles as both the scores and the p-values.
+        return X[0], X[0]
+
+    for X in Xs:
+        for k in (1, 2):
+            sel = SelectKBest(dummy_score, k=k)
+            X_k = ignore_warnings(sel.fit_transform)([X], y)
+            assert X_k.shape[1] == k
+            assert_best_scores_kept(sel)
+
+
+def test_selectpercentile_tiebreaking():
+    """SelectPercentile must keep the right feature count when scores tie."""
+    Xs = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]]
+    y = [1]
+
+    def dummy_score(X, y):
+        # The single sample row doubles as both the scores and the p-values.
+        return X[0], X[0]
+
+    # With three features, 34% must keep one column and 67% two.
+    for X in Xs:
+        for percentile, n_expected in ((34, 1), (67, 2)):
+            sel = SelectPercentile(dummy_score, percentile=percentile)
+            X_p = ignore_warnings(sel.fit_transform)([X], y)
+            assert X_p.shape[1] == n_expected
+            assert_best_scores_kept(sel)
+
+
+def test_tied_pvalues():
+    """k-best and percentile selection must cope with tied chi2 p-values."""
+    # chi2 will return the same p-values for the following features, but it
+    # will return different scores.
+    X0 = np.array([[10000, 9999, 9998], [1, 1, 1]])
+    y = [0, 1]
+
+    for perm in itertools.permutations((0, 1, 2)):
+        X = X0[:, perm]
+        selectors = (SelectKBest(chi2, k=2),
+                     SelectPercentile(chi2, percentile=67))
+        for selector in selectors:
+            Xt = selector.fit_transform(X, y)
+            assert Xt.shape == (2, 2)
+            # The column holding 9998 must be dropped whatever its position.
+            assert 9998 not in Xt
+
+
+def test_scorefunc_multilabel():
+    """k-best and percentile selection work with chi2 on multilabel ``y``."""
+    X = np.array([[10000, 9999, 0], [100, 9999, 0], [1000, 99, 0]])
+    y = [[1, 1], [0, 1], [1, 0]]
+
+    selectors = (SelectKBest(chi2, k=2),
+                 SelectPercentile(chi2, percentile=67))
+    for selector in selectors:
+        Xt = selector.fit_transform(X, y)
+        assert Xt.shape == (3, 2)
+        # The all-zero third column must be the one that is dropped.
+        assert 0 not in Xt
+
+
+def test_tied_scores():
+    """k-best must sort tied scores in a stable way."""
+    X_train = np.array([[0, 0, 0], [1, 1, 1]])
+    y_train = [0, 1]
+    probe = [[0, 1, 2]]
+
+    for k in (1, 2, 3):
+        sel = SelectKBest(chi2, k=k).fit(X_train, y_train)
+        picked = sel.transform(probe)
+        # All three columns are identical, so the last k must be kept.
+        assert_array_equal(picked[0], np.arange(3)[-k:])
+
+
+def test_nans():
+    """SelectKBest and SelectPercentile must cope with NaN scores."""
+    # First feature has zero variance to confuse f_classif (ANOVA) and
+    # make it return a NaN.
+    X = [[0, 1, 0], [0, -1, -1], [0, .5, .5]]
+    y = [1, 0, 1]
+
+    selectors = (SelectKBest(f_classif, k=2),
+                 SelectPercentile(f_classif, percentile=67))
+    for selector in selectors:
+        ignore_warnings(selector.fit)(X, y)
+        kept = selector.get_support(indices=True)
+        assert_array_equal(kept, np.array([1, 2]))
+
+
+def test_score_func_error():
+    """A non-callable score_func makes every selector's fit raise TypeError."""
+    X = [[0, 1, 0], [0, -1, -1], [0, .5, .5]]
+    y = [1, 0, 1]
+
+    selector_classes = (SelectKBest, SelectPercentile, SelectFwe,
+                        SelectFdr, SelectFpr, GenericUnivariateSelect)
+    for selector_class in selector_classes:
+        with pytest.raises(TypeError):
+            selector_class(score_func=10).fit(X, y)
+
+
+def test_invalid_k():
+    """A negative k, or k above the feature count, makes fit raise."""
+    X = [[0, 1, 0], [0, -1, -1], [0, .5, .5]]
+    y = [1, 0, 1]
+
+    # X has three features, so both -1 and 4 are out of range.
+    for bad_k in (-1, 4):
+        with pytest.raises(ValueError):
+            SelectKBest(k=bad_k).fit(X, y)
+        with pytest.raises(ValueError):
+            GenericUnivariateSelect(mode='k_best', param=bad_k).fit(X, y)
+
+
+def test_f_classif_constant_feature():
+    """f_classif must emit a UserWarning when a feature is constant."""
+    X, y = make_classification(n_samples=10, n_features=5)
+    # Overwrite the first column with a single value.
+    X[:, 0] = 2.0
+    assert_warns(UserWarning, f_classif, X, y)
+
+
+def test_no_feature_selected():
+    """Strict selectors on pure noise select nothing and warn on transform."""
+    rng = np.random.RandomState(0)
+
+    # Random, uncorrelated data: a strict univariate test should reject
+    # all the features.
+    X = rng.rand(40, 10)
+    y = rng.randint(0, 4, size=40)
+    strict_selectors = (
+        SelectFwe(alpha=0.01),
+        SelectFdr(alpha=0.01),
+        SelectFpr(alpha=0.01),
+        SelectPercentile(percentile=0),
+        SelectKBest(k=0),
+    )
+    for selector in strict_selectors:
+        selector.fit(X, y)
+        assert_array_equal(selector.get_support(), np.zeros(10))
+        X_selected = assert_warns_message(
+            UserWarning, 'No features were selected', selector.transform, X)
+        assert X_selected.shape == (40, 0)
+
+
+def test_mutual_info_classif():
+    """mutual_info_classif selects the first two of five features."""
+    X, y = make_classification(
+        n_samples=100, n_features=5, n_informative=1, n_redundant=1,
+        n_repeated=0, n_classes=2, n_clusters_per_class=1, flip_y=0.0,
+        class_sep=10, shuffle=False, random_state=0)
+    expected_support = np.concatenate([np.ones(2), np.zeros(3)])
+
+    # Test in KBest mode.
+    univariate_filter = SelectKBest(mutual_info_classif, k=2)
+    X_r = univariate_filter.fit(X, y).transform(X)
+    generic = GenericUnivariateSelect(
+        mutual_info_classif, mode='k_best', param=2)
+    X_r2 = generic.fit(X, y).transform(X)
+    assert_array_equal(X_r, X_r2)
+    assert_array_equal(univariate_filter.get_support(), expected_support)
+
+    # Test in Percentile mode.
+    univariate_filter = SelectPercentile(mutual_info_classif, percentile=40)
+    X_r = univariate_filter.fit(X, y).transform(X)
+    generic = GenericUnivariateSelect(
+        mutual_info_classif, mode='percentile', param=40)
+    X_r2 = generic.fit(X, y).transform(X)
+    assert_array_equal(X_r, X_r2)
+    assert_array_equal(univariate_filter.get_support(), expected_support)
+
+
+def test_mutual_info_regression():
+    """mutual_info_regression selects the first two of ten features."""
+    X, y = make_regression(n_samples=100, n_features=10, n_informative=2,
+                           shuffle=False, random_state=0, noise=10)
+    expected_support = np.concatenate([np.ones(2), np.zeros(8)])
+
+    # Test in KBest mode.
+    univariate_filter = SelectKBest(mutual_info_regression, k=2)
+    X_r = univariate_filter.fit(X, y).transform(X)
+    assert_best_scores_kept(univariate_filter)
+    generic = GenericUnivariateSelect(
+        mutual_info_regression, mode='k_best', param=2)
+    X_r2 = generic.fit(X, y).transform(X)
+    assert_array_equal(X_r, X_r2)
+    assert_array_equal(univariate_filter.get_support(), expected_support)
+
+    # Test in Percentile mode.
+    univariate_filter = SelectPercentile(mutual_info_regression, percentile=20)
+    X_r = univariate_filter.fit(X, y).transform(X)
+    generic = GenericUnivariateSelect(
+        mutual_info_regression, mode='percentile', param=20)
+    X_r2 = generic.fit(X, y).transform(X)
+    assert_array_equal(X_r, X_r2)
+    assert_array_equal(univariate_filter.get_support(), expected_support)
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/test_from_model.py
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/test_from_model.py
@ -0,0 +1,369 @@
+import pytest
+import numpy as np
+
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils._testing import skip_if_32bit
+
+from sklearn import datasets
+from sklearn.linear_model import LogisticRegression, SGDClassifier, Lasso
+from sklearn.svm import LinearSVC
+from sklearn.feature_selection import SelectFromModel
+from sklearn.experimental import enable_hist_gradient_boosting  # noqa
+from sklearn.ensemble import (RandomForestClassifier,
+                              HistGradientBoostingClassifier)
+from sklearn.linear_model import PassiveAggressiveClassifier
+from sklearn.base import BaseEstimator
+
+
+class NaNTag(BaseEstimator):
+    """Minimal estimator whose extra tags declare ``allow_nan`` as True."""
+
+    def _more_tags(self):
+        return dict(allow_nan=True)
+
+
+class NoNaNTag(BaseEstimator):
+    """Minimal estimator whose extra tags declare ``allow_nan`` as False."""
+
+    def _more_tags(self):
+        return dict(allow_nan=False)
+
+
+class NaNTagRandomForest(RandomForestClassifier):
+    """RandomForestClassifier whose extra tags declare ``allow_nan`` True."""
+
+    def _more_tags(self):
+        return dict(allow_nan=True)
+
+
+# Module-level fixtures shared by the tests below: the iris feature matrix
+# and target, plus a seeded RandomState.
+iris = datasets.load_iris()
+data = iris.data
+y = iris.target
+rng = np.random.RandomState(0)
+
+
+def test_invalid_input():
+    # A threshold string that cannot be interpreted must make transform()
+    # raise ValueError; fit() is called first and is expected to succeed.
+    sgd = SGDClassifier(alpha=0.1, max_iter=10, shuffle=True,
+                        random_state=None, tol=None)
+    bad_thresholds = ("gobbledigook", ".5 * gobbledigook")
+    for bad_threshold in bad_thresholds:
+        selector = SelectFromModel(sgd, threshold=bad_threshold)
+        selector.fit(data, y)
+        with pytest.raises(ValueError):
+            selector.transform(data)
+
+
+def test_input_estimator_unchanged():
+    # SelectFromModel is meant to fit a clone of the estimator: after fit(),
+    # the ``estimator`` attribute must still be the object that was passed in.
+    forest = RandomForestClassifier()
+    selector = SelectFromModel(estimator=forest)
+    selector.fit(data, y)
+    assert selector.estimator is forest
+
+
+@pytest.mark.parametrize(
+    "max_features, err_type, err_msg",
+    [
+        (-1, ValueError, "'max_features' should be 0 and"),
+        (data.shape[1] + 1, ValueError, "'max_features' should be 0 and"),
+        ('gobbledigook', TypeError, "should be an integer"),
+        ('all', TypeError, "should be an integer"),
+    ]
+)
+def test_max_features_error(max_features, err_type, err_msg):
+    # Out-of-range or non-integer ``max_features`` values must make fit()
+    # raise the parametrized exception with the parametrized message.
+    forest = RandomForestClassifier(n_estimators=50, random_state=0)
+    selector = SelectFromModel(estimator=forest,
+                               max_features=max_features,
+                               threshold=-np.inf)
+
+    with pytest.raises(err_type, match=err_msg):
+        selector.fit(data, y)
+
+
+@pytest.mark.parametrize("max_features", [0, 2, data.shape[1]])
+def test_max_features_dim(max_features):
+    # With threshold=-inf the transformed data must have exactly
+    # ``max_features`` columns.
+    forest = RandomForestClassifier(n_estimators=50, random_state=0)
+    selector = SelectFromModel(estimator=forest,
+                               max_features=max_features,
+                               threshold=-np.inf)
+    n_selected = selector.fit_transform(data, y).shape[1]
+    assert n_selected == max_features
+
+
+class FixedImportanceEstimator(BaseEstimator):
+    """Estimator stub that exposes caller-supplied feature importances.
+
+    Parameters
+    ----------
+    importances : array-like
+        Values copied into ``feature_importances_`` when ``fit`` is called.
+    """
+
+    def __init__(self, importances):
+        self.importances = importances
+
+    def fit(self, X, y=None):
+        """Store the fixed importances; ``X`` and ``y`` are not used.
+
+        Returns
+        -------
+        self : FixedImportanceEstimator
+            The estimator itself, so that calls can be chained.
+        """
+        self.feature_importances_ = np.array(self.importances)
+        # Returning self follows the usual estimator ``fit`` convention;
+        # callers that ignore the return value are unaffected.
+        return self
+
+
+def test_max_features():
+    # Exercise the max_features parameter with several values.
+    X, y = datasets.make_classification(
+        n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
+        n_repeated=0, shuffle=False, random_state=0)
+    n_total = X.shape[1]
+    forest = RandomForestClassifier(n_estimators=50, random_state=0)
+
+    # Asking for every feature must match leaving max_features unset.
+    unlimited = SelectFromModel(estimator=forest, threshold=-np.inf)
+    capped = SelectFromModel(estimator=forest, max_features=n_total,
+                             threshold=-np.inf)
+    X_unlimited = unlimited.fit_transform(X, y)
+    X_capped = capped.fit_transform(X, y)
+    assert_allclose(X_unlimited, X_capped)
+
+    # Compare against an actual model: rank the features by |coef_| of a
+    # Lasso fitted through SelectFromModel.
+    reference = SelectFromModel(
+        estimator=Lasso(alpha=0.025, random_state=42))
+    X_reference = reference.fit_transform(X, y)
+    reference_scores = np.abs(reference.estimator_.coef_)
+    reference_order = np.argsort(-reference_scores, kind='mergesort')
+
+    for n_features in range(1, X_reference.shape[1] + 1):
+        capped = SelectFromModel(
+            estimator=Lasso(alpha=0.025, random_state=42),
+            max_features=n_features,
+            threshold=-np.inf)
+        capped.fit_transform(X, y)
+        capped_scores = np.abs(capped.estimator_.coef_)
+        capped_order = np.argsort(-capped_scores, kind='mergesort')
+        assert_allclose(X[:, reference_order[:n_features]],
+                        X[:, capped_order[:n_features]])
+    # Runs once, after the loop, against the last selector fitted above.
+    assert_allclose(reference.estimator_.coef_,
+                    capped.estimator_.coef_)
+
+
+def test_max_features_tiebreak():
+    # With tied importances, max_features must still select exactly the
+    # first ``n_features`` columns (indices 0 .. n_features - 1).
+    X, y = datasets.make_classification(
+        n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
+        n_repeated=0, shuffle=False, random_state=0)
+    n_total = X.shape[1]
+
+    tied_importances = np.array([4, 4, 4, 4, 3, 3, 3, 2, 2, 1])
+    for n_features in range(1, n_total + 1):
+        selector = SelectFromModel(
+            FixedImportanceEstimator(tied_importances),
+            max_features=n_features,
+            threshold=-np.inf)
+        X_selected = selector.fit_transform(X, y)
+        support_mask = selector._get_support_mask()
+        chosen_indices = np.where(support_mask)[0]
+        assert_array_equal(chosen_indices, np.arange(n_features))
+        assert X_selected.shape[1] == n_features
+
+
+def test_threshold_and_max_features():
+    # When both limits are given, the number of kept features must equal the
+    # smaller of what each limit keeps on its own.
+    X, y = datasets.make_classification(
+        n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
+        n_repeated=0, shuffle=False, random_state=0)
+    forest = RandomForestClassifier(n_estimators=50, random_state=0)
+
+    only_max = SelectFromModel(estimator=forest, max_features=3,
+                               threshold=-np.inf)
+    X_only_max = only_max.fit_transform(X, y)
+
+    only_threshold = SelectFromModel(estimator=forest, threshold=0.04)
+    X_only_threshold = only_threshold.fit_transform(X, y)
+
+    both = SelectFromModel(estimator=forest, max_features=3,
+                           threshold=0.04)
+    X_both = both.fit_transform(X, y)
+    expected_width = min(X_only_max.shape[1], X_only_threshold.shape[1])
+    assert X_both.shape[1] == expected_width
+
+    # Transforming a single row of column indices yields the kept indices.
+    column_ids = np.arange(X.shape[1])[np.newaxis, :]
+    kept = both.transform(column_ids)
+    assert_allclose(X_both, X[:, kept[0]])
+
+
+@skip_if_32bit
+def test_feature_importances():
+    # For "mean" and "median" thresholds, the kept columns must be those
+    # whose importance exceeds that statistic of the importances.
+    X, y = datasets.make_classification(
+        n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
+        n_repeated=0, shuffle=False, random_state=0)
+
+    forest = RandomForestClassifier(n_estimators=50, random_state=0)
+    for threshold, stat_func in (("mean", np.mean), ("median", np.median)):
+        selector = SelectFromModel(estimator=forest, threshold=threshold)
+        selector.fit(X, y)
+        assert hasattr(selector.estimator_, 'feature_importances_')
+
+        X_selected = selector.transform(X)
+        assert X_selected.shape[1] < X.shape[1]
+
+        importances = selector.estimator_.feature_importances_
+        expected_mask = np.abs(importances) > stat_func(importances)
+        assert_array_almost_equal(X_selected, X[:, expected_mask])
+
+
+def test_sample_weight():
+    # Ensure sample weights are passed to the underlying estimator.
+    X, y = datasets.make_classification(
+        n_samples=100, n_features=10, n_informative=3, n_redundant=0,
+        n_repeated=0, shuffle=False, random_state=0)
+
+    # Weight the samples of class 1 a hundred times more than the others.
+    weights = np.ones(y.shape)
+    weights[y == 1] *= 100
+
+    selector = SelectFromModel(
+        estimator=LogisticRegression(random_state=0, fit_intercept=False))
+
+    selector.fit(X, y, sample_weight=None)
+    unweighted_mask = selector._get_support_mask()
+
+    # Non-uniform weights must change the selected features.
+    selector.fit(X, y, sample_weight=weights)
+    weighted_mask = selector._get_support_mask()
+    assert not np.all(weighted_mask == unweighted_mask)
+
+    # Rescaling all weights by a constant must not change the selection.
+    selector.fit(X, y, sample_weight=3 * weights)
+    rescaled_mask = selector._get_support_mask()
+    assert np.all(weighted_mask == rescaled_mask)
+
+
+def test_coef_default_threshold():
+    X, y = datasets.make_classification(
+        n_samples=100, n_features=10, n_informative=3, n_redundant=0,
+        n_repeated=0, shuffle=False, random_state=0)
+
+    # For the Lasso and related models, the threshold defaults to 1e-5.
+    lasso = Lasso(alpha=0.1, random_state=42)
+    selector = SelectFromModel(estimator=lasso)
+    selector.fit(X, y)
+    X_selected = selector.transform(X)
+
+    expected_mask = np.abs(selector.estimator_.coef_) > 1e-5
+    assert_array_almost_equal(X_selected, X[:, expected_mask])
+
+
+@skip_if_32bit
+def test_2d_coef():
+    # On a 4-class problem ``coef_`` is 2-D; the selection must match a
+    # per-feature norm of ``coef_`` computed by hand for each norm order.
+    X, y = datasets.make_classification(
+        n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
+        n_repeated=0, shuffle=False, random_state=0, n_classes=4)
+
+    reference = LogisticRegression()
+    for threshold, stat_func in (("mean", np.mean), ("median", np.median)):
+        for order in (1, 2, np.inf):
+            # Fit SelectFromModel on the multi-class problem.
+            selector = SelectFromModel(estimator=LogisticRegression(),
+                                       threshold=threshold,
+                                       norm_order=order)
+            selector.fit(X, y)
+            assert hasattr(selector.estimator_, 'coef_')
+            X_selected = selector.transform(X)
+            assert X_selected.shape[1] < X.shape[1]
+
+            # Manually check that the norm is correctly performed.
+            reference.fit(X, y)
+            importances = np.linalg.norm(reference.coef_, axis=0, ord=order)
+            expected_mask = importances > stat_func(importances)
+            assert_array_almost_equal(X_selected, X[:, expected_mask])
+
+
+def test_partial_fit():
+    pa_clf = PassiveAggressiveClassifier(random_state=0, shuffle=False,
+                                         max_iter=5, tol=None)
+    selector = SelectFromModel(estimator=pa_clf)
+    classes = np.unique(y)
+
+    # Two consecutive partial_fit calls must keep the same fitted estimator.
+    selector.partial_fit(data, y, classes=classes)
+    first_model = selector.estimator_
+    selector.partial_fit(data, y, classes=classes)
+    second_model = selector.estimator_
+    assert first_model is second_model
+
+    # A full fit on the data stacked twice must select the same columns.
+    X_after_partial = selector.transform(data)
+    doubled_X = np.vstack((data, data))
+    doubled_y = np.concatenate((y, y))
+    selector.fit(doubled_X, doubled_y)
+    assert_array_almost_equal(X_after_partial, selector.transform(data))
+
+    # check that if est doesn't have partial_fit, neither does SelectFromModel
+    selector = SelectFromModel(estimator=RandomForestClassifier())
+    assert not hasattr(selector, "partial_fit")
+
+
+def test_calling_fit_reinitializes():
+    # A parameter changed through set_params must show up on the fitted
+    # ``estimator_`` after the next call to fit().
+    svc = LinearSVC(random_state=0)
+    selector = SelectFromModel(estimator=svc)
+    selector.fit(data, y)
+
+    selector.set_params(estimator__C=100)
+    selector.fit(data, y)
+    assert selector.estimator_.C == 100
+
+
+def test_prefit():
+    # Test all possible combinations of the prefit parameter.
+
+    # Passing an already fitted model with prefit=True and fitting an
+    # unfitted model with prefit=False should give the same results.
+    sgd = SGDClassifier(alpha=0.1, max_iter=10, shuffle=True,
+                        random_state=0, tol=None)
+    selector = SelectFromModel(sgd)
+    selector.fit(data, y)
+    X_expected = selector.transform(data)
+
+    sgd.fit(data, y)
+    selector = SelectFromModel(sgd, prefit=True)
+    assert_array_almost_equal(selector.transform(data), X_expected)
+
+    # Check that the model is rewritten if prefit=False and a fitted model
+    # is passed.
+    selector = SelectFromModel(sgd, prefit=False)
+    selector.fit(data, y)
+    assert_array_almost_equal(selector.transform(data), X_expected)
+
+    # Check that prefit=True and calling fit raises a ValueError.
+    selector = SelectFromModel(sgd, prefit=True)
+    with pytest.raises(ValueError):
+        selector.fit(data, y)
+
+
+def test_threshold_string():
+    # A scaled-statistic threshold string ("0.5*mean") must select the same
+    # columns as the threshold computed by hand.
+    forest = RandomForestClassifier(n_estimators=50, random_state=0)
+    selector = SelectFromModel(forest, threshold="0.5*mean")
+    selector.fit(data, y)
+    X_selected = selector.transform(data)
+
+    # Calculate the threshold from the estimator directly.
+    forest.fit(data, y)
+    importances = forest.feature_importances_
+    expected_mask = importances > 0.5 * np.mean(importances)
+    assert_array_almost_equal(X_selected, data[:, expected_mask])
+
+
+def test_threshold_without_refitting():
+    # Test that the threshold can be set without refitting the model.
+    sgd = SGDClassifier(alpha=0.1, max_iter=10, shuffle=True,
+                        random_state=0, tol=None)
+    selector = SelectFromModel(sgd, threshold="0.1 * mean")
+    selector.fit(data, y)
+    n_kept_low = selector.transform(data).shape[1]
+
+    # Set a higher threshold to filter out more features.
+    selector.threshold = "1.0 * mean"
+    n_kept_high = selector.transform(data).shape[1]
+    assert n_kept_low > n_kept_high
+
+
+def test_fit_accepts_nan_inf():
+    # Test that fit doesn't check for np.inf and np.nan values.
+    clf = HistGradientBoostingClassifier(random_state=0)
+
+    model = SelectFromModel(estimator=clf)
+
+    # Corrupt a copy so the shared module-level ``data`` stays clean.
+    # np.nan / np.inf are used instead of the np.NaN / np.Inf aliases, which
+    # were removed in NumPy 2.0.
+    nan_data = data.copy()
+    nan_data[0] = np.nan
+    nan_data[1] = np.inf
+
+    # Fit on the corrupted copy; fitting the clean ``data`` would never
+    # exercise the NaN/inf path this test is about.
+    model.fit(nan_data, y)
+
+
+def test_transform_accepts_nan_inf():
+    # Test that transform doesn't check for np.inf and np.nan values.
+    clf = NaNTagRandomForest(n_estimators=100, random_state=0)
+    nan_data = data.copy()
+
+    # Fit on the still-clean copy; only transform() sees NaN/inf.
+    model = SelectFromModel(estimator=clf)
+    model.fit(nan_data, y)
+
+    # np.nan / np.inf are used instead of the np.NaN / np.Inf aliases, which
+    # were removed in NumPy 2.0.
+    nan_data[0] = np.nan
+    nan_data[1] = np.inf
+
+    model.transform(nan_data)
+
+
+def test_allow_nan_tag_comes_from_estimator():
+    # The ``allow_nan`` tag reported by SelectFromModel must mirror the tag
+    # of the wrapped estimator.
+    cases = ((NaNTag(), True), (NoNaNTag(), False))
+    for wrapped, expected in cases:
+        model = SelectFromModel(estimator=wrapped)
+        assert model._get_tags()['allow_nan'] is expected
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/test_mutual_info.py
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/test_mutual_info.py
@ -0,0 +1,209 @@
+
+import numpy as np
+import pytest
+from scipy.sparse import csr_matrix
+
+from sklearn.utils import check_random_state
+from sklearn.utils._testing import assert_array_equal, assert_almost_equal
+from sklearn.feature_selection._mutual_info import _compute_mi
+from sklearn.feature_selection import (mutual_info_regression,
+                                       mutual_info_classif)
+
+
+def test_compute_mi_dd():
+    # With two discrete variables the mutual information can be worked out
+    # by hand from the vectors below.
+    x = np.array([0, 1, 1, 0, 0])
+    y = np.array([1, 0, 0, 0, 1])
+
+    # Both marginals have masses (3/5, 2/5); the three observed joint cells
+    # have masses 1/5, 2/5 and 2/5.
+    marginal_entropy = -(3/5) * np.log(3/5) - (2/5) * np.log(2/5)
+    joint_entropy = (-1/5 * np.log(1/5) - 2/5 * np.log(2/5)
+                     - 2/5 * np.log(2/5))
+    expected_mi = 2 * marginal_entropy - joint_entropy
+
+    assert_almost_equal(_compute_mi(x, y, True, True), expected_mi)
+
+
+def test_compute_mi_cc():
+    # For two continuous variables, test on a bivariate normal distribution,
+    # whose mutual information is known in closed form.
+
+    # The mean is irrelevant for mutual information.
+    mean = np.zeros(2)
+
+    # Covariance matrix with correlation coefficient 0.5.
+    sigma_1, sigma_2, corr = 1, 10, 0.5
+    cross_term = corr * sigma_1 * sigma_2
+    cov = np.array([
+        [sigma_1**2, cross_term],
+        [cross_term, sigma_2**2]
+    ])
+
+    # True theoretical mutual information.
+    half_log_det = 0.5 * np.log(np.linalg.det(cov))
+    I_theory = np.log(sigma_1) + np.log(sigma_2) - half_log_det
+
+    rng = check_random_state(0)
+    samples = rng.multivariate_normal(mean, cov, size=1000)
+    x = samples[:, 0]
+    y = samples[:, 1]
+
+    # Theory and estimate won't be very close: only require agreement to
+    # one decimal place.
+    for n_neighbors in (3, 5, 7):
+        I_computed = _compute_mi(x, y, False, False, n_neighbors)
+        assert_almost_equal(I_computed, I_theory, 1)
+
+
+def test_compute_mi_cd():
+    # Joint distribution used for the check:
+    #   p(x, y) = p(x) p(y | x)
+    #   X is Bernoulli
+    #   (Y | x = 0) ~ Uniform(-1, 1)
+    #   (Y | x = 1) ~ Uniform(0, 2)
+    #
+    # Mutual information via I(X; Y) = H(Y) - H(Y | X), where both
+    # entropies can be computed by hand:
+    #   H(Y) = -(1-p)/2 * ln((1-p)/2) - p/2 * ln(p/2) - 1/2 * ln(1/2)
+    #   H(Y | X) = ln(2)
+    #
+    # The draw below gives P(x = 1) = 1 - p rather than p; H(Y) is
+    # symmetric in p and 1 - p, so the expected value is the same.
+    n_samples = 1000
+    rng = check_random_state(0)
+
+    for p in (0.3, 0.5, 0.7):
+        x = rng.uniform(size=n_samples) > p
+
+        # Sample y from the conditional distribution matching each x.
+        y = np.empty(n_samples)
+        x_is_zero = x == 0
+        y[x_is_zero] = rng.uniform(-1, 1, size=np.sum(x_is_zero))
+        y[~x_is_zero] = rng.uniform(0, 2, size=np.sum(~x_is_zero))
+
+        entropy_y = -0.5 * ((1 - p) * np.log(0.5 * (1 - p)) +
+                            p * np.log(0.5 * p) + np.log(0.5))
+        I_theory = entropy_y - np.log(2)
+
+        # Same one-decimal tolerance for every neighbourhood size.
+        for n_neighbors in (3, 5, 7):
+            I_computed = _compute_mi(x, y, True, False, n_neighbors)
+            assert_almost_equal(I_computed, I_theory, 1)
+
+
+def test_compute_mi_cd_unique_label():
+    # Test that adding unique label doesn't change MI.
+    n_samples = 100
+    # Seeded generator instead of the global np.random state, so the data
+    # (and any failure) is reproducible from run to run.
+    rng = check_random_state(0)
+    x = rng.uniform(size=n_samples) > 0.5
+
+    y = np.empty(n_samples)
+    mask = x == 0
+    y[mask] = rng.uniform(-1, 1, size=np.sum(mask))
+    y[~mask] = rng.uniform(0, 2, size=np.sum(~mask))
+
+    mi_1 = _compute_mi(x, y, True, False)
+
+    # Append one sample whose discrete label (2) occurs nowhere else.
+    x = np.hstack((x, 2))
+    y = np.hstack((y, 10))
+    mi_2 = _compute_mi(x, y, True, False)
+
+    assert mi_1 == mi_2
+
+
+# Test that the ordering of features by MI matches our expectations.
+def test_mutual_info_classif_discrete():
+    features = np.array([[0, 0, 0],
+                         [1, 1, 0],
+                         [2, 0, 1],
+                         [2, 0, 1],
+                         [2, 0, 1]])
+    target = np.array([0, 1, 2, 2, 1])
+
+    # Here X[:, 0] is the most informative feature, and X[:, 1] is weakly
+    # informative.
+    mi = mutual_info_classif(features, target, discrete_features=True)
+    ranking = np.argsort(-mi)
+    assert_array_equal(ranking, np.array([0, 2, 1]))
+
+
+def test_mutual_info_regression():
+    # Samples come from a multivariate normal distribution whose covariance
+    # is built by transforming initially uncorrelated variables. Variable 0
+    # of the result is the target; the expected ranking below puts variable 2
+    # first and variable 1 last.
+    transform = np.array([
+        [1, 0.5, 2, 1],
+        [0, 1, 0.1, 0.0],
+        [0, 0.1, 1, 0.1],
+        [0, 0.1, 0.1, 1]
+    ])
+    cov = transform.dot(transform.T)
+    mean = np.zeros(4)
+
+    rng = check_random_state(0)
+    samples = rng.multivariate_normal(mean, cov, size=1000)
+    y = samples[:, 0]
+    X = samples[:, 1:]
+
+    mi = mutual_info_regression(X, y, random_state=0)
+    ranking = np.argsort(-mi)
+    assert_array_equal(ranking, np.array([1, 2, 0]))
+
+
+def test_mutual_info_classif_mixed():
+    # Discrete target with two continuous features (columns 0 and 1) and
+    # one discrete feature (column 2).
+    rng = check_random_state(0)
+    X = rng.rand(1000, 3)
+    X[:, 1] += X[:, 0]
+    y = ((0.5 * X[:, 0] + X[:, 2]) > 0.5).astype(int)
+    X[:, 2] = X[:, 2] > 0.5
+
+    baseline = mutual_info_classif(X, y, discrete_features=[2],
+                                   n_neighbors=3, random_state=0)
+    assert_array_equal(np.argsort(-baseline), [2, 0, 1])
+
+    for n_neighbors in (5, 7, 9):
+        mi_nn = mutual_info_classif(X, y, discrete_features=[2],
+                                    n_neighbors=n_neighbors, random_state=0)
+        # The continuous features must get a higher MI with a greater
+        # n_neighbors.
+        assert mi_nn[0] > baseline[0]
+        assert mi_nn[1] > baseline[1]
+        # n_neighbors must not affect the discrete feature: its MI must be
+        # exactly the same.
+        assert mi_nn[2] == baseline[2]
+
+
+def test_mutual_info_options():
+    X = np.array([[0, 0, 0],
+                  [1, 1, 0],
+                  [2, 0, 1],
+                  [2, 0, 1],
+                  [2, 0, 1]], dtype=float)
+    y = np.array([0, 1, 2, 2, 1], dtype=float)
+    X_csr = csr_matrix(X)
+
+    # (expected exception, input matrix, discrete_features) for each call
+    # that must be rejected.
+    invalid_calls = (
+        (ValueError, X_csr, False),
+        (ValueError, X, 'manual'),
+        (ValueError, X_csr, [True, False, True]),
+        (IndexError, X, [True, False, True, False]),
+        (IndexError, X, [1, 4]),
+    )
+
+    for mutual_info in (mutual_info_regression, mutual_info_classif):
+        for expected_error, features, discrete in invalid_calls:
+            with pytest.raises(expected_error):
+                mutual_info(features, y, discrete_features=discrete)
+
+        mi_1 = mutual_info(X, y, discrete_features='auto', random_state=0)
+        mi_2 = mutual_info(X, y, discrete_features=False, random_state=0)
+        mi_3 = mutual_info(X_csr, y, discrete_features='auto', random_state=0)
+        mi_4 = mutual_info(X_csr, y, discrete_features=True, random_state=0)
+        mi_5 = mutual_info(X, y, discrete_features=[True, False, True],
+                           random_state=0)
+        mi_6 = mutual_info(X, y, discrete_features=[0, 2], random_state=0)
+
+        # Each pair of equivalent option spellings must give equal scores.
+        for left, right in ((mi_1, mi_2), (mi_3, mi_4), (mi_5, mi_6)):
+            assert_array_equal(left, right)
+
+    # Evaluated once, after the loop, on the values of the last iteration.
+    assert not np.allclose(mi_1, mi_3)
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/test_rfe.py
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/test_rfe.py
@ -0,0 +1,405 @@
+"""
+Testing Recursive feature elimination
+"""
+
+import pytest
+import numpy as np
+from numpy.testing import assert_array_almost_equal, assert_array_equal
+from scipy import sparse
+
+from sklearn.feature_selection import RFE, RFECV
+from sklearn.datasets import load_iris, make_friedman1
+from sklearn.metrics import zero_one_loss
+from sklearn.svm import SVC, SVR
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import GroupKFold
+
+from sklearn.utils import check_random_state
+from sklearn.utils._testing import ignore_warnings
+
+from sklearn.metrics import make_scorer
+from sklearn.metrics import get_scorer
+
+
+class MockClassifier:
+    """
+    Stand-in classifier used to exercise recursive feature elimination.
+
+    ``fit`` assigns a coefficient of one to every feature, every
+    prediction-style method returns the number of rows it was given, and
+    ``score`` is always zero.
+    """
+
+    def __init__(self, foo_param=0):
+        self.foo_param = foo_param
+
+    def fit(self, X, y):
+        assert len(X) == len(y)
+        n_columns = X.shape[1]
+        self.coef_ = np.ones(n_columns, dtype=np.float64)
+        return self
+
+    def predict(self, T):
+        n_rows = T.shape[0]
+        return n_rows
+
+    # All prediction-style entry points share the ``predict`` implementation.
+    predict_proba = decision_function = transform = predict
+
+    def score(self, X=None, y=None):
+        return 0.0
+
+    def get_params(self, deep=True):
+        return dict(foo_param=self.foo_param)
+
+    def set_params(self, **params):
+        # The supplied parameters are not stored.
+        return self
+
+    def _get_tags(self):
+        return dict()
+
+
+def test_rfe_features_importance():
+    # RFE driven by a random forest must select the same features as RFE
+    # driven by a linear SVC on iris padded with six noise columns.
+    generator = check_random_state(0)
+    iris = load_iris()
+    noise = generator.normal(size=(len(iris.data), 6))
+    X = np.c_[iris.data, noise]
+    y = iris.target
+
+    forest = RandomForestClassifier(n_estimators=20,
+                                    random_state=generator, max_depth=2)
+    rfe_forest = RFE(estimator=forest, n_features_to_select=4, step=0.1)
+    rfe_forest.fit(X, y)
+    assert len(rfe_forest.ranking_) == X.shape[1]
+
+    linear_svc = SVC(kernel="linear")
+    rfe_svc = RFE(estimator=linear_svc, n_features_to_select=4, step=0.1)
+    rfe_svc.fit(X, y)
+
+    # Check if the supports are equal
+    assert_array_equal(rfe_forest.get_support(), rfe_svc.get_support())
+
+
+def test_rfe():
+    # RFE must recover the four iris columns from iris padded with six
+    # noise columns, for both dense and sparse input.
+    generator = check_random_state(0)
+    iris = load_iris()
+    noise = generator.normal(size=(len(iris.data), 6))
+    X = np.c_[iris.data, noise]
+    X_sparse = sparse.csr_matrix(X)
+    y = iris.target
+
+    # dense model
+    svc = SVC(kernel="linear")
+    rfe_dense = RFE(estimator=svc, n_features_to_select=4, step=0.1)
+    rfe_dense.fit(X, y)
+    X_dense_selected = rfe_dense.transform(X)
+    svc.fit(X_dense_selected, y)
+    assert len(rfe_dense.ranking_) == X.shape[1]
+
+    # sparse model
+    svc_for_sparse = SVC(kernel="linear")
+    rfe_sparse = RFE(estimator=svc_for_sparse, n_features_to_select=4,
+                     step=0.1)
+    rfe_sparse.fit(X_sparse, y)
+    X_sparse_selected = rfe_sparse.transform(X_sparse)
+
+    assert X_dense_selected.shape == iris.data.shape
+    assert_array_almost_equal(X_dense_selected[:10], iris.data[:10])
+
+    assert_array_almost_equal(rfe_dense.predict(X), svc.predict(iris.data))
+    assert rfe_dense.score(X, y) == svc.score(iris.data, iris.target)
+    assert_array_almost_equal(X_dense_selected, X_sparse_selected.toarray())
+
+
+def test_rfe_mockclassifier():
+    # RFE must also work with a minimal estimator that is not a
+    # scikit-learn class (MockClassifier).
+    generator = check_random_state(0)
+    iris = load_iris()
+    noise = generator.normal(size=(len(iris.data), 6))
+    X = np.c_[iris.data, noise]
+    y = iris.target
+
+    # dense model
+    mock = MockClassifier()
+    rfe = RFE(estimator=mock, n_features_to_select=4, step=0.1)
+    rfe.fit(X, y)
+    X_selected = rfe.transform(X)
+    mock.fit(X_selected, y)
+    assert len(rfe.ranking_) == X.shape[1]
+    assert X_selected.shape == iris.data.shape
+
+
+def test_rfecv():
+    # RFECV on iris padded with six noise columns: with several scoring
+    # options and step sizes, on dense and sparse input, the transformed
+    # data must equal the original iris features.
+    generator = check_random_state(0)
+    iris = load_iris()
+    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
+    y = list(iris.target)   # regression test: list should be supported
+
+    # Test using the score function
+    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1)
+    rfecv.fit(X, y)
+    # non-regression test for missing worst feature:
+    assert len(rfecv.grid_scores_) == X.shape[1]
+    assert len(rfecv.ranking_) == X.shape[1]
+    X_r = rfecv.transform(X)
+
+    # All the noisy variables were filtered out
+    assert_array_equal(X_r, iris.data)
+
+    # same in sparse
+    rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=1)
+    X_sparse = sparse.csr_matrix(X)
+    rfecv_sparse.fit(X_sparse, y)
+    X_r_sparse = rfecv_sparse.transform(X_sparse)
+    assert_array_equal(X_r_sparse.toarray(), iris.data)
+
+    # Test using a customized loss function
+    scoring = make_scorer(zero_one_loss, greater_is_better=False)
+    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, scoring=scoring)
+    # Warnings raised while fitting are silenced for this call only.
+    ignore_warnings(rfecv.fit)(X, y)
+    X_r = rfecv.transform(X)
+    assert_array_equal(X_r, iris.data)
+
+    # Test using a scorer
+    scorer = get_scorer('accuracy')
+    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, scoring=scorer)
+    rfecv.fit(X, y)
+    X_r = rfecv.transform(X)
+    assert_array_equal(X_r, iris.data)
+
+    # Test fix on grid_scores
+    # Constant scorer: every candidate feature subset gets the same score.
+    def test_scorer(estimator, X, y):
+        return 1.0
+    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, scoring=test_scorer)
+    rfecv.fit(X, y)
+    assert_array_equal(rfecv.grid_scores_, np.ones(len(rfecv.grid_scores_)))
+    # In the event of cross validation score ties, the expected behavior of
+    # RFECV is to return the FEWEST features that maximize the CV score.
+    # Because test_scorer always returns 1.0 in this example, RFECV should
+    # reduce the dimensionality to a single feature (i.e. n_features_ = 1)
+    assert rfecv.n_features_ == 1
+
+    # Same as the first two tests, but with step=2
+    rfecv = RFECV(estimator=SVC(kernel="linear"), step=2)
+    rfecv.fit(X, y)
+    # X has 10 columns; the test expects 6 score entries for step=2.
+    assert len(rfecv.grid_scores_) == 6
+    assert len(rfecv.ranking_) == X.shape[1]
+    X_r = rfecv.transform(X)
+    assert_array_equal(X_r, iris.data)
+
+    rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=2)
+    X_sparse = sparse.csr_matrix(X)
+    rfecv_sparse.fit(X_sparse, y)
+    X_r_sparse = rfecv_sparse.transform(X_sparse)
+    assert_array_equal(X_r_sparse.toarray(), iris.data)
+
+    # Verifying that steps < 1 don't blow up.
+    rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=.2)
+    X_sparse = sparse.csr_matrix(X)
+    rfecv_sparse.fit(X_sparse, y)
+    X_r_sparse = rfecv_sparse.transform(X_sparse)
+    assert_array_equal(X_r_sparse.toarray(), iris.data)
+
+
+def test_rfecv_mockclassifier():
+    # RFECV with the minimal MockClassifier must still report one grid
+    # score and one rank per input feature.
+    generator = check_random_state(0)
+    iris = load_iris()
+    noise = generator.normal(size=(len(iris.data), 6))
+    X = np.c_[iris.data, noise]
+    y = list(iris.target)   # regression test: list should be supported
+    n_columns = X.shape[1]
+
+    # Test using the score function
+    selector = RFECV(estimator=MockClassifier(), step=1)
+    selector.fit(X, y)
+    # non-regression test for missing worst feature:
+    assert len(selector.grid_scores_) == n_columns
+    assert len(selector.ranking_) == n_columns
+
+
+def test_rfecv_verbose_output():
+    # Check verbose=1 is producing an output.
+    from contextlib import redirect_stdout
+    from io import StringIO
+
+    generator = check_random_state(0)
+    iris = load_iris()
+    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
+    y = list(iris.target)
+
+    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, verbose=1)
+
+    # Capture stdout only while fitting. redirect_stdout puts the previous
+    # sys.stdout back on exit (even if fit raises), so the replacement
+    # stream does not leak into whatever runs after this test.
+    verbose_output = StringIO()
+    with redirect_stdout(verbose_output):
+        rfecv.fit(X, y)
+
+    verbose_output.seek(0)
+    assert len(verbose_output.readline()) > 0
+
+
+def test_rfecv_grid_scores_size():
+    generator = check_random_state(0)
+    iris = load_iris()
+    noise = generator.normal(size=(len(iris.data), 6))
+    X = np.c_[iris.data, noise]
+    y = list(iris.target)   # regression test: list should be supported
+
+    # Non-regression test for varying combinations of step and
+    # min_features_to_select.
+    for step, min_features_to_select in ((2, 1), (2, 2), (3, 3)):
+        selector = RFECV(estimator=MockClassifier(), step=step,
+                         min_features_to_select=min_features_to_select)
+        selector.fit(X, y)
+
+        # One score per elimination round plus one for the full feature set.
+        n_removable = X.shape[1] - min_features_to_select
+        expected_len = np.ceil(n_removable / step) + 1
+        assert len(selector.grid_scores_) == expected_len
+        assert len(selector.ranking_) == X.shape[1]
+        assert selector.n_features_ >= min_features_to_select
+
+
+def test_rfe_estimator_tags():
+    selector = RFE(SVC(kernel='linear'))
+    assert selector._estimator_type == "classifier"
+
+    # make sure that cross-validation is stratified
+    iris = load_iris()
+    fold_scores = cross_val_score(selector, iris.data, iris.target)
+    assert fold_scores.min() > .7
+
+
+def test_rfe_min_step():
+    # Whatever the step, RFE with default settings must end up keeping
+    # half of the features.
+    n_features = 10
+    X, y = make_friedman1(n_samples=50, n_features=n_features, random_state=0)
+    n_samples, n_features = X.shape
+    estimator = SVR(kernel="linear")
+
+    steps = (
+        0.01,  # floor(step * n_features) <= 0
+        0.20,  # step in (0, 1) and floor(step * n_features) > 0
+        5,     # step is an integer
+    )
+    for step in steps:
+        sel = RFE(estimator, step=step).fit(X, y)
+        assert sel.support_.sum() == n_features // 2
+
+
+def test_number_of_subsets_of_features():
+    """Check both closed forms for the number of feature subsets in RFE.
+
+    In RFE, 'number_of_subsets_of_features'
+    = the number of iterations in '_fit'
+    = max(ranking_)
+    = 1 + (n_features + step - n_features_to_select - 1) // step
+    After optimization #4534, this number
+    = 1 + np.ceil((n_features - n_features_to_select) / float(step))
+    This test checks that the two are equivalent, refer to #4534 and #3824.
+    """
+
+    def formula1(n_features, n_features_to_select, step):
+        # Integer (floor-division) form.
+        return 1 + ((n_features + step - n_features_to_select - 1) // step)
+
+    def formula2(n_features, n_features_to_select, step):
+        # Ceiling form used after #4534.
+        return 1 + np.ceil((n_features - n_features_to_select) / float(step))
+
+    def make_problem(n_features):
+        # A fresh generator with the same seed is created for every case.
+        rng = check_random_state(43)
+        X = rng.normal(size=(100, n_features))
+        y = rng.rand(100).round()
+        return X, y
+
+    # RFE
+    # Case 1, n_features - n_features_to_select is divisible by step
+    # Case 2, n_features - n_features_to_select is not divisible by step
+    rfe_cases = [(11, 3, 2), (11, 3, 3)]
+    for n_features, n_features_to_select, step in rfe_cases:
+        X, y = make_problem(n_features)
+        rfe = RFE(estimator=SVC(kernel="linear"),
+                  n_features_to_select=n_features_to_select, step=step)
+        rfe.fit(X, y)
+        # this number also equals the maximum of ranking_
+        n_subsets = np.max(rfe.ranking_)
+        assert n_subsets == formula1(n_features, n_features_to_select, step)
+        assert n_subsets == formula2(n_features, n_features_to_select, step)
+
+    # In RFECV, 'fit' calls 'RFE._fit'
+    # 'number_of_subsets_of_features' of RFE
+    # = the size of 'grid_scores' of RFECV
+    # = the number of iterations of the for loop before optimization #4534
+
+    # RFECV, n_features_to_select = 1
+    # Case 1, n_features - 1 is divisible by step
+    # Case 2, n_features - 1 is not divisible by step
+    n_features_to_select = 1
+    rfecv_cases = [(11, 2), (10, 2)]
+    for n_features, step in rfecv_cases:
+        X, y = make_problem(n_features)
+        rfecv = RFECV(estimator=SVC(kernel="linear"), step=step)
+        rfecv.fit(X, y)
+
+        n_scores = rfecv.grid_scores_.shape[0]
+        assert n_scores == formula1(n_features, n_features_to_select, step)
+        assert n_scores == formula2(n_features, n_features_to_select, step)
+
+
+def test_rfe_cv_n_jobs():
+    """RFECV gives the same ranking and grid scores with n_jobs=2."""
+    rng = check_random_state(0)
+    iris = load_iris()
+    # Append six pure-noise columns to the iris features.
+    noise = rng.normal(size=(len(iris.data), 6))
+    X = np.c_[iris.data, noise]
+    y = iris.target
+
+    rfecv = RFECV(estimator=SVC(kernel='linear'))
+    rfecv.fit(X, y)
+    reference_ranking = rfecv.ranking_
+    reference_grid_scores = rfecv.grid_scores_
+
+    # Refit the same estimator object with two jobs and compare.
+    rfecv.set_params(n_jobs=2)
+    rfecv.fit(X, y)
+    assert_array_almost_equal(rfecv.ranking_, reference_ranking)
+    assert_array_almost_equal(rfecv.grid_scores_, reference_grid_scores)
+
+
+def test_rfe_cv_groups():
+    """RFECV can be fitted with ``groups`` and a GroupKFold splitter."""
+    rng = check_random_state(0)
+    iris = load_iris()
+    number_groups = 4
+    groups = np.floor(np.linspace(0, number_groups, len(iris.target)))
+    # Binary target: class 0 versus the rest.
+    binary_target = (iris.target > 0).astype(int)
+
+    forest = RandomForestClassifier(random_state=rng)
+    splitter = GroupKFold(n_splits=2)
+    est_groups = RFECV(estimator=forest, step=1, scoring='accuracy',
+                       cv=splitter)
+    est_groups.fit(iris.data, binary_target, groups=groups)
+    assert est_groups.n_features_ > 0
+
+
+@pytest.mark.parametrize("cv", [
+    None,
+    5
+])
+def test_rfe_allow_nan_inf_in_x(cv):
+    """RFE / RFECV fit and transform X containing NaN and inf entries.
+
+    Parameters
+    ----------
+    cv : int or None
+        ``None`` exercises plain RFE; any other value is passed to RFECV
+        as its ``cv`` argument.
+    """
+    iris = load_iris()
+    X = iris.data
+    y = iris.target
+
+    # add nan and inf value to X
+    # (np.nan / np.inf are used because the capitalised aliases np.NaN and
+    # np.Inf were removed in NumPy 2.0)
+    X[0, 0] = np.nan
+    X[0, 1] = np.inf
+
+    clf = MockClassifier()
+    if cv is not None:
+        rfe = RFECV(estimator=clf, cv=cv)
+    else:
+        rfe = RFE(estimator=clf)
+    # The test passes as long as neither call raises.
+    rfe.fit(X, y)
+    rfe.transform(X)
+
+
+@pytest.mark.parametrize('ClsRFE', [
+    RFE,
+    RFECV
+    ])
+def test_multioutput(ClsRFE):
+    """Smoke test: RFE and RFECV can be fitted on a two-column target.
+
+    Parameters
+    ----------
+    ClsRFE : class
+        The selector class under test (RFE or RFECV).
+    """
+    # Seeded generator and forest so the test is reproducible; the original
+    # drew from the unseeded global np.random state.
+    rng = np.random.RandomState(0)
+    X = rng.normal(size=(10, 3))
+    y = rng.randint(2, size=(10, 2))
+    clf = RandomForestClassifier(n_estimators=5, random_state=0)
+    rfe_test = ClsRFE(clf)
+    rfe_test.fit(X, y)
--- a/venv/Lib/site-packages/sklearn/feature_selection/tests/test_variance_threshold.py
+++ b/venv/Lib/site-packages/sklearn/feature_selection/tests/test_variance_threshold.py
@ -0,0 +1,60 @@
+import numpy as np
+import pytest
+
+from sklearn.utils._testing import assert_array_equal
+
+from scipy.sparse import bsr_matrix, csc_matrix, csr_matrix
+
+from sklearn.feature_selection import VarianceThreshold
+
+# Dense fixture with 3 samples and 5 features; the third column (index 2)
+# is constant, so it has zero variance.
+data = [[0, 1, 2, 3, 4],
+        [0, 2, 2, 3, 5],
+        [1, 1, 2, 4, 0]]
+
+# Ten identical samples of a single feature. np.var of this column may come
+# out slightly non-zero because of floating point error (see the skipif on
+# test_zero_variance_floating_point_error).
+data2 = [[-0.13725701]] * 10
+
+def test_zero_variance():
+    """Default VarianceThreshold drops the constant column of ``data``."""
+    expected = [0, 1, 3, 4]
+    containers = [data, csr_matrix(data), csc_matrix(data), bsr_matrix(data)]
+    for X in containers:
+        selector = VarianceThreshold().fit(X)
+        kept = selector.get_support(indices=True)
+        assert_array_equal(expected, kept)
+
+    # Inputs in which every feature is constant must be rejected.
+    for degenerate in ([[0, 1, 2, 3]], [[0, 1], [0, 1]]):
+        with pytest.raises(ValueError):
+            VarianceThreshold().fit(degenerate)
+
+
+def test_variance_threshold():
+    """With threshold=.4 a single feature of ``data`` is retained."""
+    expected_shape = (len(data), 1)
+    for container in (data, csr_matrix(data)):
+        reduced = VarianceThreshold(threshold=.4).fit_transform(container)
+        assert reduced.shape == expected_shape
+
+
+@pytest.mark.skipif(np.var(data2) == 0,
+                    reason=('This test is not valid for this platform, '
+                            'as it relies on numerical instabilities.'))
+def test_zero_variance_floating_point_error():
+    """VarianceThreshold rejects a constant feature despite rounding noise.
+
+    VarianceThreshold(0.0).fit must eliminate features that have the same
+    value in every sample, even when floating point errors cause np.var
+    not to be 0 for the feature.  See #13691.
+    """
+    msg = "No feature in X meets the variance threshold 0.00000"
+    containers = (data2, csr_matrix(data2), csc_matrix(data2),
+                  bsr_matrix(data2))
+    for container in containers:
+        with pytest.raises(ValueError, match=msg):
+            VarianceThreshold().fit(container)
+
+
+def test_variance_nan():
+    """VarianceThreshold keeps a feature with one NaN, drops an all-NaN one.
+
+    Checked for a dense array and for CSR, CSC and BSR sparse matrices.
+    """
+    arr = np.array(data, dtype=np.float64)
+    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
+    # add single NaN and feature should still be included
+    arr[0, 0] = np.nan
+    # make all values in feature NaN and feature should be rejected
+    arr[:, 1] = np.nan
+
+    for X in [arr, csr_matrix(arr), csc_matrix(arr), bsr_matrix(arr)]:
+        sel = VarianceThreshold().fit(X)
+        # Feature 1 is all-NaN and feature 2 is constant; both are dropped.
+        assert_array_equal([0, 3, 4], sel.get_support(indices=True))