Uploaded Test files

Batuhan Berk Başoğlu 2020-11-12 11:05:57 -05:00
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions

sklearn/feature_selection/__init__.py
@@ -0,0 +1,46 @@
"""
The :mod:`sklearn.feature_selection` module implements feature selection
algorithms. It currently includes univariate filter selection methods and the
recursive feature elimination algorithm.
"""
from ._univariate_selection import chi2
from ._univariate_selection import f_classif
from ._univariate_selection import f_oneway
from ._univariate_selection import f_regression
from ._univariate_selection import SelectPercentile
from ._univariate_selection import SelectKBest
from ._univariate_selection import SelectFpr
from ._univariate_selection import SelectFdr
from ._univariate_selection import SelectFwe
from ._univariate_selection import GenericUnivariateSelect
from ._variance_threshold import VarianceThreshold
from ._rfe import RFE
from ._rfe import RFECV
from ._from_model import SelectFromModel
from ._mutual_info import mutual_info_regression, mutual_info_classif
from ._base import SelectorMixin
__all__ = ['GenericUnivariateSelect',
'RFE',
'RFECV',
'SelectFdr',
'SelectFpr',
'SelectFwe',
'SelectKBest',
'SelectFromModel',
'SelectPercentile',
'VarianceThreshold',
'chi2',
'f_classif',
'f_oneway',
'f_regression',
'mutual_info_classif',
'mutual_info_regression',
'SelectorMixin']
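
A minimal usage sketch of the univariate filters exported above; the digits data and k=20 mirror the SelectKBest docstring example that appears later in this commit:

import numpy as np
from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectKBest, chi2

# Keep the 20 features with the highest chi-squared scores.
X, y = load_digits(return_X_y=True)
X_new = SelectKBest(chi2, k=20).fit_transform(X, y)
print(X.shape, '->', X_new.shape)   # (1797, 64) -> (1797, 20)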

sklearn/feature_selection/_base.py
@@ -0,0 +1,123 @@
# -*- coding: utf-8 -*-
"""Generic feature selection mixin"""
# Authors: G. Varoquaux, A. Gramfort, L. Buitinck, J. Nothman
# License: BSD 3 clause
from abc import ABCMeta, abstractmethod
from warnings import warn
import numpy as np
from scipy.sparse import issparse, csc_matrix
from ..base import TransformerMixin
from ..utils import check_array, safe_mask
class SelectorMixin(TransformerMixin, metaclass=ABCMeta):
"""
Transformer mixin that performs feature selection given a support mask
This mixin provides a feature selector implementation with `transform` and
`inverse_transform` functionality given an implementation of
`_get_support_mask`.
"""
def get_support(self, indices=False):
"""
Get a mask, or integer index, of the features selected
Parameters
----------
indices : boolean (default False)
If True, the return value will be an array of integers, rather
than a boolean mask.
Returns
-------
support : array
An index that selects the retained features from a feature vector.
If `indices` is False, this is a boolean array of shape
[# input features], in which an element is True iff its
corresponding feature is selected for retention. If `indices` is
True, this is an integer array of shape [# output features] whose
values are indices into the input feature vector.
"""
mask = self._get_support_mask()
return mask if not indices else np.where(mask)[0]
@abstractmethod
def _get_support_mask(self):
"""
Get the boolean mask indicating which features are selected
Returns
-------
support : boolean array of shape [# input features]
An element is True iff its corresponding feature is selected for
retention.
"""
def transform(self, X):
"""Reduce X to the selected features.
Parameters
----------
X : array of shape [n_samples, n_features]
The input samples.
Returns
-------
X_r : array of shape [n_samples, n_selected_features]
The input samples with only the selected features.
"""
tags = self._get_tags()
X = check_array(X, dtype=None, accept_sparse='csr',
force_all_finite=not tags.get('allow_nan', True))
mask = self.get_support()
if not mask.any():
warn("No features were selected: either the data is"
" too noisy or the selection test too strict.",
UserWarning)
return np.empty(0).reshape((X.shape[0], 0))
if len(mask) != X.shape[1]:
raise ValueError("X has a different shape than during fitting.")
return X[:, safe_mask(X, mask)]
def inverse_transform(self, X):
"""
Reverse the transformation operation
Parameters
----------
X : array of shape [n_samples, n_selected_features]
The input samples.
Returns
-------
X_r : array of shape [n_samples, n_original_features]
`X` with columns of zeros inserted where features would have
been removed by :meth:`transform`.
"""
if issparse(X):
X = X.tocsc()
# insert additional entries in indptr:
# e.g. if transform changed indptr from [0 2 6 7] to [0 2 3]
# col_nonzeros here will be [2 0 1] so indptr becomes [0 2 2 3]
it = self.inverse_transform(np.diff(X.indptr).reshape(1, -1))
col_nonzeros = it.ravel()
indptr = np.concatenate([[0], np.cumsum(col_nonzeros)])
Xt = csc_matrix((X.data, X.indices, indptr),
shape=(X.shape[0], len(indptr) - 1), dtype=X.dtype)
return Xt
support = self.get_support()
X = check_array(X, dtype=None)
if support.sum() != X.shape[1]:
raise ValueError("X has a different shape than during fitting.")
if X.ndim == 1:
X = X[None, :]
Xt = np.zeros((X.shape[0], support.size), dtype=X.dtype)
Xt[:, support] = X
return Xt
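
A short illustrative sketch of the transform/inverse_transform pair this mixin provides, using VarianceThreshold (one of the selectors that inherits it); the toy matrix is an assumption for demonstration:

import numpy as np
from sklearn.feature_selection import VarianceThreshold

X = np.array([[0., 1., 2.],
              [0., 3., 4.],
              [0., 5., 6.]])
selector = VarianceThreshold().fit(X)   # drops the zero-variance first column
print(selector.get_support())           # [False  True  True]
X_r = selector.transform(X)             # shape (3, 2)
print(selector.inverse_transform(X_r))  # zeros re-inserted in column 0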

sklearn/feature_selection/_from_model.py
@@ -0,0 +1,275 @@
# Authors: Gilles Louppe, Mathieu Blondel, Maheshakya Wijewardena
# License: BSD 3 clause
import numpy as np
import numbers
from ._base import SelectorMixin
from ..base import BaseEstimator, clone, MetaEstimatorMixin
from ..utils.validation import check_is_fitted
from ..exceptions import NotFittedError
from ..utils.metaestimators import if_delegate_has_method
from ..utils.validation import _deprecate_positional_args
def _get_feature_importances(estimator, norm_order=1):
"""Retrieve or aggregate feature importances from estimator"""
importances = getattr(estimator, "feature_importances_", None)
coef_ = getattr(estimator, "coef_", None)
if importances is None and coef_ is not None:
if estimator.coef_.ndim == 1:
importances = np.abs(coef_)
else:
importances = np.linalg.norm(coef_, axis=0,
ord=norm_order)
elif importances is None:
raise ValueError(
"The underlying estimator %s has no `coef_` or "
"`feature_importances_` attribute. Either pass a fitted estimator"
" to SelectFromModel or call fit before calling transform."
% estimator.__class__.__name__)
return importances
def _calculate_threshold(estimator, importances, threshold):
"""Interpret the threshold value"""
if threshold is None:
# determine default from estimator
est_name = estimator.__class__.__name__
if ((hasattr(estimator, "penalty") and estimator.penalty == "l1") or
"Lasso" in est_name):
# the natural default threshold is 0 when l1 penalty was used
threshold = 1e-5
else:
threshold = "mean"
if isinstance(threshold, str):
if "*" in threshold:
scale, reference = threshold.split("*")
scale = float(scale.strip())
reference = reference.strip()
if reference == "median":
reference = np.median(importances)
elif reference == "mean":
reference = np.mean(importances)
else:
raise ValueError("Unknown reference: " + reference)
threshold = scale * reference
elif threshold == "median":
threshold = np.median(importances)
elif threshold == "mean":
threshold = np.mean(importances)
else:
raise ValueError("Expected threshold='mean' or threshold='median', "
"got %s" % threshold)
else:
threshold = float(threshold)
return threshold
class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator):
"""Meta-transformer for selecting features based on importance weights.
.. versionadded:: 0.17
Parameters
----------
estimator : object
The base estimator from which the transformer is built.
This can be either a fitted (if ``prefit`` is set to True)
or a non-fitted estimator. The estimator must have either a
``feature_importances_`` or ``coef_`` attribute after fitting.
threshold : string or float, optional (default=None)
The threshold value to use for feature selection. Features whose
importance is greater or equal are kept while the others are
discarded. If "median" (resp. "mean"), then the ``threshold`` value is
the median (resp. the mean) of the feature importances. A scaling
factor (e.g., "1.25*mean") may also be used. If None and if the
estimator has a parameter penalty set to l1, either explicitly
or implicitly (e.g., Lasso), the threshold used is 1e-5.
Otherwise, "mean" is used by default.
prefit : bool, default False
Whether a prefit model is expected to be passed into the constructor
directly or not. If True, ``transform`` must be called directly
and SelectFromModel cannot be used with ``cross_val_score``,
``GridSearchCV`` and similar utilities that clone the estimator.
Otherwise train the model using ``fit`` and then ``transform`` to do
feature selection.
norm_order : non-zero int, inf, -inf, default 1
Order of the norm used to filter the vectors of coefficients below
``threshold`` in the case where the ``coef_`` attribute of the
estimator is of dimension 2.
max_features : int or None, optional
The maximum number of features to select.
To only select based on ``max_features``, set ``threshold=-np.inf``.
.. versionadded:: 0.20
Attributes
----------
estimator_ : an estimator
The base estimator from which the transformer is built.
This is stored only when a non-fitted estimator is passed to the
``SelectFromModel``, i.e. when prefit is False.
threshold_ : float
The threshold value used for feature selection.
Notes
-----
Allows NaN/Inf in the input if the underlying estimator does as well.
Examples
--------
>>> from sklearn.feature_selection import SelectFromModel
>>> from sklearn.linear_model import LogisticRegression
>>> X = [[ 0.87, -1.34, 0.31 ],
... [-2.79, -0.02, -0.85 ],
... [-1.34, -0.48, -2.55 ],
... [ 1.92, 1.48, 0.65 ]]
>>> y = [0, 1, 0, 1]
>>> selector = SelectFromModel(estimator=LogisticRegression()).fit(X, y)
>>> selector.estimator_.coef_
array([[-0.3252302 , 0.83462377, 0.49750423]])
>>> selector.threshold_
0.55245...
>>> selector.get_support()
array([False, True, False])
>>> selector.transform(X)
array([[-1.34],
[-0.02],
[-0.48],
[ 1.48]])
"""
@_deprecate_positional_args
def __init__(self, estimator, *, threshold=None, prefit=False,
norm_order=1, max_features=None):
self.estimator = estimator
self.threshold = threshold
self.prefit = prefit
self.norm_order = norm_order
self.max_features = max_features
def _get_support_mask(self):
# When prefit is True, SelectFromModel can call transform directly.
if self.prefit:
estimator = self.estimator
elif hasattr(self, 'estimator_'):
estimator = self.estimator_
else:
raise ValueError('Either fit the model before transform or set'
' "prefit=True" while passing the fitted'
' estimator to the constructor.')
scores = _get_feature_importances(estimator, self.norm_order)
threshold = _calculate_threshold(estimator, scores, self.threshold)
if self.max_features is not None:
mask = np.zeros_like(scores, dtype=bool)
candidate_indices = \
np.argsort(-scores, kind='mergesort')[:self.max_features]
mask[candidate_indices] = True
else:
mask = np.ones_like(scores, dtype=bool)
mask[scores < threshold] = False
return mask
def fit(self, X, y=None, **fit_params):
"""Fit the SelectFromModel meta-transformer.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The training input samples.
y : array-like, shape (n_samples,)
The target values (integers that correspond to classes in
classification, real numbers in regression).
**fit_params : Other estimator-specific parameters.
Returns
-------
self : object
"""
if self.max_features is not None:
if not isinstance(self.max_features, numbers.Integral):
raise TypeError("'max_features' should be an integer between"
" 0 and {} features. Got {!r} instead."
.format(X.shape[1], self.max_features))
elif self.max_features < 0 or self.max_features > X.shape[1]:
raise ValueError("'max_features' should be between 0 and {}"
" features. Got {} instead."
.format(X.shape[1], self.max_features))
if self.prefit:
raise NotFittedError(
"Since 'prefit=True', call transform directly")
self.estimator_ = clone(self.estimator)
self.estimator_.fit(X, y, **fit_params)
return self
@property
def threshold_(self):
scores = _get_feature_importances(self.estimator_, self.norm_order)
return _calculate_threshold(self.estimator, scores, self.threshold)
@if_delegate_has_method('estimator')
def partial_fit(self, X, y=None, **fit_params):
"""Fit the SelectFromModel meta-transformer only once.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The training input samples.
y : array-like, shape (n_samples,)
The target values (integers that correspond to classes in
classification, real numbers in regression).
**fit_params : Other estimator-specific parameters.
Returns
-------
self : object
"""
if self.prefit:
raise NotFittedError(
"Since 'prefit=True', call transform directly")
if not hasattr(self, "estimator_"):
self.estimator_ = clone(self.estimator)
self.estimator_.partial_fit(X, y, **fit_params)
return self
@property
def n_features_in_(self):
# For consistency with other estimators we raise an AttributeError so
# that hasattr() fails if the estimator isn't fitted.
try:
check_is_fitted(self)
except NotFittedError as nfe:
raise AttributeError(
"{} object has no n_features_in_ attribute."
.format(self.__class__.__name__)
) from nfe
return self.estimator_.n_features_in_
def _more_tags(self):
estimator_tags = self.estimator._get_tags()
return {'allow_nan': estimator_tags.get('allow_nan', True)}
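
A hedged usage sketch of the threshold grammar parsed by _calculate_threshold above ("mean", "median", a float, or a scaled form like "1.25*mean"); the RandomForestClassifier and the synthetic data are illustrative assumptions, not implied by this file:

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel

rng = np.random.RandomState(0)
X = rng.randn(200, 8)
y = (X[:, 0] + 2 * X[:, 1] > 0).astype(int)   # two informative features

# Keep features whose importance exceeds 1.25 times the mean importance.
selector = SelectFromModel(RandomForestClassifier(random_state=0),
                           threshold="1.25*mean").fit(X, y)
print(selector.threshold_)     # scaled mean of feature_importances_
print(selector.get_support())  # likely selects the first two columns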

sklearn/feature_selection/_mutual_info.py
@@ -0,0 +1,448 @@
# Author: Nikolay Mayorov <n59_ru@hotmail.com>
# License: 3-clause BSD
import numpy as np
from scipy.sparse import issparse
from scipy.special import digamma
from ..metrics.cluster import mutual_info_score
from ..neighbors import NearestNeighbors
from ..preprocessing import scale
from ..utils import check_random_state
from ..utils.fixes import _astype_copy_false
from ..utils.validation import check_array, check_X_y
from ..utils.validation import _deprecate_positional_args
from ..utils.multiclass import check_classification_targets
def _compute_mi_cc(x, y, n_neighbors):
"""Compute mutual information between two continuous variables.
Parameters
----------
x, y : ndarray, shape (n_samples,)
Samples of two continuous random variables, must have an identical
shape.
n_neighbors : int
Number of nearest neighbors to search for each point, see [1]_.
Returns
-------
mi : float
Estimated mutual information. If it turns out to be negative, it is
replaced by 0.
Notes
-----
True mutual information can't be negative. If its estimate by a numerical
method is negative, it means (providing the method is adequate) that the
mutual information is close to 0 and replacing it by 0 is a reasonable
strategy.
References
----------
.. [1] A. Kraskov, H. Stogbauer and P. Grassberger, "Estimating mutual
information". Phys. Rev. E 69, 2004.
"""
n_samples = x.size
x = x.reshape((-1, 1))
y = y.reshape((-1, 1))
xy = np.hstack((x, y))
# Here we rely on NearestNeighbors to select the fastest algorithm.
nn = NearestNeighbors(metric='chebyshev', n_neighbors=n_neighbors)
nn.fit(xy)
radius = nn.kneighbors()[0]
radius = np.nextafter(radius[:, -1], 0)
# Algorithm is selected explicitly to allow passing an array as radius
# later (not all algorithms support this).
nn.set_params(algorithm='kd_tree')
nn.fit(x)
ind = nn.radius_neighbors(radius=radius, return_distance=False)
nx = np.array([i.size for i in ind])
nn.fit(y)
ind = nn.radius_neighbors(radius=radius, return_distance=False)
ny = np.array([i.size for i in ind])
mi = (digamma(n_samples) + digamma(n_neighbors) -
np.mean(digamma(nx + 1)) - np.mean(digamma(ny + 1)))
return max(0, mi)
def _compute_mi_cd(c, d, n_neighbors):
"""Compute mutual information between continuous and discrete variables.
Parameters
----------
c : ndarray, shape (n_samples,)
Samples of a continuous random variable.
d : ndarray, shape (n_samples,)
Samples of a discrete random variable.
n_neighbors : int
Number of nearest neighbors to search for each point, see [1]_.
Returns
-------
mi : float
Estimated mutual information. If it turns out to be negative, it is
replaced by 0.
Notes
-----
True mutual information can't be negative. If its estimate by a numerical
method is negative, it means (providing the method is adequate) that the
mutual information is close to 0 and replacing it by 0 is a reasonable
strategy.
References
----------
.. [1] B. C. Ross "Mutual Information between Discrete and Continuous
Data Sets". PLoS ONE 9(2), 2014.
"""
n_samples = c.shape[0]
c = c.reshape((-1, 1))
radius = np.empty(n_samples)
label_counts = np.empty(n_samples)
k_all = np.empty(n_samples)
nn = NearestNeighbors()
for label in np.unique(d):
mask = d == label
count = np.sum(mask)
if count > 1:
k = min(n_neighbors, count - 1)
nn.set_params(n_neighbors=k)
nn.fit(c[mask])
r = nn.kneighbors()[0]
radius[mask] = np.nextafter(r[:, -1], 0)
k_all[mask] = k
label_counts[mask] = count
# Ignore points with unique labels.
mask = label_counts > 1
n_samples = np.sum(mask)
label_counts = label_counts[mask]
k_all = k_all[mask]
c = c[mask]
radius = radius[mask]
nn.set_params(algorithm='kd_tree')
nn.fit(c)
ind = nn.radius_neighbors(radius=radius, return_distance=False)
m_all = np.array([i.size for i in ind])
mi = (digamma(n_samples) + np.mean(digamma(k_all)) -
np.mean(digamma(label_counts)) -
np.mean(digamma(m_all + 1)))
return max(0, mi)
def _compute_mi(x, y, x_discrete, y_discrete, n_neighbors=3):
"""Compute mutual information between two variables.
This is a simple wrapper which selects a proper function to call based on
whether `x` and `y` are discrete or not.
"""
if x_discrete and y_discrete:
return mutual_info_score(x, y)
elif x_discrete and not y_discrete:
return _compute_mi_cd(y, x, n_neighbors)
elif not x_discrete and y_discrete:
return _compute_mi_cd(x, y, n_neighbors)
else:
return _compute_mi_cc(x, y, n_neighbors)
def _iterate_columns(X, columns=None):
"""Iterate over columns of a matrix.
Parameters
----------
X : ndarray or csc_matrix, shape (n_samples, n_features)
Matrix over which to iterate.
columns : iterable or None, default None
Indices of columns to iterate over. If None, iterate over all columns.
Yields
------
x : ndarray, shape (n_samples,)
Columns of `X` in dense format.
"""
if columns is None:
columns = range(X.shape[1])
if issparse(X):
for i in columns:
x = np.zeros(X.shape[0])
start_ptr, end_ptr = X.indptr[i], X.indptr[i + 1]
x[X.indices[start_ptr:end_ptr]] = X.data[start_ptr:end_ptr]
yield x
else:
for i in columns:
yield X[:, i]
def _estimate_mi(X, y, discrete_features='auto', discrete_target=False,
n_neighbors=3, copy=True, random_state=None):
"""Estimate mutual information between the features and the target.
Parameters
----------
X : array_like or sparse matrix, shape (n_samples, n_features)
Feature matrix.
y : array_like, shape (n_samples,)
Target vector.
discrete_features : {'auto', bool, array_like}, default 'auto'
If bool, then determines whether to consider all features discrete
or continuous. If array, then it should be either a boolean mask
with shape (n_features,) or array with indices of discrete features.
If 'auto', it is assigned to False for dense `X` and to True for
sparse `X`.
discrete_target : bool, default False
Whether to consider `y` as a discrete variable.
n_neighbors : int, default 3
Number of neighbors to use for MI estimation for continuous variables,
see [1]_ and [2]_. Higher values reduce variance of the estimation, but
could introduce a bias.
copy : bool, default True
Whether to make a copy of the given data. If set to False, the initial
data will be overwritten.
random_state : int, RandomState instance or None, optional, default None
Determines random number generation for adding small noise to
continuous variables in order to remove repeated values.
Pass an int for reproducible results across multiple function calls.
See :term:`Glossary <random_state>`.
Returns
-------
mi : ndarray, shape (n_features,)
Estimated mutual information between each feature and the target.
A negative value will be replaced by 0.
References
----------
.. [1] A. Kraskov, H. Stogbauer and P. Grassberger, "Estimating mutual
information". Phys. Rev. E 69, 2004.
.. [2] B. C. Ross "Mutual Information between Discrete and Continuous
Data Sets". PLoS ONE 9(2), 2014.
"""
X, y = check_X_y(X, y, accept_sparse='csc', y_numeric=not discrete_target)
n_samples, n_features = X.shape
if isinstance(discrete_features, (str, bool)):
if isinstance(discrete_features, str):
if discrete_features == 'auto':
discrete_features = issparse(X)
else:
raise ValueError("Invalid string value for discrete_features.")
discrete_mask = np.empty(n_features, dtype=bool)
discrete_mask.fill(discrete_features)
else:
discrete_features = check_array(discrete_features, ensure_2d=False)
if discrete_features.dtype != 'bool':
discrete_mask = np.zeros(n_features, dtype=bool)
discrete_mask[discrete_features] = True
else:
discrete_mask = discrete_features
continuous_mask = ~discrete_mask
if np.any(continuous_mask) and issparse(X):
raise ValueError("Sparse matrix `X` can't have continuous features.")
rng = check_random_state(random_state)
if np.any(continuous_mask):
if copy:
X = X.copy()
if not discrete_target:
X[:, continuous_mask] = scale(X[:, continuous_mask],
with_mean=False, copy=False)
# Add small noise to continuous features as advised in Kraskov et al.
X = X.astype(float, **_astype_copy_false(X))
means = np.maximum(1, np.mean(np.abs(X[:, continuous_mask]), axis=0))
X[:, continuous_mask] += 1e-10 * means * rng.randn(
n_samples, np.sum(continuous_mask))
if not discrete_target:
y = scale(y, with_mean=False)
y += 1e-10 * np.maximum(1, np.mean(np.abs(y))) * rng.randn(n_samples)
mi = [_compute_mi(x, y, discrete_feature, discrete_target, n_neighbors) for
x, discrete_feature in zip(_iterate_columns(X), discrete_mask)]
return np.array(mi)
@_deprecate_positional_args
def mutual_info_regression(X, y, *, discrete_features='auto', n_neighbors=3,
copy=True, random_state=None):
"""Estimate mutual information for a continuous target variable.
Mutual information (MI) [1]_ between two random variables is a non-negative
value, which measures the dependency between the variables. It is equal
to zero if and only if two random variables are independent, and higher
values mean higher dependency.
The function relies on nonparametric methods based on entropy estimation
from k-nearest neighbors distances as described in [2]_ and [3]_. Both
methods are based on the idea originally proposed in [4]_.
It can be used for univariate features selection, read more in the
:ref:`User Guide <univariate_feature_selection>`.
Parameters
----------
X : array_like or sparse matrix, shape (n_samples, n_features)
Feature matrix.
y : array_like, shape (n_samples,)
Target vector.
discrete_features : {'auto', bool, array_like}, default 'auto'
If bool, then determines whether to consider all features discrete
or continuous. If array, then it should be either a boolean mask
with shape (n_features,) or array with indices of discrete features.
If 'auto', it is assigned to False for dense `X` and to True for
sparse `X`.
n_neighbors : int, default 3
Number of neighbors to use for MI estimation for continuous variables,
see [2]_ and [3]_. Higher values reduce variance of the estimation, but
could introduce a bias.
copy : bool, default True
Whether to make a copy of the given data. If set to False, the initial
data will be overwritten.
random_state : int, RandomState instance or None, optional, default None
Determines random number generation for adding small noise to
continuous variables in order to remove repeated values.
Pass an int for reproducible results across multiple function calls.
See :term:`Glossary <random_state>`.
Returns
-------
mi : ndarray, shape (n_features,)
Estimated mutual information between each feature and the target.
Notes
-----
1. The term "discrete features" is used instead of naming them
"categorical", because it describes the essence more accurately.
For example, pixel intensities of an image are discrete features
(but hardly categorical) and you will get better results if you mark them
as such. Also note that treating a continuous variable as discrete and
vice versa will usually give incorrect results, so be attentive about that.
2. True mutual information can't be negative. If its estimate turns out
to be negative, it is replaced by zero.
References
----------
.. [1] `Mutual Information <https://en.wikipedia.org/wiki/Mutual_information>`_
on Wikipedia.
.. [2] A. Kraskov, H. Stogbauer and P. Grassberger, "Estimating mutual
information". Phys. Rev. E 69, 2004.
.. [3] B. C. Ross "Mutual Information between Discrete and Continuous
Data Sets". PLoS ONE 9(2), 2014.
.. [4] L. F. Kozachenko, N. N. Leonenko, "Sample Estimate of the Entropy
of a Random Vector", Probl. Peredachi Inf., 23:2 (1987), 9-16
"""
return _estimate_mi(X, y, discrete_features, False, n_neighbors,
copy, random_state)
@_deprecate_positional_args
def mutual_info_classif(X, y, *, discrete_features='auto', n_neighbors=3,
copy=True, random_state=None):
"""Estimate mutual information for a discrete target variable.
Mutual information (MI) [1]_ between two random variables is a non-negative
value, which measures the dependency between the variables. It is equal
to zero if and only if two random variables are independent, and higher
values mean higher dependency.
The function relies on nonparametric methods based on entropy estimation
from k-nearest neighbors distances as described in [2]_ and [3]_. Both
methods are based on the idea originally proposed in [4]_.
It can be used for univariate features selection, read more in the
:ref:`User Guide <univariate_feature_selection>`.
Parameters
----------
X : array_like or sparse matrix, shape (n_samples, n_features)
Feature matrix.
y : array_like, shape (n_samples,)
Target vector.
discrete_features : {'auto', bool, array_like}, default 'auto'
If bool, then determines whether to consider all features discrete
or continuous. If array, then it should be either a boolean mask
with shape (n_features,) or array with indices of discrete features.
If 'auto', it is assigned to False for dense `X` and to True for
sparse `X`.
n_neighbors : int, default 3
Number of neighbors to use for MI estimation for continuous variables,
see [2]_ and [3]_. Higher values reduce variance of the estimation, but
could introduce a bias.
copy : bool, default True
Whether to make a copy of the given data. If set to False, the initial
data will be overwritten.
random_state : int, RandomState instance or None, optional, default None
Determines random number generation for adding small noise to
continuous variables in order to remove repeated values.
Pass an int for reproducible results across multiple function calls.
See :term:`Glossary <random_state>`.
Returns
-------
mi : ndarray, shape (n_features,)
Estimated mutual information between each feature and the target.
Notes
-----
1. The term "discrete features" is used instead of naming them
"categorical", because it describes the essence more accurately.
For example, pixel intensities of an image are discrete features
(but hardly categorical) and you will get better results if you mark them
as such. Also note that treating a continuous variable as discrete and
vice versa will usually give incorrect results, so be attentive about that.
2. True mutual information can't be negative. If its estimate turns out
to be negative, it is replaced by zero.
References
----------
.. [1] `Mutual Information <https://en.wikipedia.org/wiki/Mutual_information>`_
on Wikipedia.
.. [2] A. Kraskov, H. Stogbauer and P. Grassberger, "Estimating mutual
information". Phys. Rev. E 69, 2004.
.. [3] B. C. Ross "Mutual Information between Discrete and Continuous
Data Sets". PLoS ONE 9(2), 2014.
.. [4] L. F. Kozachenko, N. N. Leonenko, "Sample Estimate of the Entropy
of a Random Vector", Probl. Peredachi Inf., 23:2 (1987), 9-16
"""
check_classification_targets(y)
return _estimate_mi(X, y, discrete_features, True, n_neighbors,
copy, random_state)
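
An illustrative sketch of the dispatch implemented by _compute_mi above: the discrete_features mask decides whether _compute_mi_cd, _compute_mi_cc, or mutual_info_score handles each column; the toy data is an assumption for demonstration:

import numpy as np
from sklearn.feature_selection import mutual_info_classif

rng = np.random.RandomState(0)
X = np.hstack([rng.randint(0, 3, size=(200, 1)),   # discrete column
               rng.randn(200, 1)])                 # continuous noise
y = (X[:, 0] > 0).astype(int)                      # depends only on column 0

mi = mutual_info_classif(X, y, discrete_features=[0], random_state=0)
print(mi)   # first entry well above zero, second close to zero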

sklearn/feature_selection/_rfe.py
@@ -0,0 +1,578 @@
# Authors: Alexandre Gramfort <alexandre.gramfort@inria.fr>
# Vincent Michel <vincent.michel@inria.fr>
# Gilles Louppe <g.louppe@gmail.com>
#
# License: BSD 3 clause
"""Recursive feature elimination for feature ranking"""
import numpy as np
from joblib import Parallel, delayed, effective_n_jobs
from ..utils import safe_sqr
from ..utils.metaestimators import if_delegate_has_method
from ..utils.metaestimators import _safe_split
from ..utils.validation import check_is_fitted
from ..utils.validation import _deprecate_positional_args
from ..base import BaseEstimator
from ..base import MetaEstimatorMixin
from ..base import clone
from ..base import is_classifier
from ..model_selection import check_cv
from ..model_selection._validation import _score
from ..metrics import check_scoring
from ._base import SelectorMixin
def _rfe_single_fit(rfe, estimator, X, y, train, test, scorer):
"""
Return the score for a fit across one fold.
"""
X_train, y_train = _safe_split(estimator, X, y, train)
X_test, y_test = _safe_split(estimator, X, y, test, train)
return rfe._fit(
X_train, y_train, lambda estimator, features:
_score(estimator, X_test[:, features], y_test, scorer)).scores_
class RFE(SelectorMixin, MetaEstimatorMixin, BaseEstimator):
"""Feature ranking with recursive feature elimination.
Given an external estimator that assigns weights to features (e.g., the
coefficients of a linear model), the goal of recursive feature elimination
(RFE) is to select features by recursively considering smaller and smaller
sets of features. First, the estimator is trained on the initial set of
features and the importance of each feature is obtained either through a
``coef_`` attribute or through a ``feature_importances_`` attribute.
Then, the least important features are pruned from the current set of features.
That procedure is recursively repeated on the pruned set until the desired
number of features to select is eventually reached.
Read more in the :ref:`User Guide <rfe>`.
Parameters
----------
estimator : object
A supervised learning estimator with a ``fit`` method that provides
information about feature importance either through a ``coef_``
attribute or through a ``feature_importances_`` attribute.
n_features_to_select : int or None (default=None)
The number of features to select. If `None`, half of the features
are selected.
step : int or float, optional (default=1)
If greater than or equal to 1, then ``step`` corresponds to the
(integer) number of features to remove at each iteration.
If within (0.0, 1.0), then ``step`` corresponds to the percentage
(rounded down) of features to remove at each iteration.
verbose : int, (default=0)
Controls verbosity of output.
Attributes
----------
n_features_ : int
The number of selected features.
support_ : array of shape [n_features]
The mask of selected features.
ranking_ : array of shape [n_features]
The feature ranking, such that ``ranking_[i]`` corresponds to the
ranking position of the i-th feature. Selected (i.e., estimated
best) features are assigned rank 1.
estimator_ : object
The external estimator fit on the reduced dataset.
Examples
--------
The following example shows how to retrieve the 5 most informative
features in the Friedman #1 dataset.
>>> from sklearn.datasets import make_friedman1
>>> from sklearn.feature_selection import RFE
>>> from sklearn.svm import SVR
>>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
>>> estimator = SVR(kernel="linear")
>>> selector = RFE(estimator, n_features_to_select=5, step=1)
>>> selector = selector.fit(X, y)
>>> selector.support_
array([ True, True, True, True, True, False, False, False, False,
False])
>>> selector.ranking_
array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])
Notes
-----
Allows NaN/Inf in the input if the underlying estimator does as well.
See also
--------
RFECV : Recursive feature elimination with built-in cross-validated
selection of the best number of features
References
----------
.. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., "Gene selection
for cancer classification using support vector machines",
Mach. Learn., 46(1-3), 389--422, 2002.
"""
@_deprecate_positional_args
def __init__(self, estimator, *, n_features_to_select=None, step=1,
verbose=0):
self.estimator = estimator
self.n_features_to_select = n_features_to_select
self.step = step
self.verbose = verbose
@property
def _estimator_type(self):
return self.estimator._estimator_type
@property
def classes_(self):
return self.estimator_.classes_
def fit(self, X, y):
"""Fit the RFE model and then the underlying estimator on the selected
features.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
The training input samples.
y : array-like of shape (n_samples,)
The target values.
"""
return self._fit(X, y)
def _fit(self, X, y, step_score=None):
# Parameter step_score controls the calculation of self.scores_
# step_score is not exposed to users
# and is used when implementing RFECV
# self.scores_ will not be calculated when calling _fit through fit
tags = self._get_tags()
X, y = self._validate_data(
X, y, accept_sparse="csc",
ensure_min_features=2,
force_all_finite=not tags.get('allow_nan', True),
multi_output=True
)
# Initialization
n_features = X.shape[1]
if self.n_features_to_select is None:
n_features_to_select = n_features // 2
else:
n_features_to_select = self.n_features_to_select
if 0.0 < self.step < 1.0:
step = int(max(1, self.step * n_features))
else:
step = int(self.step)
if step <= 0:
raise ValueError("Step must be >0")
support_ = np.ones(n_features, dtype=bool)
ranking_ = np.ones(n_features, dtype=int)
if step_score:
self.scores_ = []
# Elimination
while np.sum(support_) > n_features_to_select:
# Remaining features
features = np.arange(n_features)[support_]
# Rank the remaining features
estimator = clone(self.estimator)
if self.verbose > 0:
print("Fitting estimator with %d features." % np.sum(support_))
estimator.fit(X[:, features], y)
# Get coefs
if hasattr(estimator, 'coef_'):
coefs = estimator.coef_
else:
coefs = getattr(estimator, 'feature_importances_', None)
if coefs is None:
raise RuntimeError('The classifier does not expose '
'"coef_" or "feature_importances_" '
'attributes')
# Get ranks
if coefs.ndim > 1:
ranks = np.argsort(safe_sqr(coefs).sum(axis=0))
else:
ranks = np.argsort(safe_sqr(coefs))
# in the sparse case, ranks is a matrix
ranks = np.ravel(ranks)
# Eliminate the worst features
threshold = min(step, np.sum(support_) - n_features_to_select)
# Compute step score on the previous selection iteration
# because 'estimator' must use features
# that have not been eliminated yet
if step_score:
self.scores_.append(step_score(estimator, features))
support_[features[ranks][:threshold]] = False
ranking_[np.logical_not(support_)] += 1
# Set final attributes
features = np.arange(n_features)[support_]
self.estimator_ = clone(self.estimator)
self.estimator_.fit(X[:, features], y)
# Compute step score when only n_features_to_select features left
if step_score:
self.scores_.append(step_score(self.estimator_, features))
self.n_features_ = support_.sum()
self.support_ = support_
self.ranking_ = ranking_
return self
@if_delegate_has_method(delegate='estimator')
def predict(self, X):
"""Reduce X to the selected features and then predict using the
underlying estimator.
Parameters
----------
X : array of shape [n_samples, n_features]
The input samples.
Returns
-------
y : array of shape [n_samples]
The predicted target values.
"""
check_is_fitted(self)
return self.estimator_.predict(self.transform(X))
@if_delegate_has_method(delegate='estimator')
def score(self, X, y):
"""Reduce X to the selected features and then return the score of the
underlying estimator.
Parameters
----------
X : array of shape [n_samples, n_features]
The input samples.
y : array of shape [n_samples]
The target values.
"""
check_is_fitted(self)
return self.estimator_.score(self.transform(X), y)
def _get_support_mask(self):
check_is_fitted(self)
return self.support_
@if_delegate_has_method(delegate='estimator')
def decision_function(self, X):
"""Compute the decision function of ``X``.
Parameters
----------
X : {array-like or sparse matrix} of shape (n_samples, n_features)
The input samples. Internally, it will be converted to
``dtype=np.float32`` and if a sparse matrix is provided
to a sparse ``csr_matrix``.
Returns
-------
score : array, shape = [n_samples, n_classes] or [n_samples]
The decision function of the input samples. The order of the
classes corresponds to that in the attribute :term:`classes_`.
Regression and binary classification produce an array of shape
[n_samples].
"""
check_is_fitted(self)
return self.estimator_.decision_function(self.transform(X))
@if_delegate_has_method(delegate='estimator')
def predict_proba(self, X):
"""Predict class probabilities for X.
Parameters
----------
X : {array-like or sparse matrix} of shape (n_samples, n_features)
The input samples. Internally, it will be converted to
``dtype=np.float32`` and if a sparse matrix is provided
to a sparse ``csr_matrix``.
Returns
-------
p : array of shape (n_samples, n_classes)
The class probabilities of the input samples. The order of the
classes corresponds to that in the attribute :term:`classes_`.
"""
check_is_fitted(self)
return self.estimator_.predict_proba(self.transform(X))
@if_delegate_has_method(delegate='estimator')
def predict_log_proba(self, X):
"""Predict class log-probabilities for X.
Parameters
----------
X : array of shape [n_samples, n_features]
The input samples.
Returns
-------
p : array of shape (n_samples, n_classes)
The class log-probabilities of the input samples. The order of the
classes corresponds to that in the attribute :term:`classes_`.
"""
check_is_fitted(self)
return self.estimator_.predict_log_proba(self.transform(X))
def _more_tags(self):
estimator_tags = self.estimator._get_tags()
return {'poor_score': True,
'allow_nan': estimator_tags.get('allow_nan', True),
'requires_y': True,
}
class RFECV(RFE):
"""Feature ranking with recursive feature elimination and cross-validated
selection of the best number of features.
See glossary entry for :term:`cross-validation estimator`.
Read more in the :ref:`User Guide <rfe>`.
Parameters
----------
estimator : object
A supervised learning estimator with a ``fit`` method that provides
information about feature importance either through a ``coef_``
attribute or through a ``feature_importances_`` attribute.
step : int or float, optional (default=1)
If greater than or equal to 1, then ``step`` corresponds to the
(integer) number of features to remove at each iteration.
If within (0.0, 1.0), then ``step`` corresponds to the percentage
(rounded down) of features to remove at each iteration.
Note that the last iteration may remove fewer than ``step`` features in
order to reach ``min_features_to_select``.
min_features_to_select : int, (default=1)
The minimum number of features to be selected. This number of features
will always be scored, even if the difference between the original
feature count and ``min_features_to_select`` isn't divisible by
``step``.
.. versionadded:: 0.20
cv : int, cross-validation generator or an iterable, optional
Determines the cross-validation splitting strategy.
Possible inputs for cv are:
- None, to use the default 5-fold cross-validation,
- integer, to specify the number of folds,
- :term:`CV splitter`,
- An iterable yielding (train, test) splits as arrays of indices.
For integer/None inputs, if ``y`` is binary or multiclass,
:class:`sklearn.model_selection.StratifiedKFold` is used. If the
estimator is not a classifier or if ``y`` is neither binary nor
multiclass, :class:`sklearn.model_selection.KFold` is used.
Refer to the :ref:`User Guide <cross_validation>` for the various
cross-validation strategies that can be used here.
.. versionchanged:: 0.22
``cv`` default value of None changed from 3-fold to 5-fold.
scoring : string, callable or None, optional, (default=None)
A string (see model evaluation documentation) or
a scorer callable object / function with signature
``scorer(estimator, X, y)``.
verbose : int, (default=0)
Controls verbosity of output.
n_jobs : int or None, optional (default=None)
Number of cores to run in parallel while fitting across folds.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
.. versionadded:: 0.18
Attributes
----------
n_features_ : int
The number of selected features with cross-validation.
support_ : array of shape [n_features]
The mask of selected features.
ranking_ : array of shape [n_features]
The feature ranking, such that `ranking_[i]`
corresponds to the ranking
position of the i-th feature.
Selected (i.e., estimated best)
features are assigned rank 1.
grid_scores_ : array of shape [n_subsets_of_features]
The cross-validation scores such that
``grid_scores_[i]`` corresponds to
the CV score of the i-th subset of features.
estimator_ : object
The external estimator fit on the reduced dataset.
Notes
-----
The size of ``grid_scores_`` is equal to
``ceil((n_features - min_features_to_select) / step) + 1``,
where step is the number of features removed at each iteration.
Allows NaN/Inf in the input if the underlying estimator does as well.
Examples
--------
The following example shows how to retrieve the 5 informative features,
not known a priori, in the Friedman #1 dataset.
>>> from sklearn.datasets import make_friedman1
>>> from sklearn.feature_selection import RFECV
>>> from sklearn.svm import SVR
>>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
>>> estimator = SVR(kernel="linear")
>>> selector = RFECV(estimator, step=1, cv=5)
>>> selector = selector.fit(X, y)
>>> selector.support_
array([ True, True, True, True, True, False, False, False, False,
False])
>>> selector.ranking_
array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])
See also
--------
RFE : Recursive feature elimination
References
----------
.. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., "Gene selection
for cancer classification using support vector machines",
Mach. Learn., 46(1-3), 389--422, 2002.
"""
@_deprecate_positional_args
def __init__(self, estimator, *, step=1, min_features_to_select=1, cv=None,
scoring=None, verbose=0, n_jobs=None):
self.estimator = estimator
self.step = step
self.cv = cv
self.scoring = scoring
self.verbose = verbose
self.n_jobs = n_jobs
self.min_features_to_select = min_features_to_select
def fit(self, X, y, groups=None):
"""Fit the RFE model and automatically tune the number of selected
features.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Training vector, where `n_samples` is the number of samples and
`n_features` is the total number of features.
y : array-like of shape (n_samples,)
Target values (integers for classification, real numbers for
regression).
groups : array-like of shape (n_samples,) or None
Group labels for the samples used while splitting the dataset into
train/test set. Only used in conjunction with a "Group" :term:`cv`
instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).
.. versionadded:: 0.20
"""
tags = self._get_tags()
X, y = self._validate_data(
X, y, accept_sparse="csr", ensure_min_features=2,
force_all_finite=not tags.get('allow_nan', True),
multi_output=True
)
# Initialization
cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))
scorer = check_scoring(self.estimator, scoring=self.scoring)
n_features = X.shape[1]
if 0.0 < self.step < 1.0:
step = int(max(1, self.step * n_features))
else:
step = int(self.step)
if step <= 0:
raise ValueError("Step must be >0")
# Build an RFE object, which will evaluate and score each possible
# feature count, down to self.min_features_to_select
rfe = RFE(estimator=self.estimator,
n_features_to_select=self.min_features_to_select,
step=self.step, verbose=self.verbose)
# Determine the number of subsets of features by fitting across
# the train folds and choosing the "features_to_select" parameter
# that gives the least averaged error across all folds.
# Note that joblib raises a non-picklable error for bound methods
# even if n_jobs is set to 1 with the default multiprocessing
# backend.
# This branching is done to make sure that user code that sets
# n_jobs to 1 and provides bound methods as scorers is not broken
# with the addition of the n_jobs parameter in version 0.18.
if effective_n_jobs(self.n_jobs) == 1:
parallel, func = list, _rfe_single_fit
else:
parallel = Parallel(n_jobs=self.n_jobs)
func = delayed(_rfe_single_fit)
scores = parallel(
func(rfe, self.estimator, X, y, train, test, scorer)
for train, test in cv.split(X, y, groups))
scores = np.sum(scores, axis=0)
scores_rev = scores[::-1]
argmax_idx = len(scores) - np.argmax(scores_rev) - 1
n_features_to_select = max(
n_features - (argmax_idx * step),
self.min_features_to_select)
# Re-execute an elimination with best_k over the whole set
rfe = RFE(estimator=self.estimator,
n_features_to_select=n_features_to_select, step=self.step,
verbose=self.verbose)
rfe.fit(X, y)
# Set final attributes
self.support_ = rfe.support_
self.n_features_ = rfe.n_features_
self.ranking_ = rfe.ranking_
self.estimator_ = clone(self.estimator)
self.estimator_.fit(self.transform(X), y)
# Fixing a normalization error, n is equal to get_n_splits(X, y) - 1
# here, the scores are normalized by get_n_splits(X, y)
self.grid_scores_ = scores[::-1] / cv.get_n_splits(X, y, groups)
return self
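
A short sketch checking the grid_scores_ size formula from the RFECV notes, on the same Friedman #1 data used in the docstring examples:

import numpy as np
from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFECV
from sklearn.svm import SVR

X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
selector = RFECV(SVR(kernel="linear"), step=1, cv=5).fit(X, y)
print(selector.n_features_)        # 5 on this data
# ceil((n_features - min_features_to_select) / step) + 1 == ceil(9/1) + 1
print(len(selector.grid_scores_))  # 10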

sklearn/feature_selection/_univariate_selection.py
@@ -0,0 +1,831 @@
"""Univariate features selection."""
# Authors: V. Michel, B. Thirion, G. Varoquaux, A. Gramfort, E. Duchesnay.
# L. Buitinck, A. Joly
# License: BSD 3 clause
import numpy as np
import warnings
from scipy import special, stats
from scipy.sparse import issparse
from ..base import BaseEstimator
from ..preprocessing import LabelBinarizer
from ..utils import (as_float_array, check_array, check_X_y, safe_sqr,
safe_mask)
from ..utils.extmath import safe_sparse_dot, row_norms
from ..utils.validation import check_is_fitted
from ..utils.validation import _deprecate_positional_args
from ._base import SelectorMixin
def _clean_nans(scores):
"""
Fixes Issue #1240: NaNs can't be properly compared, so change them to the
smallest value of the scores' dtype. -inf seems to be unreliable.
"""
# XXX where should this function be called? fit? scoring functions
# themselves?
scores = as_float_array(scores, copy=True)
scores[np.isnan(scores)] = np.finfo(scores.dtype).min
return scores
######################################################################
# Scoring functions
# The following function is a rewriting of scipy.stats.f_oneway
# Contrary to the scipy.stats.f_oneway implementation it does not
# copy the data while keeping the inputs unchanged.
def f_oneway(*args):
"""Performs a 1-way ANOVA.
The one-way ANOVA tests the null hypothesis that 2 or more groups have
the same population mean. The test is applied to samples from two or
more groups, possibly with differing sizes.
Read more in the :ref:`User Guide <univariate_feature_selection>`.
Parameters
----------
*args : array_like, sparse matrices
sample1, sample2... The sample measurements should be given as
arguments.
Returns
-------
F-value : float
The computed F-value of the test.
p-value : float
The associated p-value from the F-distribution.
Notes
-----
The ANOVA test has important assumptions that must be satisfied in order
for the associated p-value to be valid.
1. The samples are independent
2. Each sample is from a normally distributed population
3. The population standard deviations of the groups are all equal. This
property is known as homoscedasticity.
If these assumptions are not true for a given set of data, it may still be
possible to use the Kruskal-Wallis H-test (`scipy.stats.kruskal`_) although
with some loss of power.
The algorithm is from Heiman[2], pp.394-7.
See ``scipy.stats.f_oneway``, which should give the same results while
being less efficient.
References
----------
.. [1] Lowry, Richard. "Concepts and Applications of Inferential
Statistics". Chapter 14.
http://faculty.vassar.edu/lowry/ch14pt1.html
.. [2] Heiman, G.W. Research Methods in Statistics. 2002.
"""
n_classes = len(args)
args = [as_float_array(a) for a in args]
n_samples_per_class = np.array([a.shape[0] for a in args])
n_samples = np.sum(n_samples_per_class)
ss_alldata = sum(safe_sqr(a).sum(axis=0) for a in args)
sums_args = [np.asarray(a.sum(axis=0)) for a in args]
square_of_sums_alldata = sum(sums_args) ** 2
square_of_sums_args = [s ** 2 for s in sums_args]
sstot = ss_alldata - square_of_sums_alldata / float(n_samples)
ssbn = 0.
for k, _ in enumerate(args):
ssbn += square_of_sums_args[k] / n_samples_per_class[k]
ssbn -= square_of_sums_alldata / float(n_samples)
sswn = sstot - ssbn
dfbn = n_classes - 1
dfwn = n_samples - n_classes
msb = ssbn / float(dfbn)
msw = sswn / float(dfwn)
constant_features_idx = np.where(msw == 0.)[0]
if (np.nonzero(msb)[0].size != msb.size and constant_features_idx.size):
warnings.warn("Features %s are constant." % constant_features_idx,
UserWarning)
f = msb / msw
# flatten matrix to vector in sparse case
f = np.asarray(f).ravel()
prob = special.fdtrc(dfbn, dfwn, f)
return f, prob
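# Quick sanity check (illustrative sketch): since this is a rewrite of
# scipy.stats.f_oneway, the two should agree numerically on 1-D samples:
#
#   >>> import numpy as np
#   >>> from scipy import stats
#   >>> rng = np.random.RandomState(0)
#   >>> a, b, c = rng.randn(10), rng.randn(12) + 0.5, rng.randn(8)
#   >>> F, p = f_oneway(a, b, c)              # this module's version
#   >>> F_sp, p_sp = stats.f_oneway(a, b, c)  # scipy's version
#   >>> bool(np.allclose(F, F_sp) and np.allclose(p, p_sp))
#   True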
def f_classif(X, y):
"""Compute the ANOVA F-value for the provided sample.
Read more in the :ref:`User Guide <univariate_feature_selection>`.
Parameters
----------
X : {array-like, sparse matrix} shape = [n_samples, n_features]
The set of regressors that will be tested sequentially.
y : array of shape (n_samples,)
The target vector (class labels).
Returns
-------
F : array, shape = [n_features,]
The set of F values.
pval : array, shape = [n_features,]
The set of p-values.
See also
--------
chi2: Chi-squared stats of non-negative features for classification tasks.
f_regression: F-value between label/feature for regression tasks.
"""
X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'])
args = [X[safe_mask(X, y == k)] for k in np.unique(y)]
return f_oneway(*args)
def _chisquare(f_obs, f_exp):
"""Fast replacement for scipy.stats.chisquare.
Version from https://github.com/scipy/scipy/pull/2525 with additional
optimizations.
"""
f_obs = np.asarray(f_obs, dtype=np.float64)
k = len(f_obs)
# Reuse f_obs for chi-squared statistics
chisq = f_obs
chisq -= f_exp
chisq **= 2
with np.errstate(invalid="ignore"):
chisq /= f_exp
chisq = chisq.sum(axis=0)
return chisq, special.chdtrc(k - 1, chisq)
def chi2(X, y):
"""Compute chi-squared stats between each non-negative feature and class.
This score can be used to select the n_features features with the
highest values for the test chi-squared statistic from X, which must
contain only non-negative features such as booleans or frequencies
(e.g., term counts in document classification), relative to the classes.
Recall that the chi-square test measures dependence between stochastic
variables, so using this function "weeds out" the features that are the
most likely to be independent of class and therefore irrelevant for
classification.
Read more in the :ref:`User Guide <univariate_feature_selection>`.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Sample vectors.
y : array-like of shape (n_samples,)
Target vector (class labels).
Returns
-------
chi2 : array, shape = (n_features,)
chi2 statistics of each feature.
pval : array, shape = (n_features,)
p-values of each feature.
Notes
-----
Complexity of this algorithm is O(n_classes * n_features).
See also
--------
f_classif: ANOVA F-value between label/feature for classification tasks.
f_regression: F-value between label/feature for regression tasks.
"""
# XXX: we might want to do some of the following in logspace instead for
# numerical stability.
X = check_array(X, accept_sparse='csr')
if np.any((X.data if issparse(X) else X) < 0):
raise ValueError("Input X must be non-negative.")
Y = LabelBinarizer().fit_transform(y)
if Y.shape[1] == 1:
Y = np.append(1 - Y, Y, axis=1)
observed = safe_sparse_dot(Y.T, X) # n_classes * n_features
feature_count = X.sum(axis=0).reshape(1, -1)
class_prob = Y.mean(axis=0).reshape(1, -1)
expected = np.dot(class_prob.T, feature_count)
return _chisquare(observed, expected)
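# Worked example (illustrative): with two classes and two term-count
# features, the observed/expected counts above give the statistics by hand,
# e.g. feature 0: (3-2)**2/2 + (1-2)**2/2 == 1.0.
#
#   >>> import numpy as np
#   >>> X = np.array([[1, 0], [2, 1], [0, 5], [1, 4]])
#   >>> y = np.array([0, 0, 1, 1])
#   >>> chi2_stats, pvals = chi2(X, y)
#   >>> chi2_stats          # feature 1 depends far more on the class
#   array([1. , 6.4])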
@_deprecate_positional_args
def f_regression(X, y, *, center=True):
"""Univariate linear regression tests.
Linear model for testing the individual effect of each of many regressors.
This is a scoring function to be used in a feature selection procedure, not
a free standing feature selection procedure.
This is done in 2 steps:
1. The correlation between each regressor and the target is computed,
that is, ((X[:, i] - mean(X[:, i])) * (y - mean_y)) / (std(X[:, i]) *
std(y)).
2. It is converted to an F score then to a p-value.
For more on usage see the :ref:`User Guide <univariate_feature_selection>`.
Parameters
----------
X : {array-like, sparse matrix} shape = (n_samples, n_features)
The set of regressors that will be tested sequentially.
y : array of shape (n_samples,)
The target vector.
center : bool, default=True
If True, X and y will be centered.
Returns
-------
F : array, shape=(n_features,)
F values of features.
pval : array, shape=(n_features,)
p-values of F-scores.
See also
--------
mutual_info_regression: Mutual information for a continuous target.
f_classif: ANOVA F-value between label/feature for classification tasks.
chi2: Chi-squared stats of non-negative features for classification tasks.
SelectKBest: Select features based on the k highest scores.
SelectFpr: Select features based on a false positive rate test.
SelectFdr: Select features based on an estimated false discovery rate.
SelectFwe: Select features based on family-wise error rate.
SelectPercentile: Select features based on percentile of the highest
scores.
"""
X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
dtype=np.float64)
n_samples = X.shape[0]
# compute centered values
# note that E[(x - mean(x))*(y - mean(y))] = E[x*(y - mean(y))], so we
# need not center X
if center:
y = y - np.mean(y)
if issparse(X):
X_means = X.mean(axis=0).getA1()
else:
X_means = X.mean(axis=0)
# compute the scaled standard deviations via moments
X_norms = np.sqrt(row_norms(X.T, squared=True) -
n_samples * X_means ** 2)
else:
X_norms = row_norms(X.T)
# compute the correlation
corr = safe_sparse_dot(y, X)
corr /= X_norms
corr /= np.linalg.norm(y)
# convert to p-value
degrees_of_freedom = y.size - (2 if center else 1)
F = corr ** 2 / (1 - corr ** 2) * degrees_of_freedom
pv = stats.f.sf(F, 1, degrees_of_freedom)
return F, pv
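# The two steps described in the docstring can be reproduced directly
# (illustrative sketch): Pearson correlation, then the F conversion with
# n_samples - 2 degrees of freedom when centering:
#
#   >>> import numpy as np
#   >>> rng = np.random.RandomState(0)
#   >>> X = rng.randn(50, 3)
#   >>> y = X[:, 0] + rng.randn(50)
#   >>> F, pv = f_regression(X, y)
#   >>> r = np.array([np.corrcoef(X[:, j], y)[0, 1] for j in range(3)])
#   >>> bool(np.allclose(F, r ** 2 / (1 - r ** 2) * (50 - 2)))
#   True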
######################################################################
# Base classes
class _BaseFilter(SelectorMixin, BaseEstimator):
"""Initialize the univariate feature selection.
Parameters
----------
score_func : callable
Function taking two arrays X and y, and returning a pair of arrays
(scores, pvalues) or a single array with scores.
"""
def __init__(self, score_func):
self.score_func = score_func
def fit(self, X, y):
"""Run score function on (X, y) and get the appropriate features.
Parameters
----------
X : array-like of shape (n_samples, n_features)
The training input samples.
y : array-like of shape (n_samples,)
The target values (class labels in classification, real numbers in
regression).
Returns
-------
self : object
"""
X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc'],
multi_output=True)
if not callable(self.score_func):
raise TypeError("The score function should be a callable, %s (%s) "
"was passed."
% (self.score_func, type(self.score_func)))
self._check_params(X, y)
score_func_ret = self.score_func(X, y)
if isinstance(score_func_ret, (list, tuple)):
self.scores_, self.pvalues_ = score_func_ret
self.pvalues_ = np.asarray(self.pvalues_)
else:
self.scores_ = score_func_ret
self.pvalues_ = None
self.scores_ = np.asarray(self.scores_)
return self
def _check_params(self, X, y):
pass
def _more_tags(self):
return {'requires_y': True}
######################################################################
# Specific filters
######################################################################
class SelectPercentile(_BaseFilter):
"""Select features according to a percentile of the highest scores.
Read more in the :ref:`User Guide <univariate_feature_selection>`.
Parameters
----------
score_func : callable
Function taking two arrays X and y, and returning a pair of arrays
(scores, pvalues) or a single array with scores.
Default is f_classif (see below "See also"). The default function only
works with classification tasks.
.. versionadded:: 0.18
percentile : int, optional, default=10
Percent of features to keep.
Attributes
----------
scores_ : array-like of shape (n_features,)
Scores of features.
pvalues_ : array-like of shape (n_features,)
p-values of feature scores, None if `score_func` returned only scores.
Examples
--------
>>> from sklearn.datasets import load_digits
>>> from sklearn.feature_selection import SelectPercentile, chi2
>>> X, y = load_digits(return_X_y=True)
>>> X.shape
(1797, 64)
>>> X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)
>>> X_new.shape
(1797, 7)
Notes
-----
Ties between features with equal scores will be broken in an unspecified
way.
See also
--------
f_classif: ANOVA F-value between label/feature for classification tasks.
mutual_info_classif: Mutual information for a discrete target.
chi2: Chi-squared stats of non-negative features for classification tasks.
f_regression: F-value between label/feature for regression tasks.
mutual_info_regression: Mutual information for a continuous target.
SelectKBest: Select features based on the k highest scores.
SelectFpr: Select features based on a false positive rate test.
SelectFdr: Select features based on an estimated false discovery rate.
SelectFwe: Select features based on family-wise error rate.
GenericUnivariateSelect: Univariate feature selector with configurable mode.
"""
@_deprecate_positional_args
def __init__(self, score_func=f_classif, *, percentile=10):
super().__init__(score_func=score_func)
self.percentile = percentile
def _check_params(self, X, y):
if not 0 <= self.percentile <= 100:
raise ValueError("percentile should be >=0, <=100; got %r"
% self.percentile)
def _get_support_mask(self):
check_is_fitted(self)
# Cater for NaNs
if self.percentile == 100:
return np.ones(len(self.scores_), dtype=bool)
elif self.percentile == 0:
return np.zeros(len(self.scores_), dtype=bool)
scores = _clean_nans(self.scores_)
threshold = np.percentile(scores, 100 - self.percentile)
mask = scores > threshold
ties = np.where(scores == threshold)[0]
if len(ties):
max_feats = int(len(scores) * self.percentile / 100)
kept_ties = ties[:max_feats - mask.sum()]
mask[kept_ties] = True
return mask
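# Tie promotion above, on made-up scores: with scores [1, 2, 2, 2] and
# percentile=50 the threshold is 2, nothing is strictly above it, and ties
# are promoted in index order until int(4 * 50 / 100) == 2 features remain:
#
#   >>> sel = SelectPercentile(lambda X, y: np.array([1., 2., 2., 2.]),
#   ...                        percentile=50).fit(np.zeros((2, 4)), [0, 1])
#   >>> sel.get_support()
#   array([False,  True,  True, False])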
class SelectKBest(_BaseFilter):
"""Select features according to the k highest scores.
Read more in the :ref:`User Guide <univariate_feature_selection>`.
Parameters
----------
score_func : callable
Function taking two arrays X and y, and returning a pair of arrays
(scores, pvalues) or a single array with scores.
Default is f_classif (see below "See also"). The default function only
works with classification tasks.
.. versionadded:: 0.18
k : int or "all", optional, default=10
Number of top features to select.
The "all" option bypasses selection, for use in a parameter search.
Attributes
----------
scores_ : array-like of shape (n_features,)
Scores of features.
pvalues_ : array-like of shape (n_features,)
p-values of feature scores, None if `score_func` returned only scores.
Examples
--------
>>> from sklearn.datasets import load_digits
>>> from sklearn.feature_selection import SelectKBest, chi2
>>> X, y = load_digits(return_X_y=True)
>>> X.shape
(1797, 64)
>>> X_new = SelectKBest(chi2, k=20).fit_transform(X, y)
>>> X_new.shape
(1797, 20)
Notes
-----
Ties between features with equal scores will be broken in an unspecified
way.
See also
--------
f_classif: ANOVA F-value between label/feature for classification tasks.
mutual_info_classif: Mutual information for a discrete target.
chi2: Chi-squared stats of non-negative features for classification tasks.
f_regression: F-value between label/feature for regression tasks.
mutual_info_regression: Mutual information for a continuous target.
SelectPercentile: Select features based on percentile of the highest scores.
SelectFpr: Select features based on a false positive rate test.
SelectFdr: Select features based on an estimated false discovery rate.
SelectFwe: Select features based on family-wise error rate.
GenericUnivariateSelect: Univariate feature selector with configurable mode.
"""
@_deprecate_positional_args
def __init__(self, score_func=f_classif, *, k=10):
super().__init__(score_func=score_func)
self.k = k
def _check_params(self, X, y):
if not (self.k == "all" or 0 <= self.k <= X.shape[1]):
raise ValueError("k should be >=0, <= n_features = %d; got %r. "
"Use k='all' to return all features."
% (X.shape[1], self.k))
def _get_support_mask(self):
check_is_fitted(self)
if self.k == 'all':
return np.ones(self.scores_.shape, dtype=bool)
elif self.k == 0:
return np.zeros(self.scores_.shape, dtype=bool)
else:
scores = _clean_nans(self.scores_)
mask = np.zeros(scores.shape, dtype=bool)
# Request a stable sort. Mergesort takes more memory (~40MB per
# megafeature on x86-64).
mask[np.argsort(scores, kind="mergesort")[-self.k:]] = 1
return mask
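# A consequence of the stable sort above (illustrative scores): argsort
# keeps tied scores in index order, so the top-k slice taken from the end
# favours the later-indexed tied features:
#
#   >>> scores = np.array([3., 1., 3., 3.])
#   >>> np.argsort(scores, kind="mergesort")[-2:]   # k=2 keeps features 2, 3
#   array([2, 3])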
class SelectFpr(_BaseFilter):
"""Filter: Select the pvalues below alpha based on a FPR test.
FPR test stands for False Positive Rate test. It controls the total
amount of false detections.
Read more in the :ref:`User Guide <univariate_feature_selection>`.
Parameters
----------
score_func : callable
Function taking two arrays X and y, and returning a pair of arrays
(scores, pvalues).
Default is f_classif (see below "See also"). The default function only
works with classification tasks.
alpha : float, optional
The highest p-value for features to be kept.
Attributes
----------
scores_ : array-like of shape (n_features,)
Scores of features.
pvalues_ : array-like of shape (n_features,)
p-values of feature scores.
Examples
--------
>>> from sklearn.datasets import load_breast_cancer
>>> from sklearn.feature_selection import SelectFpr, chi2
>>> X, y = load_breast_cancer(return_X_y=True)
>>> X.shape
(569, 30)
>>> X_new = SelectFpr(chi2, alpha=0.01).fit_transform(X, y)
>>> X_new.shape
(569, 16)
See also
--------
f_classif: ANOVA F-value between label/feature for classification tasks.
chi2: Chi-squared stats of non-negative features for classification tasks.
mutual_info_classif: Mutual information for a discrete target.
f_regression: F-value between label/feature for regression tasks.
mutual_info_regression: Mutual information for a continuous target.
SelectPercentile: Select features based on percentile of the highest scores.
SelectKBest: Select features based on the k highest scores.
SelectFdr: Select features based on an estimated false discovery rate.
SelectFwe: Select features based on family-wise error rate.
GenericUnivariateSelect: Univariate feature selector with configurable mode.
"""
@_deprecate_positional_args
def __init__(self, score_func=f_classif, *, alpha=5e-2):
super().__init__(score_func=score_func)
self.alpha = alpha
def _get_support_mask(self):
check_is_fitted(self)
return self.pvalues_ < self.alpha
class SelectFdr(_BaseFilter):
"""Filter: Select the p-values for an estimated false discovery rate
This uses the Benjamini-Hochberg procedure. ``alpha`` is an upper bound
on the expected false discovery rate.
Read more in the :ref:`User Guide <univariate_feature_selection>`.
Parameters
----------
score_func : callable
Function taking two arrays X and y, and returning a pair of arrays
(scores, pvalues).
Default is f_classif (see below "See also"). The default function only
works with classification tasks.
alpha : float, optional
The highest uncorrected p-value for features to keep.
Examples
--------
>>> from sklearn.datasets import load_breast_cancer
>>> from sklearn.feature_selection import SelectFdr, chi2
>>> X, y = load_breast_cancer(return_X_y=True)
>>> X.shape
(569, 30)
>>> X_new = SelectFdr(chi2, alpha=0.01).fit_transform(X, y)
>>> X_new.shape
(569, 16)
Attributes
----------
scores_ : array-like of shape (n_features,)
Scores of features.
pvalues_ : array-like of shape (n_features,)
p-values of feature scores.
References
----------
https://en.wikipedia.org/wiki/False_discovery_rate
See also
--------
f_classif: ANOVA F-value between label/feature for classification tasks.
mutual_info_classif: Mutual information for a discrete target.
chi2: Chi-squared stats of non-negative features for classification tasks.
f_regression: F-value between label/feature for regression tasks.
mutual_info_regression: Mutual information for a continuous target.
SelectPercentile: Select features based on percentile of the highest scores.
SelectKBest: Select features based on the k highest scores.
SelectFpr: Select features based on a false positive rate test.
SelectFwe: Select features based on family-wise error rate.
GenericUnivariateSelect: Univariate feature selector with configurable mode.
"""
@_deprecate_positional_args
def __init__(self, score_func=f_classif, *, alpha=5e-2):
super().__init__(score_func=score_func)
self.alpha = alpha
def _get_support_mask(self):
check_is_fitted(self)
n_features = len(self.pvalues_)
sv = np.sort(self.pvalues_)
selected = sv[sv <= float(self.alpha) / n_features *
np.arange(1, n_features + 1)]
if selected.size == 0:
return np.zeros_like(self.pvalues_, dtype=bool)
return self.pvalues_ <= selected.max()
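# Benjamini-Hochberg on illustrative p-values: with pvalues
# [0.001, 0.02, 0.04, 0.5] and alpha=0.05, the sorted values are compared
# against alpha * [1, 2, 3, 4] / 4 = [0.0125, 0.025, 0.0375, 0.05]. The
# first two pass, 0.04 does not, so the cutoff is max(selected) = 0.02 and
# exactly the two smallest p-values survive.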
class SelectFwe(_BaseFilter):
"""Filter: Select the p-values corresponding to Family-wise error rate
Read more in the :ref:`User Guide <univariate_feature_selection>`.
Parameters
----------
score_func : callable
Function taking two arrays X and y, and returning a pair of arrays
(scores, pvalues).
Default is f_classif (see below "See also"). The default function only
works with classification tasks.
alpha : float, optional
The highest uncorrected p-value for features to keep.
Examples
--------
>>> from sklearn.datasets import load_breast_cancer
>>> from sklearn.feature_selection import SelectFwe, chi2
>>> X, y = load_breast_cancer(return_X_y=True)
>>> X.shape
(569, 30)
>>> X_new = SelectFwe(chi2, alpha=0.01).fit_transform(X, y)
>>> X_new.shape
(569, 15)
Attributes
----------
scores_ : array-like of shape (n_features,)
Scores of features.
pvalues_ : array-like of shape (n_features,)
p-values of feature scores.
See also
--------
f_classif: ANOVA F-value between label/feature for classification tasks.
chi2: Chi-squared stats of non-negative features for classification tasks.
f_regression: F-value between label/feature for regression tasks.
SelectPercentile: Select features based on percentile of the highest scores.
SelectKBest: Select features based on the k highest scores.
SelectFpr: Select features based on a false positive rate test.
SelectFdr: Select features based on an estimated false discovery rate.
GenericUnivariateSelect: Univariate feature selector with configurable mode.
"""
@_deprecate_positional_args
def __init__(self, score_func=f_classif, *, alpha=5e-2):
super().__init__(score_func=score_func)
self.alpha = alpha
def _get_support_mask(self):
check_is_fitted(self)
return (self.pvalues_ < self.alpha / len(self.pvalues_))
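# This is a Bonferroni-style bound (illustrative numbers): every p-value is
# compared against alpha / n_features, so alpha=0.05 across 20 features
# keeps only features with p < 0.0025.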
######################################################################
# Generic filter
######################################################################
# TODO this class should fit on either p-values or scores,
# depending on the mode.
class GenericUnivariateSelect(_BaseFilter):
"""Univariate feature selector with configurable strategy.
Read more in the :ref:`User Guide <univariate_feature_selection>`.
Parameters
----------
score_func : callable
Function taking two arrays X and y, and returning a pair of arrays
(scores, pvalues). For modes 'percentile' or 'kbest' it can return
a single array scores.
mode : {'percentile', 'k_best', 'fpr', 'fdr', 'fwe'}
Feature selection mode.
param : float or int depending on the feature selection mode
Parameter of the corresponding mode.
Attributes
----------
scores_ : array-like of shape (n_features,)
Scores of features.
pvalues_ : array-like of shape (n_features,)
p-values of feature scores, None if `score_func` returned scores only.
Examples
--------
>>> from sklearn.datasets import load_breast_cancer
>>> from sklearn.feature_selection import GenericUnivariateSelect, chi2
>>> X, y = load_breast_cancer(return_X_y=True)
>>> X.shape
(569, 30)
>>> transformer = GenericUnivariateSelect(chi2, mode='k_best', param=20)
>>> X_new = transformer.fit_transform(X, y)
>>> X_new.shape
(569, 20)
See also
--------
f_classif: ANOVA F-value between label/feature for classification tasks.
mutual_info_classif: Mutual information for a discrete target.
chi2: Chi-squared stats of non-negative features for classification tasks.
f_regression: F-value between label/feature for regression tasks.
mutual_info_regression: Mutual information for a continuous target.
SelectPercentile: Select features based on percentile of the highest scores.
SelectKBest: Select features based on the k highest scores.
SelectFpr: Select features based on a false positive rate test.
SelectFdr: Select features based on an estimated false discovery rate.
SelectFwe: Select features based on family-wise error rate.
"""
_selection_modes = {'percentile': SelectPercentile,
'k_best': SelectKBest,
'fpr': SelectFpr,
'fdr': SelectFdr,
'fwe': SelectFwe}
@_deprecate_positional_args
def __init__(self, score_func=f_classif, *, mode='percentile', param=1e-5):
super().__init__(score_func=score_func)
self.mode = mode
self.param = param
def _make_selector(self):
selector = self._selection_modes[self.mode](score_func=self.score_func)
# Now perform some acrobatics to set the right named parameter in
# the selector
possible_params = selector._get_param_names()
possible_params.remove('score_func')
selector.set_params(**{possible_params[0]: self.param})
return selector
def _check_params(self, X, y):
if self.mode not in self._selection_modes:
raise ValueError("The mode passed should be one of %s, %r,"
" (type %s) was passed."
% (self._selection_modes.keys(), self.mode,
type(self.mode)))
self._make_selector()._check_params(X, y)
def _get_support_mask(self):
check_is_fitted(self)
selector = self._make_selector()
selector.pvalues_ = self.pvalues_
selector.scores_ = self.scores_
return selector._get_support_mask()
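# Equivalence sketch (illustrative, on any classification X, y): the
# generic selector with mode='fpr' should agree feature-for-feature with
# SelectFpr at the same alpha, since both delegate to the same mask logic:
#
#   >>> from sklearn.datasets import load_breast_cancer
#   >>> X, y = load_breast_cancer(return_X_y=True)
#   >>> a = GenericUnivariateSelect(chi2, mode='fpr', param=0.01).fit(X, y)
#   >>> b = SelectFpr(chi2, alpha=0.01).fit(X, y)
#   >>> np.array_equal(a.get_support(), b.get_support())
#   True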

View file

@@ -0,0 +1,102 @@
# Author: Lars Buitinck
# License: 3-clause BSD
import numpy as np
from ..base import BaseEstimator
from ._base import SelectorMixin
from ..utils.sparsefuncs import mean_variance_axis, min_max_axis
from ..utils.validation import check_is_fitted
class VarianceThreshold(SelectorMixin, BaseEstimator):
"""Feature selector that removes all low-variance features.
This feature selection algorithm looks only at the features (X), not the
desired outputs (y), and can thus be used for unsupervised learning.
Read more in the :ref:`User Guide <variance_threshold>`.
Parameters
----------
threshold : float, optional
Features with a training-set variance lower than this threshold will
be removed. The default is to keep all features with non-zero variance,
i.e. remove the features that have the same value in all samples.
Attributes
----------
variances_ : array, shape (n_features,)
Variances of individual features.
Notes
-----
Allows NaN in the input.
Examples
--------
The following dataset has integer features, two of which are the same
in every sample. These are removed with the default setting for threshold::
>>> X = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]
>>> selector = VarianceThreshold()
>>> selector.fit_transform(X)
array([[2, 0],
[1, 4],
[1, 1]])
"""
def __init__(self, threshold=0.):
self.threshold = threshold
def fit(self, X, y=None):
"""Learn empirical variances from X.
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Sample vectors from which to compute variances.
y : any
Ignored. This parameter exists only for compatibility with
sklearn.pipeline.Pipeline.
Returns
-------
self
"""
X = self._validate_data(X, accept_sparse=('csr', 'csc'),
dtype=np.float64,
force_all_finite='allow-nan')
if hasattr(X, "toarray"): # sparse matrix
_, self.variances_ = mean_variance_axis(X, axis=0)
if self.threshold == 0:
mins, maxes = min_max_axis(X, axis=0)
peak_to_peaks = maxes - mins
else:
self.variances_ = np.nanvar(X, axis=0)
if self.threshold == 0:
peak_to_peaks = np.ptp(X, axis=0)
if self.threshold == 0:
# Use peak-to-peak to avoid numeric precision issues
# for constant features
compare_arr = np.array([self.variances_, peak_to_peaks])
self.variances_ = np.nanmin(compare_arr, axis=0)
if np.all(~np.isfinite(self.variances_) |
(self.variances_ <= self.threshold)):
msg = "No feature in X meets the variance threshold {0:.5f}"
if X.shape[0] == 1:
msg += " (X contains only one sample)"
raise ValueError(msg.format(self.threshold))
return self
def _get_support_mask(self):
check_is_fitted(self)
return self.variances_ > self.threshold
def _more_tags(self):
return {'allow_nan': True}
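# Why the peak-to-peak guard in fit matters (illustrative): a constant
# float column can get a tiny nonzero np.var (~1e-34) from rounding, while
# its peak-to-peak range is exactly 0, so taking the minimum of the two
# reliably drops it at threshold=0:
#
#   >>> import numpy as np
#   >>> X = np.c_[np.full(10, 0.1), np.arange(10.)]
#   >>> VarianceThreshold().fit(X).get_support()
#   array([False,  True])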

View file

@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _base # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.feature_selection.base'
correct_import_path = 'sklearn.feature_selection'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_base, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)
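# How these generated shims work (a sketch with hypothetical module names,
# not sklearn API): PEP 562 module-level __getattr__ lets the deprecated
# import path warn and then forward every attribute lookup to the private
# implementation module:
#
#   import warnings
#   from . import _new_home as _impl        # hypothetical private module
#   def __getattr__(name):                  # called for missing attributes
#       warnings.warn("pkg.old_home is deprecated; import from pkg instead",
#                     FutureWarning)
#       return getattr(_impl, name)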

View file

@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _from_model # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.feature_selection.from_model'
correct_import_path = 'sklearn.feature_selection'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_from_model, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _mutual_info # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.feature_selection.mutual_info'
correct_import_path = 'sklearn.feature_selection'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_mutual_info, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _rfe # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.feature_selection.rfe'
correct_import_path = 'sklearn.feature_selection'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_rfe, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@@ -0,0 +1,119 @@
import numpy as np
import pytest
from scipy import sparse as sp
from numpy.testing import assert_array_equal
from sklearn.base import BaseEstimator
from sklearn.feature_selection._base import SelectorMixin
from sklearn.utils import check_array
class StepSelector(SelectorMixin, BaseEstimator):
"""Retain every `step` features (beginning with 0)"""
def __init__(self, step=2):
self.step = step
def fit(self, X, y=None):
X = check_array(X, accept_sparse='csc')
self.n_input_feats = X.shape[1]
return self
def _get_support_mask(self):
mask = np.zeros(self.n_input_feats, dtype=bool)
mask[::self.step] = True
return mask
support = [True, False] * 5
support_inds = [0, 2, 4, 6, 8]
X = np.arange(20).reshape(2, 10)
Xt = np.arange(0, 20, 2).reshape(2, 5)
Xinv = X.copy()
Xinv[:, 1::2] = 0
y = [0, 1]
feature_names = list('ABCDEFGHIJ')
feature_names_t = feature_names[::2]
feature_names_inv = np.array(feature_names)
feature_names_inv[1::2] = ''
def test_transform_dense():
sel = StepSelector()
Xt_actual = sel.fit(X, y).transform(X)
Xt_actual2 = StepSelector().fit_transform(X, y)
assert_array_equal(Xt, Xt_actual)
assert_array_equal(Xt, Xt_actual2)
# Check dtype matches
assert np.int32 == sel.transform(X.astype(np.int32)).dtype
assert np.float32 == sel.transform(X.astype(np.float32)).dtype
# Check 1d list and other dtype:
names_t_actual = sel.transform([feature_names])
assert_array_equal(feature_names_t, names_t_actual.ravel())
# Check wrong shape raises error
with pytest.raises(ValueError):
sel.transform(np.array([[1], [2]]))
def test_transform_sparse():
sparse = sp.csc_matrix
sel = StepSelector()
Xt_actual = sel.fit(sparse(X)).transform(sparse(X))
Xt_actual2 = sel.fit_transform(sparse(X))
assert_array_equal(Xt, Xt_actual.toarray())
assert_array_equal(Xt, Xt_actual2.toarray())
# Check dtype matches
assert np.int32 == sel.transform(sparse(X).astype(np.int32)).dtype
assert np.float32 == sel.transform(sparse(X).astype(np.float32)).dtype
# Check wrong shape raises error
with pytest.raises(ValueError):
sel.transform(np.array([[1], [2]]))
def test_inverse_transform_dense():
sel = StepSelector()
Xinv_actual = sel.fit(X, y).inverse_transform(Xt)
assert_array_equal(Xinv, Xinv_actual)
# Check dtype matches
assert (np.int32 ==
sel.inverse_transform(Xt.astype(np.int32)).dtype)
assert (np.float32 ==
sel.inverse_transform(Xt.astype(np.float32)).dtype)
# Check 1d list and other dtype:
names_inv_actual = sel.inverse_transform([feature_names_t])
assert_array_equal(feature_names_inv, names_inv_actual.ravel())
# Check wrong shape raises error
with pytest.raises(ValueError):
sel.inverse_transform(np.array([[1], [2]]))
def test_inverse_transform_sparse():
sparse = sp.csc_matrix
sel = StepSelector()
Xinv_actual = sel.fit(sparse(X)).inverse_transform(sparse(Xt))
assert_array_equal(Xinv, Xinv_actual.toarray())
# Check dtype matches
assert (np.int32 ==
sel.inverse_transform(sparse(Xt).astype(np.int32)).dtype)
assert (np.float32 ==
sel.inverse_transform(sparse(Xt).astype(np.float32)).dtype)
# Check wrong shape raises error
with pytest.raises(ValueError):
sel.inverse_transform(np.array([[1], [2]]))
def test_get_support():
sel = StepSelector()
sel.fit(X, y)
assert_array_equal(support, sel.get_support())
assert_array_equal(support_inds, sel.get_support(indices=True))

View file

@@ -0,0 +1,96 @@
"""
Tests for chi2, currently the only feature selection function designed
specifically to work with sparse matrices.
"""
import warnings
import numpy as np
import pytest
from scipy.sparse import coo_matrix, csr_matrix
import scipy.stats
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.feature_selection._univariate_selection import _chisquare
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_equal
# Feature 0 is highly informative for class 1;
# feature 1 is the same everywhere;
# feature 2 is a bit informative for class 2.
X = [[2, 1, 2],
[9, 1, 1],
[6, 1, 2],
[0, 1, 2]]
y = [0, 1, 2, 2]
def mkchi2(k):
"""Make k-best chi2 selector"""
return SelectKBest(chi2, k=k)
def test_chi2():
# Test Chi2 feature extraction
chi2 = mkchi2(k=1).fit(X, y)
assert_array_equal(chi2.get_support(indices=True), [0])
assert_array_equal(chi2.transform(X), np.array(X)[:, [0]])
chi2 = mkchi2(k=2).fit(X, y)
assert_array_equal(sorted(chi2.get_support(indices=True)), [0, 2])
Xsp = csr_matrix(X, dtype=np.float64)
chi2 = mkchi2(k=2).fit(Xsp, y)
assert_array_equal(sorted(chi2.get_support(indices=True)), [0, 2])
Xtrans = chi2.transform(Xsp)
assert_array_equal(Xtrans.shape, [Xsp.shape[0], 2])
# == doesn't work on scipy.sparse matrices
Xtrans = Xtrans.toarray()
Xtrans2 = mkchi2(k=2).fit_transform(Xsp, y).toarray()
assert_array_almost_equal(Xtrans, Xtrans2)
def test_chi2_coo():
# Check that chi2 works with a COO matrix
# (as returned by CountVectorizer, DictVectorizer)
Xcoo = coo_matrix(X)
mkchi2(k=2).fit_transform(Xcoo, y)
# if we got here without an exception, we're safe
def test_chi2_negative():
# Check for proper error on negative numbers in the input X.
X, y = [[0, 1], [-1e-20, 1]], [0, 1]
for X in (X, np.array(X), csr_matrix(X)):
with pytest.raises(ValueError):
chi2(X, y)
def test_chi2_unused_feature():
# Unused feature should evaluate to NaN
# and should issue no runtime warning
with warnings.catch_warnings(record=True) as warned:
warnings.simplefilter('always')
chi, p = chi2([[1, 0], [0, 0]], [1, 0])
for w in warned:
if 'divide by zero' in repr(w):
raise AssertionError('Found unexpected warning %s' % w)
assert_array_equal(chi, [1, np.nan])
assert_array_equal(p[1], np.nan)
def test_chisquare():
# Test replacement for scipy.stats.chisquare against the original.
obs = np.array([[2., 2.],
[1., 1.]])
exp = np.array([[1.5, 1.5],
[1.5, 1.5]])
# call SciPy first because our version overwrites obs
chi_scp, p_scp = scipy.stats.chisquare(obs, exp)
chi_our, p_our = _chisquare(obs, exp)
assert_array_almost_equal(chi_scp, chi_our)
assert_array_almost_equal(p_scp, p_our)

View file

@@ -0,0 +1,669 @@
"""
Todo: cross-check the F-value with stats model
"""
import itertools
import warnings
import numpy as np
from scipy import stats, sparse
import pytest
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_warns
from sklearn.utils._testing import ignore_warnings
from sklearn.utils._testing import assert_warns_message
from sklearn.utils import safe_mask
from sklearn.datasets import make_classification, make_regression
from sklearn.feature_selection import (
chi2, f_classif, f_oneway, f_regression, mutual_info_classif,
mutual_info_regression, SelectPercentile, SelectKBest, SelectFpr,
SelectFdr, SelectFwe, GenericUnivariateSelect)
##############################################################################
# Test the score functions
def test_f_oneway_vs_scipy_stats():
# Test that our f_oneway gives the same result as scipy.stats
rng = np.random.RandomState(0)
X1 = rng.randn(10, 3)
X2 = 1 + rng.randn(10, 3)
f, pv = stats.f_oneway(X1, X2)
f2, pv2 = f_oneway(X1, X2)
assert np.allclose(f, f2)
assert np.allclose(pv, pv2)
def test_f_oneway_ints():
# Smoke test f_oneway on integers: that it does not raise casting errors
# with recent numpys
rng = np.random.RandomState(0)
X = rng.randint(10, size=(10, 10))
y = np.arange(10)
fint, pint = f_oneway(X, y)
# test that it gives the same result as with float
f, p = f_oneway(X.astype(np.float64), y)
assert_array_almost_equal(f, fint, decimal=4)
assert_array_almost_equal(p, pint, decimal=4)
def test_f_classif():
# Test whether the F test yields meaningful results
# on a simple simulated classification problem
X, y = make_classification(n_samples=200, n_features=20,
n_informative=3, n_redundant=2,
n_repeated=0, n_classes=8,
n_clusters_per_class=1, flip_y=0.0,
class_sep=10, shuffle=False, random_state=0)
F, pv = f_classif(X, y)
F_sparse, pv_sparse = f_classif(sparse.csr_matrix(X), y)
assert (F > 0).all()
assert (pv > 0).all()
assert (pv < 1).all()
assert (pv[:5] < 0.05).all()
assert (pv[5:] > 1.e-4).all()
assert_array_almost_equal(F_sparse, F)
assert_array_almost_equal(pv_sparse, pv)
def test_f_regression():
# Test whether the F test yields meaningful results
# on a simple simulated regression problem
X, y = make_regression(n_samples=200, n_features=20, n_informative=5,
shuffle=False, random_state=0)
F, pv = f_regression(X, y)
assert (F > 0).all()
assert (pv > 0).all()
assert (pv < 1).all()
assert (pv[:5] < 0.05).all()
assert (pv[5:] > 1.e-4).all()
# with centering, compare with sparse
F, pv = f_regression(X, y, center=True)
F_sparse, pv_sparse = f_regression(sparse.csr_matrix(X), y, center=True)
assert_array_almost_equal(F_sparse, F)
assert_array_almost_equal(pv_sparse, pv)
# again without centering, compare with sparse
F, pv = f_regression(X, y, center=False)
F_sparse, pv_sparse = f_regression(sparse.csr_matrix(X), y, center=False)
assert_array_almost_equal(F_sparse, F)
assert_array_almost_equal(pv_sparse, pv)
def test_f_regression_input_dtype():
# Test whether f_regression returns the same value
# for any numeric data_type
rng = np.random.RandomState(0)
X = rng.rand(10, 20)
y = np.arange(10).astype(np.int64)
F1, pv1 = f_regression(X, y)
F2, pv2 = f_regression(X, y.astype(np.float64))
assert_array_almost_equal(F1, F2, 5)
assert_array_almost_equal(pv1, pv2, 5)
def test_f_regression_center():
# Test whether f_regression preserves dof according to 'center' argument
# We use two centered variates so we have a simple relationship between
# F-score with variates centering and F-score without variates centering.
# Create toy example
X = np.arange(-5, 6).reshape(-1, 1) # X has zero mean
n_samples = X.size
Y = np.ones(n_samples)
Y[::2] *= -1.
Y[0] = 0. # have Y mean being null
F1, _ = f_regression(X, Y, center=True)
F2, _ = f_regression(X, Y, center=False)
assert_array_almost_equal(F1 * (n_samples - 1.) / (n_samples - 2.), F2)
assert_almost_equal(F2[0], 0.232558139) # value from statsmodels OLS
def test_f_classif_multi_class():
# Test whether the F test yields meaningful results
# on a simple simulated classification problem
X, y = make_classification(n_samples=200, n_features=20,
n_informative=3, n_redundant=2,
n_repeated=0, n_classes=8,
n_clusters_per_class=1, flip_y=0.0,
class_sep=10, shuffle=False, random_state=0)
F, pv = f_classif(X, y)
assert (F > 0).all()
assert (pv > 0).all()
assert (pv < 1).all()
assert (pv[:5] < 0.05).all()
assert (pv[5:] > 1.e-4).all()
def test_select_percentile_classif():
# Test whether the relative univariate feature selection
# gets the correct items in a simple classification problem
# with the percentile heuristic
X, y = make_classification(n_samples=200, n_features=20,
n_informative=3, n_redundant=2,
n_repeated=0, n_classes=8,
n_clusters_per_class=1, flip_y=0.0,
class_sep=10, shuffle=False, random_state=0)
univariate_filter = SelectPercentile(f_classif, percentile=25)
X_r = univariate_filter.fit(X, y).transform(X)
X_r2 = GenericUnivariateSelect(f_classif, mode='percentile',
param=25).fit(X, y).transform(X)
assert_array_equal(X_r, X_r2)
support = univariate_filter.get_support()
gtruth = np.zeros(20)
gtruth[:5] = 1
assert_array_equal(support, gtruth)
def test_select_percentile_classif_sparse():
# Test whether the relative univariate feature selection
# gets the correct items in a simple classification problem
# with the percentile heuristic
X, y = make_classification(n_samples=200, n_features=20,
n_informative=3, n_redundant=2,
n_repeated=0, n_classes=8,
n_clusters_per_class=1, flip_y=0.0,
class_sep=10, shuffle=False, random_state=0)
X = sparse.csr_matrix(X)
univariate_filter = SelectPercentile(f_classif, percentile=25)
X_r = univariate_filter.fit(X, y).transform(X)
X_r2 = GenericUnivariateSelect(f_classif, mode='percentile',
param=25).fit(X, y).transform(X)
assert_array_equal(X_r.toarray(), X_r2.toarray())
support = univariate_filter.get_support()
gtruth = np.zeros(20)
gtruth[:5] = 1
assert_array_equal(support, gtruth)
X_r2inv = univariate_filter.inverse_transform(X_r2)
assert sparse.issparse(X_r2inv)
support_mask = safe_mask(X_r2inv, support)
assert X_r2inv.shape == X.shape
assert_array_equal(X_r2inv[:, support_mask].toarray(), X_r.toarray())
# Check other columns are empty
assert X_r2inv.getnnz() == X_r.getnnz()
##############################################################################
# Test univariate selection in classification settings
def test_select_kbest_classif():
# Test whether the relative univariate feature selection
# gets the correct items in a simple classification problem
# with the k best heuristic
X, y = make_classification(n_samples=200, n_features=20,
n_informative=3, n_redundant=2,
n_repeated=0, n_classes=8,
n_clusters_per_class=1, flip_y=0.0,
class_sep=10, shuffle=False, random_state=0)
univariate_filter = SelectKBest(f_classif, k=5)
X_r = univariate_filter.fit(X, y).transform(X)
X_r2 = GenericUnivariateSelect(
f_classif, mode='k_best', param=5).fit(X, y).transform(X)
assert_array_equal(X_r, X_r2)
support = univariate_filter.get_support()
gtruth = np.zeros(20)
gtruth[:5] = 1
assert_array_equal(support, gtruth)
def test_select_kbest_all():
# Test whether k="all" correctly returns all features.
X, y = make_classification(n_samples=20, n_features=10,
shuffle=False, random_state=0)
univariate_filter = SelectKBest(f_classif, k='all')
X_r = univariate_filter.fit(X, y).transform(X)
assert_array_equal(X, X_r)
def test_select_kbest_zero():
# Test whether k=0 correctly returns no features.
X, y = make_classification(n_samples=20, n_features=10,
shuffle=False, random_state=0)
univariate_filter = SelectKBest(f_classif, k=0)
univariate_filter.fit(X, y)
support = univariate_filter.get_support()
gtruth = np.zeros(10, dtype=bool)
assert_array_equal(support, gtruth)
X_selected = assert_warns_message(UserWarning, 'No features were selected',
univariate_filter.transform, X)
assert X_selected.shape == (20, 0)
def test_select_heuristics_classif():
# Test whether the relative univariate feature selection
# gets the correct items in a simple classification problem
# with the fdr, fwe and fpr heuristics
X, y = make_classification(n_samples=200, n_features=20,
n_informative=3, n_redundant=2,
n_repeated=0, n_classes=8,
n_clusters_per_class=1, flip_y=0.0,
class_sep=10, shuffle=False, random_state=0)
univariate_filter = SelectFwe(f_classif, alpha=0.01)
X_r = univariate_filter.fit(X, y).transform(X)
gtruth = np.zeros(20)
gtruth[:5] = 1
for mode in ['fdr', 'fpr', 'fwe']:
X_r2 = GenericUnivariateSelect(
f_classif, mode=mode, param=0.01).fit(X, y).transform(X)
assert_array_equal(X_r, X_r2)
support = univariate_filter.get_support()
assert_array_almost_equal(support, gtruth)
##############################################################################
# Test univariate selection in regression settings
def assert_best_scores_kept(score_filter):
scores = score_filter.scores_
support = score_filter.get_support()
assert_array_almost_equal(np.sort(scores[support]),
np.sort(scores)[-support.sum():])
def test_select_percentile_regression():
# Test whether the relative univariate feature selection
# gets the correct items in a simple regression problem
# with the percentile heuristic
X, y = make_regression(n_samples=200, n_features=20,
n_informative=5, shuffle=False, random_state=0)
univariate_filter = SelectPercentile(f_regression, percentile=25)
X_r = univariate_filter.fit(X, y).transform(X)
assert_best_scores_kept(univariate_filter)
X_r2 = GenericUnivariateSelect(
f_regression, mode='percentile', param=25).fit(X, y).transform(X)
assert_array_equal(X_r, X_r2)
support = univariate_filter.get_support()
gtruth = np.zeros(20)
gtruth[:5] = 1
assert_array_equal(support, gtruth)
X_2 = X.copy()
X_2[:, np.logical_not(support)] = 0
assert_array_equal(X_2, univariate_filter.inverse_transform(X_r))
# Check inverse_transform respects dtype
assert_array_equal(X_2.astype(bool),
univariate_filter.inverse_transform(X_r.astype(bool)))
def test_select_percentile_regression_full():
# Test whether the relative univariate feature selection
# selects all features when '100%' is asked.
X, y = make_regression(n_samples=200, n_features=20,
n_informative=5, shuffle=False, random_state=0)
univariate_filter = SelectPercentile(f_regression, percentile=100)
X_r = univariate_filter.fit(X, y).transform(X)
assert_best_scores_kept(univariate_filter)
X_r2 = GenericUnivariateSelect(
f_regression, mode='percentile', param=100).fit(X, y).transform(X)
assert_array_equal(X_r, X_r2)
support = univariate_filter.get_support()
gtruth = np.ones(20)
assert_array_equal(support, gtruth)
def test_invalid_percentile():
X, y = make_regression(n_samples=10, n_features=20,
n_informative=2, shuffle=False, random_state=0)
with pytest.raises(ValueError):
SelectPercentile(percentile=-1).fit(X, y)
with pytest.raises(ValueError):
SelectPercentile(percentile=101).fit(X, y)
with pytest.raises(ValueError):
GenericUnivariateSelect(mode='percentile', param=-1).fit(X, y)
with pytest.raises(ValueError):
GenericUnivariateSelect(mode='percentile', param=101).fit(X, y)
def test_select_kbest_regression():
# Test whether the relative univariate feature selection
# gets the correct items in a simple regression problem
# with the k best heuristic
X, y = make_regression(n_samples=200, n_features=20, n_informative=5,
shuffle=False, random_state=0, noise=10)
univariate_filter = SelectKBest(f_regression, k=5)
X_r = univariate_filter.fit(X, y).transform(X)
assert_best_scores_kept(univariate_filter)
X_r2 = GenericUnivariateSelect(
f_regression, mode='k_best', param=5).fit(X, y).transform(X)
assert_array_equal(X_r, X_r2)
support = univariate_filter.get_support()
gtruth = np.zeros(20)
gtruth[:5] = 1
assert_array_equal(support, gtruth)
def test_select_heuristics_regression():
# Test whether the relative univariate feature selection
# gets the correct items in a simple regression problem
# with the fpr, fdr or fwe heuristics
X, y = make_regression(n_samples=200, n_features=20, n_informative=5,
shuffle=False, random_state=0, noise=10)
univariate_filter = SelectFpr(f_regression, alpha=0.01)
X_r = univariate_filter.fit(X, y).transform(X)
gtruth = np.zeros(20)
gtruth[:5] = 1
for mode in ['fdr', 'fpr', 'fwe']:
X_r2 = GenericUnivariateSelect(
f_regression, mode=mode, param=0.01).fit(X, y).transform(X)
assert_array_equal(X_r, X_r2)
support = univariate_filter.get_support()
assert_array_equal(support[:5], np.ones((5, ), dtype=bool))
assert np.sum(support[5:] == 1) < 3
def test_boundary_case_chi2():
# Test boundary case, and always aim to select 1 feature.
X = np.array([[10, 20], [20, 20], [20, 30]])
y = np.array([[1], [0], [0]])
scores, pvalues = chi2(X, y)
assert_array_almost_equal(scores, np.array([4., 0.71428571]))
assert_array_almost_equal(pvalues, np.array([0.04550026, 0.39802472]))
filter_fdr = SelectFdr(chi2, alpha=0.1)
filter_fdr.fit(X, y)
support_fdr = filter_fdr.get_support()
assert_array_equal(support_fdr, np.array([True, False]))
filter_kbest = SelectKBest(chi2, k=1)
filter_kbest.fit(X, y)
support_kbest = filter_kbest.get_support()
assert_array_equal(support_kbest, np.array([True, False]))
filter_percentile = SelectPercentile(chi2, percentile=50)
filter_percentile.fit(X, y)
support_percentile = filter_percentile.get_support()
assert_array_equal(support_percentile, np.array([True, False]))
filter_fpr = SelectFpr(chi2, alpha=0.1)
filter_fpr.fit(X, y)
support_fpr = filter_fpr.get_support()
assert_array_equal(support_fpr, np.array([True, False]))
filter_fwe = SelectFwe(chi2, alpha=0.1)
filter_fwe.fit(X, y)
support_fwe = filter_fwe.get_support()
assert_array_equal(support_fwe, np.array([True, False]))
@pytest.mark.parametrize("alpha", [0.001, 0.01, 0.1])
@pytest.mark.parametrize("n_informative", [1, 5, 10])
def test_select_fdr_regression(alpha, n_informative):
# Test that fdr heuristic actually has low FDR.
def single_fdr(alpha, n_informative, random_state):
X, y = make_regression(n_samples=150, n_features=20,
n_informative=n_informative, shuffle=False,
random_state=random_state, noise=10)
with warnings.catch_warnings(record=True):
# Warnings can be raised when no features are selected
# (low alpha or very noisy data)
univariate_filter = SelectFdr(f_regression, alpha=alpha)
X_r = univariate_filter.fit(X, y).transform(X)
X_r2 = GenericUnivariateSelect(
f_regression, mode='fdr', param=alpha).fit(X, y).transform(X)
assert_array_equal(X_r, X_r2)
support = univariate_filter.get_support()
num_false_positives = np.sum(support[n_informative:] == 1)
num_true_positives = np.sum(support[:n_informative] == 1)
if num_false_positives == 0:
return 0.
false_discovery_rate = (num_false_positives /
(num_true_positives + num_false_positives))
return false_discovery_rate
# As per Benjamini-Hochberg, the expected false discovery rate
# should be lower than alpha:
# FDR = E(FP / (TP + FP)) <= alpha
false_discovery_rate = np.mean([single_fdr(alpha, n_informative,
random_state) for
random_state in range(100)])
assert alpha >= false_discovery_rate
# Make sure that the empirical false discovery rate increases
# with alpha:
if false_discovery_rate != 0:
assert false_discovery_rate > alpha / 10
def test_select_fwe_regression():
# Test whether the relative univariate feature selection
# gets the correct items in a simple regression problem
# with the fwe heuristic
X, y = make_regression(n_samples=200, n_features=20,
n_informative=5, shuffle=False, random_state=0)
univariate_filter = SelectFwe(f_regression, alpha=0.01)
X_r = univariate_filter.fit(X, y).transform(X)
X_r2 = GenericUnivariateSelect(
f_regression, mode='fwe', param=0.01).fit(X, y).transform(X)
assert_array_equal(X_r, X_r2)
support = univariate_filter.get_support()
gtruth = np.zeros(20)
gtruth[:5] = 1
assert_array_equal(support[:5], np.ones((5, ), dtype=bool))
assert np.sum(support[5:] == 1) < 2
def test_selectkbest_tiebreaking():
# Test whether SelectKBest actually selects k features in case of ties.
# Prior to 0.11, SelectKBest would return more features than requested.
Xs = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]]
y = [1]
dummy_score = lambda X, y: (X[0], X[0])
for X in Xs:
sel = SelectKBest(dummy_score, k=1)
X1 = ignore_warnings(sel.fit_transform)([X], y)
assert X1.shape[1] == 1
assert_best_scores_kept(sel)
sel = SelectKBest(dummy_score, k=2)
X2 = ignore_warnings(sel.fit_transform)([X], y)
assert X2.shape[1] == 2
assert_best_scores_kept(sel)
def test_selectpercentile_tiebreaking():
# Test if SelectPercentile selects the right n_features in case of ties.
Xs = [[0, 1, 1], [0, 0, 1], [1, 0, 0], [1, 1, 0]]
y = [1]
dummy_score = lambda X, y: (X[0], X[0])
for X in Xs:
sel = SelectPercentile(dummy_score, percentile=34)
X1 = ignore_warnings(sel.fit_transform)([X], y)
assert X1.shape[1] == 1
assert_best_scores_kept(sel)
sel = SelectPercentile(dummy_score, percentile=67)
X2 = ignore_warnings(sel.fit_transform)([X], y)
assert X2.shape[1] == 2
assert_best_scores_kept(sel)
def test_tied_pvalues():
# Test whether k-best and percentiles work with tied pvalues from chi2.
# chi2 will return the same p-values for the following features, but it
# will return different scores.
X0 = np.array([[10000, 9999, 9998], [1, 1, 1]])
y = [0, 1]
for perm in itertools.permutations((0, 1, 2)):
X = X0[:, perm]
Xt = SelectKBest(chi2, k=2).fit_transform(X, y)
assert Xt.shape == (2, 2)
assert 9998 not in Xt
Xt = SelectPercentile(chi2, percentile=67).fit_transform(X, y)
assert Xt.shape == (2, 2)
assert 9998 not in Xt
def test_scorefunc_multilabel():
# Test whether k-best and percentiles works with multilabels with chi2.
X = np.array([[10000, 9999, 0], [100, 9999, 0], [1000, 99, 0]])
y = [[1, 1], [0, 1], [1, 0]]
Xt = SelectKBest(chi2, k=2).fit_transform(X, y)
assert Xt.shape == (3, 2)
assert 0 not in Xt
Xt = SelectPercentile(chi2, percentile=67).fit_transform(X, y)
assert Xt.shape == (3, 2)
assert 0 not in Xt
def test_tied_scores():
# Test for stable sorting in k-best with tied scores.
X_train = np.array([[0, 0, 0], [1, 1, 1]])
y_train = [0, 1]
for n_features in [1, 2, 3]:
sel = SelectKBest(chi2, k=n_features).fit(X_train, y_train)
X_test = sel.transform([[0, 1, 2]])
assert_array_equal(X_test[0], np.arange(3)[-n_features:])
def test_nans():
# Assert that SelectKBest and SelectPercentile can handle NaNs.
# First feature has zero variance to confuse f_classif (ANOVA) and
# make it return a NaN.
X = [[0, 1, 0], [0, -1, -1], [0, .5, .5]]
y = [1, 0, 1]
for select in (SelectKBest(f_classif, k=2),
SelectPercentile(f_classif, percentile=67)):
ignore_warnings(select.fit)(X, y)
assert_array_equal(select.get_support(indices=True), np.array([1, 2]))
def test_score_func_error():
X = [[0, 1, 0], [0, -1, -1], [0, .5, .5]]
y = [1, 0, 1]
for SelectFeatures in [SelectKBest, SelectPercentile, SelectFwe,
SelectFdr, SelectFpr, GenericUnivariateSelect]:
with pytest.raises(TypeError):
SelectFeatures(score_func=10).fit(X, y)
def test_invalid_k():
X = [[0, 1, 0], [0, -1, -1], [0, .5, .5]]
y = [1, 0, 1]
with pytest.raises(ValueError):
SelectKBest(k=-1).fit(X, y)
with pytest.raises(ValueError):
SelectKBest(k=4).fit(X, y)
with pytest.raises(ValueError):
GenericUnivariateSelect(mode='k_best', param=-1).fit(X, y)
with pytest.raises(ValueError):
GenericUnivariateSelect(mode='k_best', param=4).fit(X, y)
def test_f_classif_constant_feature():
# Test that f_classif warns if a feature is constant throughout.
X, y = make_classification(n_samples=10, n_features=5)
X[:, 0] = 2.0
assert_warns(UserWarning, f_classif, X, y)
def test_no_feature_selected():
rng = np.random.RandomState(0)
# Generate random uncorrelated data: a strict univariate test should
# reject all the features
X = rng.rand(40, 10)
y = rng.randint(0, 4, size=40)
strict_selectors = [
SelectFwe(alpha=0.01).fit(X, y),
SelectFdr(alpha=0.01).fit(X, y),
SelectFpr(alpha=0.01).fit(X, y),
SelectPercentile(percentile=0).fit(X, y),
SelectKBest(k=0).fit(X, y),
]
for selector in strict_selectors:
assert_array_equal(selector.get_support(), np.zeros(10))
X_selected = assert_warns_message(
UserWarning, 'No features were selected', selector.transform, X)
assert X_selected.shape == (40, 0)
def test_mutual_info_classif():
X, y = make_classification(n_samples=100, n_features=5,
n_informative=1, n_redundant=1,
n_repeated=0, n_classes=2,
n_clusters_per_class=1, flip_y=0.0,
class_sep=10, shuffle=False, random_state=0)
# Test in KBest mode.
univariate_filter = SelectKBest(mutual_info_classif, k=2)
X_r = univariate_filter.fit(X, y).transform(X)
X_r2 = GenericUnivariateSelect(
mutual_info_classif, mode='k_best', param=2).fit(X, y).transform(X)
assert_array_equal(X_r, X_r2)
support = univariate_filter.get_support()
gtruth = np.zeros(5)
gtruth[:2] = 1
assert_array_equal(support, gtruth)
# Test in Percentile mode.
univariate_filter = SelectPercentile(mutual_info_classif, percentile=40)
X_r = univariate_filter.fit(X, y).transform(X)
X_r2 = GenericUnivariateSelect(
mutual_info_classif, mode='percentile', param=40).fit(X, y).transform(X)
assert_array_equal(X_r, X_r2)
support = univariate_filter.get_support()
gtruth = np.zeros(5)
gtruth[:2] = 1
assert_array_equal(support, gtruth)
def test_mutual_info_regression():
X, y = make_regression(n_samples=100, n_features=10, n_informative=2,
shuffle=False, random_state=0, noise=10)
# Test in KBest mode.
univariate_filter = SelectKBest(mutual_info_regression, k=2)
X_r = univariate_filter.fit(X, y).transform(X)
assert_best_scores_kept(univariate_filter)
X_r2 = GenericUnivariateSelect(
mutual_info_regression, mode='k_best', param=2).fit(X, y).transform(X)
assert_array_equal(X_r, X_r2)
support = univariate_filter.get_support()
gtruth = np.zeros(10)
gtruth[:2] = 1
assert_array_equal(support, gtruth)
# Test in Percentile mode.
univariate_filter = SelectPercentile(mutual_info_regression, percentile=20)
X_r = univariate_filter.fit(X, y).transform(X)
X_r2 = GenericUnivariateSelect(mutual_info_regression, mode='percentile',
param=20).fit(X, y).transform(X)
assert_array_equal(X_r, X_r2)
support = univariate_filter.get_support()
gtruth = np.zeros(10)
gtruth[:2] = 1
assert_array_equal(support, gtruth)

View file

@@ -0,0 +1,369 @@
import pytest
import numpy as np
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import skip_if_32bit
from sklearn import datasets
from sklearn.linear_model import LogisticRegression, SGDClassifier, Lasso
from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectFromModel
from sklearn.experimental import enable_hist_gradient_boosting # noqa
from sklearn.ensemble import (RandomForestClassifier,
HistGradientBoostingClassifier)
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.base import BaseEstimator
class NaNTag(BaseEstimator):
def _more_tags(self):
return {'allow_nan': True}
class NoNaNTag(BaseEstimator):
def _more_tags(self):
return {'allow_nan': False}
class NaNTagRandomForest(RandomForestClassifier):
def _more_tags(self):
return {'allow_nan': True}
iris = datasets.load_iris()
data, y = iris.data, iris.target
rng = np.random.RandomState(0)
def test_invalid_input():
clf = SGDClassifier(alpha=0.1, max_iter=10, shuffle=True,
random_state=None, tol=None)
for threshold in ["gobbledigook", ".5 * gobbledigook"]:
model = SelectFromModel(clf, threshold=threshold)
model.fit(data, y)
with pytest.raises(ValueError):
model.transform(data)
def test_input_estimator_unchanged():
# Test that SelectFromModel fits on a clone of the estimator.
est = RandomForestClassifier()
transformer = SelectFromModel(estimator=est)
transformer.fit(data, y)
assert transformer.estimator is est
@pytest.mark.parametrize(
"max_features, err_type, err_msg",
[(-1, ValueError, "'max_features' should be 0 and"),
(data.shape[1] + 1, ValueError, "'max_features' should be 0 and"),
('gobbledigook', TypeError, "should be an integer"),
('all', TypeError, "should be an integer")]
)
def test_max_features_error(max_features, err_type, err_msg):
clf = RandomForestClassifier(n_estimators=50, random_state=0)
transformer = SelectFromModel(estimator=clf,
max_features=max_features,
threshold=-np.inf)
with pytest.raises(err_type, match=err_msg):
transformer.fit(data, y)
@pytest.mark.parametrize("max_features", [0, 2, data.shape[1]])
def test_max_features_dim(max_features):
clf = RandomForestClassifier(n_estimators=50, random_state=0)
transformer = SelectFromModel(estimator=clf,
max_features=max_features,
threshold=-np.inf)
X_trans = transformer.fit_transform(data, y)
assert X_trans.shape[1] == max_features
class FixedImportanceEstimator(BaseEstimator):
def __init__(self, importances):
self.importances = importances
def fit(self, X, y=None):
self.feature_importances_ = np.array(self.importances)
def test_max_features():
# Test max_features parameter using various values
X, y = datasets.make_classification(
n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
n_repeated=0, shuffle=False, random_state=0)
max_features = X.shape[1]
est = RandomForestClassifier(n_estimators=50, random_state=0)
transformer1 = SelectFromModel(estimator=est,
threshold=-np.inf)
transformer2 = SelectFromModel(estimator=est,
max_features=max_features,
threshold=-np.inf)
X_new1 = transformer1.fit_transform(X, y)
X_new2 = transformer2.fit_transform(X, y)
assert_allclose(X_new1, X_new2)
# Test max_features against actual model.
transformer1 = SelectFromModel(estimator=Lasso(alpha=0.025,
random_state=42))
X_new1 = transformer1.fit_transform(X, y)
scores1 = np.abs(transformer1.estimator_.coef_)
candidate_indices1 = np.argsort(-scores1, kind='mergesort')
for n_features in range(1, X_new1.shape[1] + 1):
transformer2 = SelectFromModel(estimator=Lasso(alpha=0.025,
random_state=42),
max_features=n_features,
threshold=-np.inf)
X_new2 = transformer2.fit_transform(X, y)
scores2 = np.abs(transformer2.estimator_.coef_)
candidate_indices2 = np.argsort(-scores2, kind='mergesort')
assert_allclose(X[:, candidate_indices1[:n_features]],
X[:, candidate_indices2[:n_features]])
assert_allclose(transformer1.estimator_.coef_,
transformer2.estimator_.coef_)
def test_max_features_tiebreak():
# Test if max_features can break tie among feature importance
X, y = datasets.make_classification(
n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
n_repeated=0, shuffle=False, random_state=0)
max_features = X.shape[1]
feature_importances = np.array([4, 4, 4, 4, 3, 3, 3, 2, 2, 1])
for n_features in range(1, max_features + 1):
transformer = SelectFromModel(
FixedImportanceEstimator(feature_importances),
max_features=n_features,
threshold=-np.inf)
X_new = transformer.fit_transform(X, y)
selected_feature_indices = np.where(transformer._get_support_mask())[0]
assert_array_equal(selected_feature_indices, np.arange(n_features))
assert X_new.shape[1] == n_features
def test_threshold_and_max_features():
X, y = datasets.make_classification(
n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
n_repeated=0, shuffle=False, random_state=0)
est = RandomForestClassifier(n_estimators=50, random_state=0)
transformer1 = SelectFromModel(estimator=est, max_features=3,
threshold=-np.inf)
X_new1 = transformer1.fit_transform(X, y)
transformer2 = SelectFromModel(estimator=est, threshold=0.04)
X_new2 = transformer2.fit_transform(X, y)
transformer3 = SelectFromModel(estimator=est, max_features=3,
threshold=0.04)
X_new3 = transformer3.fit_transform(X, y)
assert X_new3.shape[1] == min(X_new1.shape[1], X_new2.shape[1])
selected_indices = transformer3.transform(
np.arange(X.shape[1])[np.newaxis, :])
assert_allclose(X_new3, X[:, selected_indices[0]])
@skip_if_32bit
def test_feature_importances():
X, y = datasets.make_classification(
n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
n_repeated=0, shuffle=False, random_state=0)
est = RandomForestClassifier(n_estimators=50, random_state=0)
for threshold, func in zip(["mean", "median"], [np.mean, np.median]):
transformer = SelectFromModel(estimator=est, threshold=threshold)
transformer.fit(X, y)
assert hasattr(transformer.estimator_, 'feature_importances_')
X_new = transformer.transform(X)
assert X_new.shape[1] < X.shape[1]
importances = transformer.estimator_.feature_importances_
feature_mask = np.abs(importances) > func(importances)
assert_array_almost_equal(X_new, X[:, feature_mask])
def test_sample_weight():
# Ensure sample weights are passed to underlying estimator
X, y = datasets.make_classification(
n_samples=100, n_features=10, n_informative=3, n_redundant=0,
n_repeated=0, shuffle=False, random_state=0)
# Check with sample weights
sample_weight = np.ones(y.shape)
sample_weight[y == 1] *= 100
est = LogisticRegression(random_state=0, fit_intercept=False)
transformer = SelectFromModel(estimator=est)
transformer.fit(X, y, sample_weight=None)
mask = transformer._get_support_mask()
transformer.fit(X, y, sample_weight=sample_weight)
weighted_mask = transformer._get_support_mask()
assert not np.all(weighted_mask == mask)
transformer.fit(X, y, sample_weight=3 * sample_weight)
reweighted_mask = transformer._get_support_mask()
assert np.all(weighted_mask == reweighted_mask)
def test_coef_default_threshold():
X, y = datasets.make_classification(
n_samples=100, n_features=10, n_informative=3, n_redundant=0,
n_repeated=0, shuffle=False, random_state=0)
# For the Lasso and related models, the threshold defaults to 1e-5
transformer = SelectFromModel(estimator=Lasso(alpha=0.1,
random_state=42))
transformer.fit(X, y)
X_new = transformer.transform(X)
mask = np.abs(transformer.estimator_.coef_) > 1e-5
assert_array_almost_equal(X_new, X[:, mask])
@skip_if_32bit
def test_2d_coef():
X, y = datasets.make_classification(
n_samples=1000, n_features=10, n_informative=3, n_redundant=0,
n_repeated=0, shuffle=False, random_state=0, n_classes=4)
est = LogisticRegression()
for threshold, func in zip(["mean", "median"], [np.mean, np.median]):
for order in [1, 2, np.inf]:
# Fit SelectFromModel on a multi-class problem
transformer = SelectFromModel(estimator=LogisticRegression(),
threshold=threshold,
norm_order=order)
transformer.fit(X, y)
assert hasattr(transformer.estimator_, 'coef_')
X_new = transformer.transform(X)
assert X_new.shape[1] < X.shape[1]
# Manually check that the norm is correctly performed
est.fit(X, y)
importances = np.linalg.norm(est.coef_, axis=0, ord=order)
feature_mask = importances > func(importances)
assert_array_almost_equal(X_new, X[:, feature_mask])
def test_partial_fit():
est = PassiveAggressiveClassifier(random_state=0, shuffle=False,
max_iter=5, tol=None)
transformer = SelectFromModel(estimator=est)
transformer.partial_fit(data, y,
classes=np.unique(y))
old_model = transformer.estimator_
transformer.partial_fit(data, y,
classes=np.unique(y))
new_model = transformer.estimator_
assert old_model is new_model
X_transform = transformer.transform(data)
transformer.fit(np.vstack((data, data)), np.concatenate((y, y)))
assert_array_almost_equal(X_transform, transformer.transform(data))
# check that if est doesn't have partial_fit, neither does SelectFromModel
transformer = SelectFromModel(estimator=RandomForestClassifier())
assert not hasattr(transformer, "partial_fit")
def test_calling_fit_reinitializes():
est = LinearSVC(random_state=0)
transformer = SelectFromModel(estimator=est)
transformer.fit(data, y)
transformer.set_params(estimator__C=100)
transformer.fit(data, y)
assert transformer.estimator_.C == 100
def test_prefit():
# Test all possible combinations of the prefit parameter.
# Passing a prefit parameter with the selected model
# and fitting a unfit model with prefit=False should give same results.
clf = SGDClassifier(alpha=0.1, max_iter=10, shuffle=True,
random_state=0, tol=None)
model = SelectFromModel(clf)
model.fit(data, y)
X_transform = model.transform(data)
clf.fit(data, y)
model = SelectFromModel(clf, prefit=True)
assert_array_almost_equal(model.transform(data), X_transform)
# Check that the model is rewritten if prefit=False and a fitted model is
# passed
model = SelectFromModel(clf, prefit=False)
model.fit(data, y)
assert_array_almost_equal(model.transform(data), X_transform)
# Check that prefit=True and calling fit raises a ValueError
model = SelectFromModel(clf, prefit=True)
with pytest.raises(ValueError):
model.fit(data, y)
def test_threshold_string():
est = RandomForestClassifier(n_estimators=50, random_state=0)
model = SelectFromModel(est, threshold="0.5*mean")
model.fit(data, y)
X_transform = model.transform(data)
# Calculate the threshold from the estimator directly.
est.fit(data, y)
threshold = 0.5 * np.mean(est.feature_importances_)
mask = est.feature_importances_ > threshold
assert_array_almost_equal(X_transform, data[:, mask])
def test_threshold_without_refitting():
# Test that the threshold can be set without refitting the model.
clf = SGDClassifier(alpha=0.1, max_iter=10, shuffle=True,
random_state=0, tol=None)
model = SelectFromModel(clf, threshold="0.1 * mean")
model.fit(data, y)
X_transform = model.transform(data)
# Set a higher threshold to filter out more features.
model.threshold = "1.0 * mean"
assert X_transform.shape[1] > model.transform(data).shape[1]
def test_fit_accepts_nan_inf():
# Test that fit doesn't check for np.inf and np.nan values.
clf = HistGradientBoostingClassifier(random_state=0)
model = SelectFromModel(estimator=clf)
nan_data = data.copy()
nan_data[0] = np.NaN
nan_data[1] = np.Inf
model.fit(nan_data, y)
def test_transform_accepts_nan_inf():
# Test that transform doesn't check for np.inf and np.nan values.
clf = NaNTagRandomForest(n_estimators=100, random_state=0)
nan_data = data.copy()
model = SelectFromModel(estimator=clf)
model.fit(nan_data, y)
nan_data[0] = np.NaN
nan_data[1] = np.Inf
model.transform(nan_data)
def test_allow_nan_tag_comes_from_estimator():
allow_nan_est = NaNTag()
model = SelectFromModel(estimator=allow_nan_est)
assert model._get_tags()['allow_nan'] is True
no_nan_est = NoNaNTag()
model = SelectFromModel(estimator=no_nan_est)
assert model._get_tags()['allow_nan'] is False

View file

@@ -0,0 +1,209 @@
import numpy as np
import pytest
from scipy.sparse import csr_matrix
from sklearn.utils import check_random_state
from sklearn.utils._testing import assert_array_equal, assert_almost_equal
from sklearn.feature_selection._mutual_info import _compute_mi
from sklearn.feature_selection import (mutual_info_regression,
mutual_info_classif)
def test_compute_mi_dd():
# In the discrete case the computations are straightforward and can be
# done by hand on the given vectors.
x = np.array([0, 1, 1, 0, 0])
y = np.array([1, 0, 0, 0, 1])
H_x = H_y = -(3/5) * np.log(3/5) - (2/5) * np.log(2/5)
H_xy = -1/5 * np.log(1/5) - 2/5 * np.log(2/5) - 2/5 * np.log(2/5)
I_xy = H_x + H_y - H_xy
assert_almost_equal(_compute_mi(x, y, True, True), I_xy)
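# A quick numeric cross-check of the hand computation above (a sketch, not
# part of the original tests): estimate each entropy from empirical counts
# and confirm I(X; Y) = H(X) + H(Y) - H(X, Y).
def _empirical_entropy(labels):
    # H = -sum p * log(p) over the empirical distribution of `labels`.
    from collections import Counter
    n = len(labels)
    return -sum((c / n) * np.log(c / n) for c in Counter(labels).values())

def _empirical_mi(x, y):
    return (_empirical_entropy(x) + _empirical_entropy(y)
            - _empirical_entropy(list(zip(x, y))))
# _empirical_mi([0, 1, 1, 0, 0], [1, 0, 0, 0, 1]) reproduces I_xy above.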
def test_compute_mi_cc():
# For two continuous variables a good approach is to test on a bivariate
# normal distribution, for which the mutual information is known.
# Mean of the distribution, irrelevant for mutual information.
mean = np.zeros(2)
# Set up the covariance matrix with correlation coefficient equal to 0.5.
sigma_1 = 1
sigma_2 = 10
corr = 0.5
cov = np.array([
[sigma_1**2, corr * sigma_1 * sigma_2],
[corr * sigma_1 * sigma_2, sigma_2**2]
])
# True theoretical mutual information.
I_theory = (np.log(sigma_1) + np.log(sigma_2) -
0.5 * np.log(np.linalg.det(cov)))
rng = check_random_state(0)
Z = rng.multivariate_normal(mean, cov, size=1000)
x, y = Z[:, 0], Z[:, 1]
# Theory and computed values won't be very close; assert only that the
# first digit after the decimal point matches.
for n_neighbors in [3, 5, 7]:
I_computed = _compute_mi(x, y, False, False, n_neighbors)
assert_almost_equal(I_computed, I_theory, 1)
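# Why I_theory above is the right closed form (a sketch): for a bivariate
# normal with correlation rho, I(X; Y) = -0.5 * ln(1 - rho**2), and since
# det(cov) = sigma_1**2 * sigma_2**2 * (1 - rho**2) this is exactly
# ln(sigma_1) + ln(sigma_2) - 0.5 * ln(det(cov)).
def _mi_bivariate_normal(sigma_1, sigma_2, corr):
    cov = np.array([
        [sigma_1**2, corr * sigma_1 * sigma_2],
        [corr * sigma_1 * sigma_2, sigma_2**2]
    ])
    closed_form = -0.5 * np.log(1 - corr**2)
    via_det = (np.log(sigma_1) + np.log(sigma_2)
               - 0.5 * np.log(np.linalg.det(cov)))
    assert np.isclose(closed_form, via_det)
    return closed_form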
def test_compute_mi_cd():
# To test, define a joint distribution as follows:
# p(x, y) = p(x) p(y | x)
# X ~ Bernoulli(p)
# (Y | x = 0) ~ Uniform(-1, 1)
# (Y | x = 1) ~ Uniform(0, 2)
# Use the following formula for mutual information:
# I(X; Y) = H(Y) - H(Y | X)
# Two entropies can be computed by hand:
# H(Y) = -(1-p)/2 * ln((1-p)/2) - p/2 * ln(p/2) - 1/2 * ln(1/2)
# H(Y | X) = ln(2)
# Now we need to implement sampling from our distribution, which is
# done easily using conditional distribution logic.
n_samples = 1000
rng = check_random_state(0)
for p in [0.3, 0.5, 0.7]:
x = rng.uniform(size=n_samples) > p
y = np.empty(n_samples)
mask = x == 0
y[mask] = rng.uniform(-1, 1, size=np.sum(mask))
y[~mask] = rng.uniform(0, 2, size=np.sum(~mask))
I_theory = -0.5 * ((1 - p) * np.log(0.5 * (1 - p)) +
p * np.log(0.5 * p) + np.log(0.5)) - np.log(2)
# Assert with the same tolerance as in the continuous case.
for n_neighbors in [3, 5, 7]:
I_computed = _compute_mi(x, y, True, False, n_neighbors)
assert_almost_equal(I_computed, I_theory, 1)
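# A sketch verifying I_theory from the marginal density of Y directly:
# f(y) = (1 - p)/2 on (-1, 0), 1/2 on (0, 1) and p/2 on (1, 2), each over
# a unit-length interval, while H(Y | X) = ln(2) because both conditionals
# are uniform on an interval of length 2; I(X; Y) = H(Y) - H(Y | X).
def _mi_uniform_mixture(p):
    densities = [(1 - p) / 2, 1 / 2, p / 2]
    H_y = -sum(f * np.log(f) for f in densities)
    return H_y - np.log(2)
# _mi_uniform_mixture(p) agrees with the I_theory expression for each p.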
def test_compute_mi_cd_unique_label():
# Test that adding a unique label doesn't change MI.
n_samples = 100
x = np.random.uniform(size=n_samples) > 0.5
y = np.empty(n_samples)
mask = x == 0
y[mask] = np.random.uniform(-1, 1, size=np.sum(mask))
y[~mask] = np.random.uniform(0, 2, size=np.sum(~mask))
mi_1 = _compute_mi(x, y, True, False)
x = np.hstack((x, 2))
y = np.hstack((y, 10))
mi_2 = _compute_mi(x, y, True, False)
assert mi_1 == mi_2
# We are going to test that feature ordering by MI matches our expectations.
def test_mutual_info_classif_discrete():
X = np.array([[0, 0, 0],
[1, 1, 0],
[2, 0, 1],
[2, 0, 1],
[2, 0, 1]])
y = np.array([0, 1, 2, 2, 1])
# Here X[:, 0] is the most informative feature, and X[:, 1] is weakly
# informative.
mi = mutual_info_classif(X, y, discrete_features=True)
assert_array_equal(np.argsort(-mi), np.array([0, 2, 1]))
def test_mutual_info_regression():
# We generate samples from a multivariate normal distribution, using a
# transformation of initially uncorrelated variables. The zeroth variable
# after the transformation is selected as the target vector; it has the
# strongest correlation with variable 2 and the weakest correlation
# with variable 1.
T = np.array([
[1, 0.5, 2, 1],
[0, 1, 0.1, 0.0],
[0, 0.1, 1, 0.1],
[0, 0.1, 0.1, 1]
])
cov = T.dot(T.T)
mean = np.zeros(4)
rng = check_random_state(0)
Z = rng.multivariate_normal(mean, cov, size=1000)
X = Z[:, 1:]
y = Z[:, 0]
mi = mutual_info_regression(X, y, random_state=0)
assert_array_equal(np.argsort(-mi), np.array([1, 2, 0]))
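# A sketch of why the expected ranking is [1, 2, 0]: the correlations of
# the target Z[:, 0] with the three features follow directly from
# cov = T @ T.T, and for a multivariate normal a larger |correlation|
# implies a larger MI.
def _target_feature_correlations(T):
    cov = T.dot(T.T)
    std = np.sqrt(np.diag(cov))
    return cov[0, 1:] / (std[0] * std[1:])
# With the T above this gives roughly [0.28, 0.85, 0.50], so
# np.argsort(-np.abs(...)) is [1, 2, 0], matching the assertion.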
def test_mutual_info_classif_mixed():
# Here the target is discrete and there are two continuous and one
# discrete feature. The idea of this test is clear from the code.
rng = check_random_state(0)
X = rng.rand(1000, 3)
X[:, 1] += X[:, 0]
y = ((0.5 * X[:, 0] + X[:, 2]) > 0.5).astype(int)
X[:, 2] = X[:, 2] > 0.5
mi = mutual_info_classif(X, y, discrete_features=[2], n_neighbors=3,
random_state=0)
assert_array_equal(np.argsort(-mi), [2, 0, 1])
for n_neighbors in [5, 7, 9]:
mi_nn = mutual_info_classif(X, y, discrete_features=[2],
n_neighbors=n_neighbors, random_state=0)
# Check that the continuous values have a higher MI with greater
# n_neighbors
assert mi_nn[0] > mi[0]
assert mi_nn[1] > mi[1]
# n_neighbors should not have any effect on the discrete feature:
# the MI should be the same
assert mi_nn[2] == mi[2]
def test_mutual_info_options():
X = np.array([[0, 0, 0],
[1, 1, 0],
[2, 0, 1],
[2, 0, 1],
[2, 0, 1]], dtype=float)
y = np.array([0, 1, 2, 2, 1], dtype=float)
X_csr = csr_matrix(X)
for mutual_info in (mutual_info_regression, mutual_info_classif):
with pytest.raises(ValueError):
mutual_info(X_csr, y, discrete_features=False)
with pytest.raises(ValueError):
mutual_info(X, y, discrete_features='manual')
with pytest.raises(ValueError):
mutual_info(X_csr, y, discrete_features=[True, False, True])
with pytest.raises(IndexError):
mutual_info(X, y, discrete_features=[True, False, True, False])
with pytest.raises(IndexError):
mutual_info(X, y, discrete_features=[1, 4])
mi_1 = mutual_info(X, y, discrete_features='auto', random_state=0)
mi_2 = mutual_info(X, y, discrete_features=False, random_state=0)
mi_3 = mutual_info(X_csr, y, discrete_features='auto', random_state=0)
mi_4 = mutual_info(X_csr, y, discrete_features=True, random_state=0)
mi_5 = mutual_info(X, y, discrete_features=[True, False, True],
random_state=0)
mi_6 = mutual_info(X, y, discrete_features=[0, 2], random_state=0)
assert_array_equal(mi_1, mi_2)
assert_array_equal(mi_3, mi_4)
assert_array_equal(mi_5, mi_6)
assert not np.allclose(mi_1, mi_3)

View file

@@ -0,0 +1,405 @@
"""
Testing Recursive feature elimination
"""
import pytest
import numpy as np
from numpy.testing import assert_array_almost_equal, assert_array_equal
from scipy import sparse
from sklearn.feature_selection import RFE, RFECV
from sklearn.datasets import load_iris, make_friedman1
from sklearn.metrics import zero_one_loss
from sklearn.svm import SVC, SVR
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GroupKFold
from sklearn.utils import check_random_state
from sklearn.utils._testing import ignore_warnings
from sklearn.metrics import make_scorer
from sklearn.metrics import get_scorer
class MockClassifier:
"""
Dummy classifier to test recursive feature elimination
"""
def __init__(self, foo_param=0):
self.foo_param = foo_param
def fit(self, X, y):
assert len(X) == len(y)
self.coef_ = np.ones(X.shape[1], dtype=np.float64)
return self
def predict(self, T):
return T.shape[0]
predict_proba = predict
decision_function = predict
transform = predict
def score(self, X=None, y=None):
return 0.
def get_params(self, deep=True):
return {'foo_param': self.foo_param}
def set_params(self, **params):
return self
def _get_tags(self):
return {}
def test_rfe_features_importance():
generator = check_random_state(0)
iris = load_iris()
X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
y = iris.target
clf = RandomForestClassifier(n_estimators=20,
random_state=generator, max_depth=2)
rfe = RFE(estimator=clf, n_features_to_select=4, step=0.1)
rfe.fit(X, y)
assert len(rfe.ranking_) == X.shape[1]
clf_svc = SVC(kernel="linear")
rfe_svc = RFE(estimator=clf_svc, n_features_to_select=4, step=0.1)
rfe_svc.fit(X, y)
# Check if the supports are equal
assert_array_equal(rfe.get_support(), rfe_svc.get_support())
def test_rfe():
generator = check_random_state(0)
iris = load_iris()
X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
X_sparse = sparse.csr_matrix(X)
y = iris.target
# dense model
clf = SVC(kernel="linear")
rfe = RFE(estimator=clf, n_features_to_select=4, step=0.1)
rfe.fit(X, y)
X_r = rfe.transform(X)
clf.fit(X_r, y)
assert len(rfe.ranking_) == X.shape[1]
# sparse model
clf_sparse = SVC(kernel="linear")
rfe_sparse = RFE(estimator=clf_sparse, n_features_to_select=4, step=0.1)
rfe_sparse.fit(X_sparse, y)
X_r_sparse = rfe_sparse.transform(X_sparse)
assert X_r.shape == iris.data.shape
assert_array_almost_equal(X_r[:10], iris.data[:10])
assert_array_almost_equal(rfe.predict(X), clf.predict(iris.data))
assert rfe.score(X, y) == clf.score(iris.data, iris.target)
assert_array_almost_equal(X_r, X_r_sparse.toarray())
def test_rfe_mockclassifier():
generator = check_random_state(0)
iris = load_iris()
X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
y = iris.target
# dense model
clf = MockClassifier()
rfe = RFE(estimator=clf, n_features_to_select=4, step=0.1)
rfe.fit(X, y)
X_r = rfe.transform(X)
clf.fit(X_r, y)
assert len(rfe.ranking_) == X.shape[1]
assert X_r.shape == iris.data.shape
def test_rfecv():
generator = check_random_state(0)
iris = load_iris()
X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
y = list(iris.target) # regression test: list should be supported
# Test using the score function
rfecv = RFECV(estimator=SVC(kernel="linear"), step=1)
rfecv.fit(X, y)
# non-regression test for missing worst feature:
assert len(rfecv.grid_scores_) == X.shape[1]
assert len(rfecv.ranking_) == X.shape[1]
X_r = rfecv.transform(X)
# All the noisy variables were filtered out
assert_array_equal(X_r, iris.data)
# same in sparse
rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=1)
X_sparse = sparse.csr_matrix(X)
rfecv_sparse.fit(X_sparse, y)
X_r_sparse = rfecv_sparse.transform(X_sparse)
assert_array_equal(X_r_sparse.toarray(), iris.data)
# Test using a customized loss function
scoring = make_scorer(zero_one_loss, greater_is_better=False)
rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, scoring=scoring)
ignore_warnings(rfecv.fit)(X, y)
X_r = rfecv.transform(X)
assert_array_equal(X_r, iris.data)
# Test using a scorer
scorer = get_scorer('accuracy')
rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, scoring=scorer)
rfecv.fit(X, y)
X_r = rfecv.transform(X)
assert_array_equal(X_r, iris.data)
# Test fix on grid_scores
def test_scorer(estimator, X, y):
return 1.0
rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, scoring=test_scorer)
rfecv.fit(X, y)
assert_array_equal(rfecv.grid_scores_, np.ones(len(rfecv.grid_scores_)))
# In the event of cross validation score ties, the expected behavior of
# RFECV is to return the FEWEST features that maximize the CV score.
# Because test_scorer always returns 1.0 in this example, RFECV should
# reduce the dimensionality to a single feature (i.e. n_features_ = 1)
assert rfecv.n_features_ == 1
# Same as the first two tests, but with step=2
rfecv = RFECV(estimator=SVC(kernel="linear"), step=2)
rfecv.fit(X, y)
assert len(rfecv.grid_scores_) == 6
assert len(rfecv.ranking_) == X.shape[1]
X_r = rfecv.transform(X)
assert_array_equal(X_r, iris.data)
rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=2)
X_sparse = sparse.csr_matrix(X)
rfecv_sparse.fit(X_sparse, y)
X_r_sparse = rfecv_sparse.transform(X_sparse)
assert_array_equal(X_r_sparse.toarray(), iris.data)
# Verifying that steps < 1 don't blow up.
rfecv_sparse = RFECV(estimator=SVC(kernel="linear"), step=.2)
X_sparse = sparse.csr_matrix(X)
rfecv_sparse.fit(X_sparse, y)
X_r_sparse = rfecv_sparse.transform(X_sparse)
assert_array_equal(X_r_sparse.toarray(), iris.data)
def test_rfecv_mockclassifier():
generator = check_random_state(0)
iris = load_iris()
X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
y = list(iris.target) # regression test: list should be supported
# Test using the score function
rfecv = RFECV(estimator=MockClassifier(), step=1)
rfecv.fit(X, y)
# non-regression test for missing worst feature:
assert len(rfecv.grid_scores_) == X.shape[1]
assert len(rfecv.ranking_) == X.shape[1]
def test_rfecv_verbose_output():
# Check verbose=1 is producing an output.
from io import StringIO
import sys
sys.stdout = StringIO()
generator = check_random_state(0)
iris = load_iris()
X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
y = list(iris.target)
rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, verbose=1)
rfecv.fit(X, y)
verbose_output = sys.stdout
verbose_output.seek(0)
assert len(verbose_output.readline()) > 0
def test_rfecv_grid_scores_size():
generator = check_random_state(0)
iris = load_iris()
X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
y = list(iris.target) # regression test: list should be supported
# Non-regression test for varying combinations of step and
# min_features_to_select.
for step, min_features_to_select in [[2, 1], [2, 2], [3, 3]]:
rfecv = RFECV(estimator=MockClassifier(), step=step,
min_features_to_select=min_features_to_select)
rfecv.fit(X, y)
score_len = np.ceil(
(X.shape[1] - min_features_to_select) / step) + 1
assert len(rfecv.grid_scores_) == score_len
assert len(rfecv.ranking_) == X.shape[1]
assert rfecv.n_features_ >= min_features_to_select
def test_rfe_estimator_tags():
rfe = RFE(SVC(kernel='linear'))
assert rfe._estimator_type == "classifier"
# make sure that cross-validation is stratified
iris = load_iris()
score = cross_val_score(rfe, iris.data, iris.target)
assert score.min() > .7
def test_rfe_min_step():
n_features = 10
X, y = make_friedman1(n_samples=50, n_features=n_features, random_state=0)
n_samples, n_features = X.shape
estimator = SVR(kernel="linear")
# Test when floor(step * n_features) <= 0
selector = RFE(estimator, step=0.01)
sel = selector.fit(X, y)
assert sel.support_.sum() == n_features // 2
# Test when step is between (0,1) and floor(step * n_features) > 0
selector = RFE(estimator, step=0.20)
sel = selector.fit(X, y)
assert sel.support_.sum() == n_features // 2
# Test when step is an integer
selector = RFE(estimator, step=5)
sel = selector.fit(X, y)
assert sel.support_.sum() == n_features // 2
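# A sketch (an assumption mirroring RFE's documented step handling, not a
# copy of its code) of how a float step resolves to the number of features
# removed per iteration: fractions of n_features are floored but clamped
# to at least one, which is why every case above ends at n_features // 2.
def _resolve_step(step, n_features):
    if 0.0 < step < 1.0:
        return max(1, int(step * n_features))
    return int(step)

assert _resolve_step(0.01, 10) == 1
assert _resolve_step(0.20, 10) == 2
assert _resolve_step(5, 10) == 5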
def test_number_of_subsets_of_features():
# In RFE, 'number_of_subsets_of_features'
# = the number of iterations in '_fit'
# = max(ranking_)
# = 1 + (n_features + step - n_features_to_select - 1) // step
# After optimization #4534, this number
# = 1 + np.ceil((n_features - n_features_to_select) / float(step))
# This test case is to test their equivalence, refer to #4534 and #3824
def formula1(n_features, n_features_to_select, step):
return 1 + ((n_features + step - n_features_to_select - 1) // step)
def formula2(n_features, n_features_to_select, step):
return 1 + np.ceil((n_features - n_features_to_select) / float(step))
# RFE
# Case 1, n_features - n_features_to_select is divisible by step
# Case 2, n_features - n_features_to_select is not divisible by step
n_features_list = [11, 11]
n_features_to_select_list = [3, 3]
step_list = [2, 3]
for n_features, n_features_to_select, step in zip(
n_features_list, n_features_to_select_list, step_list):
generator = check_random_state(43)
X = generator.normal(size=(100, n_features))
y = generator.rand(100).round()
rfe = RFE(estimator=SVC(kernel="linear"),
n_features_to_select=n_features_to_select, step=step)
rfe.fit(X, y)
# this number also equals the maximum of ranking_
assert (np.max(rfe.ranking_) ==
formula1(n_features, n_features_to_select, step))
assert (np.max(rfe.ranking_) ==
formula2(n_features, n_features_to_select, step))
# In RFECV, 'fit' calls 'RFE._fit'
# 'number_of_subsets_of_features' of RFE
# = the size of 'grid_scores' of RFECV
# = the number of iterations of the for loop before optimization #4534
# RFECV, n_features_to_select = 1
# Case 1, n_features - 1 is divisible by step
# Case 2, n_features - 1 is not divisible by step
n_features_to_select = 1
n_features_list = [11, 10]
step_list = [2, 2]
for n_features, step in zip(n_features_list, step_list):
generator = check_random_state(43)
X = generator.normal(size=(100, n_features))
y = generator.rand(100).round()
rfecv = RFECV(estimator=SVC(kernel="linear"), step=step)
rfecv.fit(X, y)
assert (rfecv.grid_scores_.shape[0] ==
formula1(n_features, n_features_to_select, step))
assert (rfecv.grid_scores_.shape[0] ==
formula2(n_features, n_features_to_select, step))
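# A brute-force sketch (not part of the original tests) of the formula1 ==
# formula2 equivalence asserted above, checked over a small grid: for
# positive integers a and step, ceil(a / step) == (a + step - 1) // step.
def _check_formula_equivalence():
    for n_features in range(2, 20):
        for n_select in range(1, n_features):
            for step in range(1, n_features):
                f1 = 1 + ((n_features + step - n_select - 1) // step)
                f2 = 1 + np.ceil((n_features - n_select) / float(step))
                assert f1 == f2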
def test_rfe_cv_n_jobs():
generator = check_random_state(0)
iris = load_iris()
X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
y = iris.target
rfecv = RFECV(estimator=SVC(kernel='linear'))
rfecv.fit(X, y)
rfecv_ranking = rfecv.ranking_
rfecv_grid_scores = rfecv.grid_scores_
rfecv.set_params(n_jobs=2)
rfecv.fit(X, y)
assert_array_almost_equal(rfecv.ranking_, rfecv_ranking)
assert_array_almost_equal(rfecv.grid_scores_, rfecv_grid_scores)
def test_rfe_cv_groups():
generator = check_random_state(0)
iris = load_iris()
number_groups = 4
groups = np.floor(np.linspace(0, number_groups, len(iris.target)))
X = iris.data
y = (iris.target > 0).astype(int)
est_groups = RFECV(
estimator=RandomForestClassifier(random_state=generator),
step=1,
scoring='accuracy',
cv=GroupKFold(n_splits=2)
)
est_groups.fit(X, y, groups=groups)
assert est_groups.n_features_ > 0
@pytest.mark.parametrize("cv", [
None,
5
])
def test_rfe_allow_nan_inf_in_x(cv):
iris = load_iris()
X = iris.data
y = iris.target
# add nan and inf value to X
X[0][0] = np.NaN
X[0][1] = np.Inf
clf = MockClassifier()
if cv is not None:
rfe = RFECV(estimator=clf, cv=cv)
else:
rfe = RFE(estimator=clf)
rfe.fit(X, y)
rfe.transform(X)
@pytest.mark.parametrize('ClsRFE', [
RFE,
RFECV
])
def test_multioutput(ClsRFE):
X = np.random.normal(size=(10, 3))
y = np.random.randint(2, size=(10, 2))
clf = RandomForestClassifier(n_estimators=5)
rfe_test = ClsRFE(clf)
rfe_test.fit(X, y)

View file

@@ -0,0 +1,60 @@
import numpy as np
import pytest
from sklearn.utils._testing import assert_array_equal
from scipy.sparse import bsr_matrix, csc_matrix, csr_matrix
from sklearn.feature_selection import VarianceThreshold
data = [[0, 1, 2, 3, 4],
[0, 2, 2, 3, 5],
[1, 1, 2, 4, 0]]
data2 = [[-0.13725701]] * 10
def test_zero_variance():
# Test VarianceThreshold with default setting, zero variance.
for X in [data, csr_matrix(data), csc_matrix(data), bsr_matrix(data)]:
sel = VarianceThreshold().fit(X)
assert_array_equal([0, 1, 3, 4], sel.get_support(indices=True))
with pytest.raises(ValueError):
VarianceThreshold().fit([[0, 1, 2, 3]])
with pytest.raises(ValueError):
VarianceThreshold().fit([[0, 1], [0, 1]])
def test_variance_threshold():
# Test VarianceThreshold with custom variance.
for X in [data, csr_matrix(data)]:
X = VarianceThreshold(threshold=.4).fit_transform(X)
assert (len(data), 1) == X.shape
@pytest.mark.skipif(np.var(data2) == 0,
reason=('This test is not valid for this platform, '
'as it relies on numerical instabilities.'))
def test_zero_variance_floating_point_error():
# Test that VarianceThreshold(0.0).fit eliminates features that have
# the same value in every sample, even when floating point errors
# cause np.var not to be 0 for the feature.
# See #13691
for X in [data2, csr_matrix(data2), csc_matrix(data2), bsr_matrix(data2)]:
msg = "No feature in X meets the variance threshold 0.00000"
with pytest.raises(ValueError, match=msg):
VarianceThreshold().fit(X)
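# A sketch of the numerical issue motivating this test (platform-dependent,
# per the skipif above): np.var subtracts the mean first, and rounding can
# leave a tiny positive variance for a column whose values are identical.
constant_column = np.full(10, -0.13725701)
# np.var(constant_column) may be on the order of 1e-18 instead of exactly
# 0.0, so an exact `var == 0` check would wrongly keep the feature.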
def test_variance_nan():
arr = np.array(data, dtype=np.float64)
# add a single NaN and the feature should still be included
arr[0, 0] = np.NaN
# make all values in the feature NaN and the feature should be rejected
arr[:, 1] = np.NaN
for X in [arr, csr_matrix(arr), csc_matrix(arr), bsr_matrix(arr)]:
sel = VarianceThreshold().fit(X)
assert_array_equal([0, 3, 4], sel.get_support(indices=True))

View file

@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _univariate_selection # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.feature_selection.univariate_selection'
correct_import_path = 'sklearn.feature_selection'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_univariate_selection, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)
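# A hedged usage sketch (from a user script, assuming the warning
# machinery above): importing through the deprecated path still resolves
# names via the module-level __getattr__, but outside pytest it emits a
# FutureWarning pointing at the supported location.
import warnings

with warnings.catch_warnings(record=True):
    warnings.simplefilter("always")
    from sklearn.feature_selection.univariate_selection import chi2  # old path
from sklearn.feature_selection import chi2  # preferred import path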

View file

@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _variance_threshold # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.feature_selection.variance_threshold'
correct_import_path = 'sklearn.feature_selection'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_variance_threshold, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)