Uploaded Test files
This commit is contained in:
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
275 venv/Lib/site-packages/sklearn/feature_selection/_from_model.py (Normal file)
@@ -0,0 +1,275 @@
# Authors: Gilles Louppe, Mathieu Blondel, Maheshakya Wijewardena
# License: BSD 3 clause

import numpy as np
import numbers

from ._base import SelectorMixin
from ..base import BaseEstimator, clone, MetaEstimatorMixin
from ..utils.validation import check_is_fitted

from ..exceptions import NotFittedError
from ..utils.metaestimators import if_delegate_has_method
from ..utils.validation import _deprecate_positional_args


def _get_feature_importances(estimator, norm_order=1):
    """Retrieve or aggregate feature importances from estimator"""
    importances = getattr(estimator, "feature_importances_", None)

    coef_ = getattr(estimator, "coef_", None)
    if importances is None and coef_ is not None:
        if estimator.coef_.ndim == 1:
            importances = np.abs(coef_)

        else:
            importances = np.linalg.norm(coef_, axis=0,
                                         ord=norm_order)

    elif importances is None:
        raise ValueError(
            "The underlying estimator %s has no `coef_` or "
            "`feature_importances_` attribute. Either pass a fitted estimator"
            " to SelectFromModel or call fit before calling transform."
            % estimator.__class__.__name__)

    return importances


def _calculate_threshold(estimator, importances, threshold):
    """Interpret the threshold value"""

    if threshold is None:
        # determine default from estimator
        est_name = estimator.__class__.__name__
        if ((hasattr(estimator, "penalty") and estimator.penalty == "l1") or
                "Lasso" in est_name):
            # the natural default threshold is 0 when l1 penalty was used
            threshold = 1e-5
        else:
            threshold = "mean"
    if isinstance(threshold, str):
        if "*" in threshold:
            scale, reference = threshold.split("*")
            scale = float(scale.strip())
            reference = reference.strip()

            if reference == "median":
                reference = np.median(importances)
            elif reference == "mean":
                reference = np.mean(importances)
            else:
                raise ValueError("Unknown reference: " + reference)

            threshold = scale * reference

        elif threshold == "median":
            threshold = np.median(importances)

        elif threshold == "mean":
            threshold = np.mean(importances)

        else:
            raise ValueError("Expected threshold='mean' or threshold='median' "
                             "got %s" % threshold)

    else:
        threshold = float(threshold)

    return threshold


class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator):
    """Meta-transformer for selecting features based on importance weights.

    .. versionadded:: 0.17

    Parameters
    ----------
    estimator : object
        The base estimator from which the transformer is built.
        This can be either a fitted estimator (if ``prefit`` is set to True)
        or a non-fitted estimator. The estimator must have either a
        ``feature_importances_`` or ``coef_`` attribute after fitting.

    threshold : string or float, optional, default None
        The threshold value to use for feature selection. Features whose
        importance is greater than or equal to the threshold are kept while
        the others are discarded. If "median" (resp. "mean"), then the
        ``threshold`` value is the median (resp. the mean) of the feature
        importances. A scaling factor (e.g., "1.25*mean") may also be used.
        If None and if the estimator has a parameter penalty set to l1,
        either explicitly or implicitly (e.g., Lasso), the threshold used is
        1e-5. Otherwise, "mean" is used by default.

    prefit : bool, default False
        Whether a prefit model is expected to be passed into the constructor
        directly or not. If True, ``transform`` must be called directly
        and SelectFromModel cannot be used with ``cross_val_score``,
        ``GridSearchCV`` and similar utilities that clone the estimator.
        Otherwise train the model using ``fit`` and then ``transform`` to do
        feature selection.

    norm_order : non-zero int, inf, -inf, default 1
        Order of the norm used to filter the vectors of coefficients below
        ``threshold`` in the case where the ``coef_`` attribute of the
        estimator is of dimension 2.

    max_features : int or None, optional
        The maximum number of features to select.
        To only select based on ``max_features``, set ``threshold=-np.inf``.

        .. versionadded:: 0.20

    Attributes
    ----------
    estimator_ : an estimator
        The base estimator from which the transformer is built.
        This is stored only when a non-fitted estimator is passed to the
        ``SelectFromModel``, i.e. when prefit is False.

    threshold_ : float
        The threshold value used for feature selection.

    Notes
    -----
    Allows NaN/Inf in the input if the underlying estimator does as well.

    Examples
    --------
    >>> from sklearn.feature_selection import SelectFromModel
    >>> from sklearn.linear_model import LogisticRegression
    >>> X = [[ 0.87, -1.34,  0.31 ],
    ...      [-2.79, -0.02, -0.85 ],
    ...      [-1.34, -0.48, -2.55 ],
    ...      [ 1.92,  1.48,  0.65 ]]
    >>> y = [0, 1, 0, 1]
    >>> selector = SelectFromModel(estimator=LogisticRegression()).fit(X, y)
    >>> selector.estimator_.coef_
    array([[-0.3252302 ,  0.83462377,  0.49750423]])
    >>> selector.threshold_
    0.55245...
    >>> selector.get_support()
    array([False,  True, False])
    >>> selector.transform(X)
    array([[-1.34],
           [-0.02],
           [-0.48],
           [ 1.48]])
    """
    @_deprecate_positional_args
    def __init__(self, estimator, *, threshold=None, prefit=False,
                 norm_order=1, max_features=None):
        self.estimator = estimator
        self.threshold = threshold
        self.prefit = prefit
        self.norm_order = norm_order
        self.max_features = max_features

    def _get_support_mask(self):
        # With a prefit estimator, transform can be called directly
        # without fitting this meta-transformer first.
        if self.prefit:
            estimator = self.estimator
        elif hasattr(self, 'estimator_'):
            estimator = self.estimator_
        else:
            raise ValueError('Either fit the model before transform or set'
                             ' "prefit=True" while passing the fitted'
                             ' estimator to the constructor.')
        scores = _get_feature_importances(estimator, self.norm_order)
        threshold = _calculate_threshold(estimator, scores, self.threshold)
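        # When max_features is set, keep at most the max_features highest
        # scoring features (the stable mergesort keeps tied features in their
        # original column order); the threshold test below then further
        # removes any of those whose score is below the threshold.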
        if self.max_features is not None:
            mask = np.zeros_like(scores, dtype=bool)
            candidate_indices = \
                np.argsort(-scores, kind='mergesort')[:self.max_features]
            mask[candidate_indices] = True
        else:
            mask = np.ones_like(scores, dtype=bool)
        mask[scores < threshold] = False
        return mask

    def fit(self, X, y=None, **fit_params):
        """Fit the SelectFromModel meta-transformer.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples.

        y : array-like, shape (n_samples,)
            The target values (integers that correspond to classes in
            classification, real numbers in regression).

        **fit_params : Other estimator specific parameters

        Returns
        -------
        self : object
        """
        if self.max_features is not None:
            if not isinstance(self.max_features, numbers.Integral):
                raise TypeError("'max_features' should be an integer between"
                                " 0 and {} features. Got {!r} instead."
                                .format(X.shape[1], self.max_features))
            elif self.max_features < 0 or self.max_features > X.shape[1]:
                raise ValueError("'max_features' should be between 0 and {} "
                                 "features. Got {} instead."
                                 .format(X.shape[1], self.max_features))

        if self.prefit:
            raise NotFittedError(
                "Since 'prefit=True', call transform directly")
        self.estimator_ = clone(self.estimator)
        self.estimator_.fit(X, y, **fit_params)
        return self

    @property
    def threshold_(self):
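        # Recomputed on each access from the fitted sub-estimator's current
        # feature importances, using the same threshold interpretation rules.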
        scores = _get_feature_importances(self.estimator_, self.norm_order)
        return _calculate_threshold(self.estimator, scores, self.threshold)

    @if_delegate_has_method('estimator')
    def partial_fit(self, X, y=None, **fit_params):
        """Fit the SelectFromModel meta-transformer only once.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples.

        y : array-like, shape (n_samples,)
            The target values (integers that correspond to classes in
            classification, real numbers in regression).

        **fit_params : Other estimator specific parameters

        Returns
        -------
        self : object
        """
        if self.prefit:
            raise NotFittedError(
                "Since 'prefit=True', call transform directly")
        if not hasattr(self, "estimator_"):
            self.estimator_ = clone(self.estimator)
        self.estimator_.partial_fit(X, y, **fit_params)
        return self

    @property
    def n_features_in_(self):
        # For consistency with other estimators we raise an AttributeError so
        # that hasattr() fails if the estimator isn't fitted.
        try:
            check_is_fitted(self)
        except NotFittedError as nfe:
            raise AttributeError(
                "{} object has no n_features_in_ attribute."
                .format(self.__class__.__name__)
            ) from nfe

        return self.estimator_.n_features_in_

    def _more_tags(self):
        estimator_tags = self.estimator._get_tags()
        return {'allow_nan': estimator_tags.get('allow_nan', True)}
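
# A minimal usage sketch, assuming an l1-penalised LinearSVC as the base
# estimator and a feature matrix X with labels y (both hypothetical here):
#
#     from sklearn.svm import LinearSVC
#     from sklearn.feature_selection import SelectFromModel
#     import numpy as np
#
#     # Keep at most two features, ranked by coefficient magnitude, by
#     # disabling the importance threshold as documented above.
#     selector = SelectFromModel(LinearSVC(C=0.01, penalty="l1", dual=False),
#                                threshold=-np.inf, max_features=2)
#     X_reduced = selector.fit_transform(X, y)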