Uploaded Test files
This commit is contained in:
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
275 venv/Lib/site-packages/sklearn/feature_selection/_from_model.py (Normal file)
@@ -0,0 +1,275 @@
# Authors: Gilles Louppe, Mathieu Blondel, Maheshakya Wijewardena
# License: BSD 3 clause

import numpy as np
import numbers

from ._base import SelectorMixin
from ..base import BaseEstimator, clone, MetaEstimatorMixin
from ..utils.validation import check_is_fitted

from ..exceptions import NotFittedError
from ..utils.metaestimators import if_delegate_has_method
from ..utils.validation import _deprecate_positional_args


def _get_feature_importances(estimator, norm_order=1):
    """Retrieve or aggregate feature importances from estimator"""
    importances = getattr(estimator, "feature_importances_", None)

    coef_ = getattr(estimator, "coef_", None)
    if importances is None and coef_ is not None:
        if estimator.coef_.ndim == 1:
            importances = np.abs(coef_)

        else:
            importances = np.linalg.norm(coef_, axis=0,
                                         ord=norm_order)

    elif importances is None:
        raise ValueError(
            "The underlying estimator %s has no `coef_` or "
            "`feature_importances_` attribute. Either pass a fitted estimator"
            " to SelectFromModel or call fit before calling transform."
            % estimator.__class__.__name__)

    return importances


def _calculate_threshold(estimator, importances, threshold):
    """Interpret the threshold value"""

    if threshold is None:
        # determine default from estimator
        est_name = estimator.__class__.__name__
        if ((hasattr(estimator, "penalty") and estimator.penalty == "l1") or
                "Lasso" in est_name):
            # the natural default threshold is 0 when l1 penalty was used
            threshold = 1e-5
        else:
            threshold = "mean"
    if isinstance(threshold, str):
        if "*" in threshold:
            scale, reference = threshold.split("*")
            scale = float(scale.strip())
            reference = reference.strip()

            if reference == "median":
                reference = np.median(importances)
            elif reference == "mean":
                reference = np.mean(importances)
            else:
                raise ValueError("Unknown reference: " + reference)

            threshold = scale * reference

        elif threshold == "median":
            threshold = np.median(importances)

        elif threshold == "mean":
            threshold = np.mean(importances)

        else:
            raise ValueError("Expected threshold='mean' or threshold='median' "
                             "got %s" % threshold)

    else:
        threshold = float(threshold)

    return threshold


class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator):
    """Meta-transformer for selecting features based on importance weights.

    .. versionadded:: 0.17

    Parameters
    ----------
    estimator : object
        The base estimator from which the transformer is built.
        This can be either a fitted estimator (if ``prefit`` is set to True)
        or a non-fitted estimator. The estimator must have either a
        ``feature_importances_`` or ``coef_`` attribute after fitting.

    threshold : string or float, optional, default None
        The threshold value to use for feature selection. Features whose
        importance is greater than or equal to the threshold are kept while
        the others are discarded. If "median" (resp. "mean"), then the
        ``threshold`` value is the median (resp. the mean) of the feature
        importances. A scaling factor (e.g., "1.25*mean") may also be used.
        If None and if the estimator has a parameter penalty set to l1,
        either explicitly or implicitly (e.g., Lasso), the threshold used is
        1e-5. Otherwise, "mean" is used by default.

    prefit : bool, default False
        Whether a prefit model is expected to be passed into the constructor
        directly or not. If True, ``transform`` must be called directly
        and SelectFromModel cannot be used with ``cross_val_score``,
        ``GridSearchCV`` and similar utilities that clone the estimator.
        Otherwise train the model using ``fit`` and then ``transform`` to do
        feature selection.

    norm_order : non-zero int, inf, -inf, default 1
        Order of the norm used to filter the vectors of coefficients below
        ``threshold`` in the case where the ``coef_`` attribute of the
        estimator is of dimension 2.

    max_features : int or None, optional
        The maximum number of features to select.
        To only select based on ``max_features``, set ``threshold=-np.inf``.

        .. versionadded:: 0.20

    Attributes
    ----------
    estimator_ : an estimator
        The base estimator from which the transformer is built.
        This is stored only when a non-fitted estimator is passed to the
        ``SelectFromModel``, i.e. when prefit is False.

    threshold_ : float
        The threshold value used for feature selection.

    Notes
    -----
    Allows NaN/Inf in the input if the underlying estimator does as well.

    Examples
    --------
    >>> from sklearn.feature_selection import SelectFromModel
    >>> from sklearn.linear_model import LogisticRegression
    >>> X = [[ 0.87, -1.34,  0.31 ],
    ...      [-2.79, -0.02, -0.85 ],
    ...      [-1.34, -0.48, -2.55 ],
    ...      [ 1.92,  1.48,  0.65 ]]
    >>> y = [0, 1, 0, 1]
    >>> selector = SelectFromModel(estimator=LogisticRegression()).fit(X, y)
    >>> selector.estimator_.coef_
    array([[-0.3252302 ,  0.83462377,  0.49750423]])
    >>> selector.threshold_
    0.55245...
    >>> selector.get_support()
    array([False,  True, False])
    >>> selector.transform(X)
    array([[-1.34],
           [-0.02],
           [-0.48],
           [ 1.48]])
    """
    @_deprecate_positional_args
    def __init__(self, estimator, *, threshold=None, prefit=False,
                 norm_order=1, max_features=None):
        self.estimator = estimator
        self.threshold = threshold
        self.prefit = prefit
        self.norm_order = norm_order
        self.max_features = max_features

    def _get_support_mask(self):
        # With a prefit estimator, transform can be called directly
        # without fitting this meta-transformer first.
        if self.prefit:
            estimator = self.estimator
        elif hasattr(self, 'estimator_'):
            estimator = self.estimator_
        else:
            raise ValueError('Either fit the model before transform or set'
                             ' "prefit=True" while passing the fitted'
                             ' estimator to the constructor.')
        scores = _get_feature_importances(estimator, self.norm_order)
        threshold = _calculate_threshold(estimator, scores, self.threshold)
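        # When max_features is set, keep at most the max_features highest
        # scoring features (the stable mergesort keeps tied features in their
        # original column order); the threshold test below then further
        # removes any of those whose score is below the threshold.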
        if self.max_features is not None:
            mask = np.zeros_like(scores, dtype=bool)
            candidate_indices = \
                np.argsort(-scores, kind='mergesort')[:self.max_features]
            mask[candidate_indices] = True
        else:
            mask = np.ones_like(scores, dtype=bool)
        mask[scores < threshold] = False
        return mask

    def fit(self, X, y=None, **fit_params):
        """Fit the SelectFromModel meta-transformer.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples.

        y : array-like, shape (n_samples,)
            The target values (integers that correspond to classes in
            classification, real numbers in regression).

        **fit_params : Other estimator specific parameters

        Returns
        -------
        self : object
        """
        if self.max_features is not None:
            if not isinstance(self.max_features, numbers.Integral):
                raise TypeError("'max_features' should be an integer between"
                                " 0 and {} features. Got {!r} instead."
                                .format(X.shape[1], self.max_features))
            elif self.max_features < 0 or self.max_features > X.shape[1]:
                raise ValueError("'max_features' should be between 0 and {} "
                                 "features. Got {} instead."
                                 .format(X.shape[1], self.max_features))

        if self.prefit:
            raise NotFittedError(
                "Since 'prefit=True', call transform directly")
        self.estimator_ = clone(self.estimator)
        self.estimator_.fit(X, y, **fit_params)
        return self

    @property
    def threshold_(self):
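        # Recomputed on each access from the fitted sub-estimator's current
        # feature importances, using the same threshold interpretation rules.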
        scores = _get_feature_importances(self.estimator_, self.norm_order)
        return _calculate_threshold(self.estimator, scores, self.threshold)

    @if_delegate_has_method('estimator')
    def partial_fit(self, X, y=None, **fit_params):
        """Fit the SelectFromModel meta-transformer only once.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The training input samples.

        y : array-like, shape (n_samples,)
            The target values (integers that correspond to classes in
            classification, real numbers in regression).

        **fit_params : Other estimator specific parameters

        Returns
        -------
        self : object
        """
        if self.prefit:
            raise NotFittedError(
                "Since 'prefit=True', call transform directly")
        if not hasattr(self, "estimator_"):
            self.estimator_ = clone(self.estimator)
        self.estimator_.partial_fit(X, y, **fit_params)
        return self

    @property
    def n_features_in_(self):
        # For consistency with other estimators we raise an AttributeError so
        # that hasattr() fails if the estimator isn't fitted.
        try:
            check_is_fitted(self)
        except NotFittedError as nfe:
            raise AttributeError(
                "{} object has no n_features_in_ attribute."
                .format(self.__class__.__name__)
            ) from nfe

        return self.estimator_.n_features_in_

    def _more_tags(self):
        estimator_tags = self.estimator._get_tags()
        return {'allow_nan': estimator_tags.get('allow_nan', True)}
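
# A minimal usage sketch, assuming an l1-penalised LinearSVC as the base
# estimator and a feature matrix X with labels y (both hypothetical here):
#
#     from sklearn.svm import LinearSVC
#     from sklearn.feature_selection import SelectFromModel
#     import numpy as np
#
#     # Keep at most two features, ranked by coefficient magnitude, by
#     # disabling the importance threshold as documented above.
#     selector = SelectFromModel(LinearSVC(C=0.01, penalty="l1", dual=False),
#                                threshold=-np.inf, max_features=2)
#     X_reduced = selector.fit_transform(X, y)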