Uploaded Test files

parent f584ad9d97
commit 2e81cb7d99

16627 changed files with 2065359 additions and 102444 deletions
venv/Lib/site-packages/sklearn/linear_model/__init__.py (new file, 80 lines added)
@@ -0,0 +1,80 @@
"""
|
||||
The :mod:`sklearn.linear_model` module implements a variety of linear models.
|
||||
"""
|
||||
|
||||
# See http://scikit-learn.sourceforge.net/modules/sgd.html and
|
||||
# http://scikit-learn.sourceforge.net/modules/linear_model.html for
|
||||
# complete documentation.
|
||||
|
||||
from ._base import LinearRegression
|
||||
from ._bayes import BayesianRidge, ARDRegression
|
||||
from ._least_angle import (Lars, LassoLars, lars_path, lars_path_gram, LarsCV,
|
||||
LassoLarsCV, LassoLarsIC)
|
||||
from ._coordinate_descent import (Lasso, ElasticNet, LassoCV, ElasticNetCV,
|
||||
lasso_path, enet_path, MultiTaskLasso,
|
||||
MultiTaskElasticNet, MultiTaskElasticNetCV,
|
||||
MultiTaskLassoCV)
|
||||
from ._glm import (PoissonRegressor,
|
||||
GammaRegressor, TweedieRegressor)
|
||||
from ._huber import HuberRegressor
|
||||
from ._sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber
|
||||
from ._stochastic_gradient import SGDClassifier, SGDRegressor
|
||||
from ._ridge import (Ridge, RidgeCV, RidgeClassifier, RidgeClassifierCV,
|
||||
ridge_regression)
|
||||
from ._logistic import LogisticRegression, LogisticRegressionCV
|
||||
from ._omp import (orthogonal_mp, orthogonal_mp_gram,
|
||||
OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV)
|
||||
from ._passive_aggressive import PassiveAggressiveClassifier
|
||||
from ._passive_aggressive import PassiveAggressiveRegressor
|
||||
from ._perceptron import Perceptron
|
||||
|
||||
from ._ransac import RANSACRegressor
|
||||
from ._theil_sen import TheilSenRegressor
|
||||
|
||||
__all__ = ['ARDRegression',
|
||||
'BayesianRidge',
|
||||
'ElasticNet',
|
||||
'ElasticNetCV',
|
||||
'Hinge',
|
||||
'Huber',
|
||||
'HuberRegressor',
|
||||
'Lars',
|
||||
'LarsCV',
|
||||
'Lasso',
|
||||
'LassoCV',
|
||||
'LassoLars',
|
||||
'LassoLarsCV',
|
||||
'LassoLarsIC',
|
||||
'LinearRegression',
|
||||
'Log',
|
||||
'LogisticRegression',
|
||||
'LogisticRegressionCV',
|
||||
'ModifiedHuber',
|
||||
'MultiTaskElasticNet',
|
||||
'MultiTaskElasticNetCV',
|
||||
'MultiTaskLasso',
|
||||
'MultiTaskLassoCV',
|
||||
'OrthogonalMatchingPursuit',
|
||||
'OrthogonalMatchingPursuitCV',
|
||||
'PassiveAggressiveClassifier',
|
||||
'PassiveAggressiveRegressor',
|
||||
'Perceptron',
|
||||
'Ridge',
|
||||
'RidgeCV',
|
||||
'RidgeClassifier',
|
||||
'RidgeClassifierCV',
|
||||
'SGDClassifier',
|
||||
'SGDRegressor',
|
||||
'SquaredLoss',
|
||||
'TheilSenRegressor',
|
||||
'enet_path',
|
||||
'lars_path',
|
||||
'lars_path_gram',
|
||||
'lasso_path',
|
||||
'orthogonal_mp',
|
||||
'orthogonal_mp_gram',
|
||||
'ridge_regression',
|
||||
'RANSACRegressor',
|
||||
'PoissonRegressor',
|
||||
'GammaRegressor',
|
||||
'TweedieRegressor']
|
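For reference, a minimal usage sketch of the estimators this `__init__.py` re-exports (illustrative only, not part of the uploaded file; assumes a working scikit-learn install):

import numpy as np
from sklearn.linear_model import LinearRegression, Ridge

X = np.array([[1.0, 1.0], [1.0, 2.0], [2.0, 2.0], [2.0, 3.0]])
y = X @ np.array([1.0, 2.0]) + 3.0  # y = x0 + 2*x1 + 3

# Both estimators are importable directly from sklearn.linear_model,
# thanks to the re-exports above.
ols = LinearRegression().fit(X, y)
ridge = Ridge(alpha=1.0).fit(X, y)
print(ols.coef_, ols.intercept_)    # approximately [1. 2.] and 3.0
print(ridge.coef_, ridge.intercept_)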
33 binary files not shown.
venv/Lib/site-packages/sklearn/linear_model/_base.py (new file, 620 lines added)
@@ -0,0 +1,620 @@
"""
|
||||
Generalized Linear Models.
|
||||
"""
|
||||
|
||||
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
|
||||
# Fabian Pedregosa <fabian.pedregosa@inria.fr>
|
||||
# Olivier Grisel <olivier.grisel@ensta.org>
|
||||
# Vincent Michel <vincent.michel@inria.fr>
|
||||
# Peter Prettenhofer <peter.prettenhofer@gmail.com>
|
||||
# Mathieu Blondel <mathieu@mblondel.org>
|
||||
# Lars Buitinck
|
||||
# Maryan Morel <maryan.morel@polytechnique.edu>
|
||||
# Giorgio Patrini <giorgio.patrini@anu.edu.au>
|
||||
# License: BSD 3 clause
|
||||
|
||||
from abc import ABCMeta, abstractmethod
|
||||
import numbers
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import scipy.sparse as sp
|
||||
from scipy import linalg
|
||||
from scipy import sparse
|
||||
from scipy.special import expit
|
||||
from joblib import Parallel, delayed
|
||||
|
||||
from ..base import (BaseEstimator, ClassifierMixin, RegressorMixin,
|
||||
MultiOutputMixin)
|
||||
from ..utils import check_array
|
||||
from ..utils.validation import FLOAT_DTYPES
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ..utils import check_random_state
|
||||
from ..utils.extmath import safe_sparse_dot
|
||||
from ..utils.sparsefuncs import mean_variance_axis, inplace_column_scale
|
||||
from ..utils.fixes import sparse_lsqr
|
||||
from ..utils._seq_dataset import ArrayDataset32, CSRDataset32
|
||||
from ..utils._seq_dataset import ArrayDataset64, CSRDataset64
|
||||
from ..utils.validation import check_is_fitted, _check_sample_weight
|
||||
from ..preprocessing import normalize as f_normalize
|
||||
|
||||
# TODO: bayesian_ridge_regression and bayesian_regression_ard
|
||||
# should be squashed into its respective objects.
|
||||
|
||||
SPARSE_INTERCEPT_DECAY = 0.01
|
||||
# For sparse data intercept updates are scaled by this decay factor to avoid
|
||||
# intercept oscillation.
|
||||
|
||||
|
||||
def make_dataset(X, y, sample_weight, random_state=None):
|
||||
"""Create ``Dataset`` abstraction for sparse and dense inputs.
|
||||
|
||||
This also returns the ``intercept_decay`` which is different
|
||||
for sparse datasets.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array_like, shape (n_samples, n_features)
|
||||
Training data
|
||||
|
||||
y : array_like, shape (n_samples, )
|
||||
Target values.
|
||||
|
||||
sample_weight : numpy array of shape (n_samples,)
|
||||
The weight of each sample
|
||||
|
||||
random_state : int, RandomState instance or None (default)
|
||||
Determines random number generation for dataset shuffling and noise.
|
||||
Pass an int for reproducible output across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
dataset
|
||||
The ``Dataset`` abstraction
|
||||
intercept_decay
|
||||
The intercept decay
|
||||
"""
|
||||
|
||||
rng = check_random_state(random_state)
|
||||
# seed should never be 0 in SequentialDataset64
|
||||
seed = rng.randint(1, np.iinfo(np.int32).max)
|
||||
|
||||
if X.dtype == np.float32:
|
||||
CSRData = CSRDataset32
|
||||
ArrayData = ArrayDataset32
|
||||
else:
|
||||
CSRData = CSRDataset64
|
||||
ArrayData = ArrayDataset64
|
||||
|
||||
if sp.issparse(X):
|
||||
dataset = CSRData(X.data, X.indptr, X.indices, y, sample_weight,
|
||||
seed=seed)
|
||||
intercept_decay = SPARSE_INTERCEPT_DECAY
|
||||
else:
|
||||
X = np.ascontiguousarray(X)
|
||||
dataset = ArrayData(X, y, sample_weight, seed=seed)
|
||||
intercept_decay = 1.0
|
||||
|
||||
return dataset, intercept_decay
|
||||
|
||||
|
||||
def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
|
||||
sample_weight=None, return_mean=False, check_input=True):
|
||||
"""Center and scale data.
|
||||
|
||||
Centers data to have mean zero along axis 0. If fit_intercept=False or if
|
||||
the X is a sparse matrix, no centering is done, but normalization can still
|
||||
be applied. The function returns the statistics necessary to reconstruct
|
||||
the input data, which are X_offset, y_offset, X_scale, such that the output
|
||||
|
||||
X = (X - X_offset) / X_scale
|
||||
|
||||
X_scale is the L2 norm of X - X_offset. If sample_weight is not None,
|
||||
then the weighted mean of X and y is zero, and not the mean itself. If
|
||||
return_mean=True, the mean, eventually weighted, is returned, independently
|
||||
of whether X was centered (option used for optimization with sparse data in
|
||||
coordinate_descend).
|
||||
|
||||
This is here because nearly all linear models will want their data to be
|
||||
centered. This function also systematically makes y consistent with X.dtype
|
||||
"""
|
||||
if isinstance(sample_weight, numbers.Number):
|
||||
sample_weight = None
|
||||
if sample_weight is not None:
|
||||
sample_weight = np.asarray(sample_weight)
|
||||
|
||||
if check_input:
|
||||
X = check_array(X, copy=copy, accept_sparse=['csr', 'csc'],
|
||||
dtype=FLOAT_DTYPES)
|
||||
elif copy:
|
||||
if sp.issparse(X):
|
||||
X = X.copy()
|
||||
else:
|
||||
X = X.copy(order='K')
|
||||
|
||||
y = np.asarray(y, dtype=X.dtype)
|
||||
|
||||
if fit_intercept:
|
||||
if sp.issparse(X):
|
||||
X_offset, X_var = mean_variance_axis(X, axis=0)
|
||||
if not return_mean:
|
||||
X_offset[:] = X.dtype.type(0)
|
||||
|
||||
if normalize:
|
||||
|
||||
# TODO: f_normalize could be used here as well but the function
|
||||
# inplace_csr_row_normalize_l2 must be changed such that it
|
||||
# can return also the norms computed internally
|
||||
|
||||
# transform variance to norm in-place
|
||||
X_var *= X.shape[0]
|
||||
X_scale = np.sqrt(X_var, X_var)
|
||||
del X_var
|
||||
X_scale[X_scale == 0] = 1
|
||||
inplace_column_scale(X, 1. / X_scale)
|
||||
else:
|
||||
X_scale = np.ones(X.shape[1], dtype=X.dtype)
|
||||
|
||||
else:
|
||||
X_offset = np.average(X, axis=0, weights=sample_weight)
|
||||
X -= X_offset
|
||||
if normalize:
|
||||
X, X_scale = f_normalize(X, axis=0, copy=False,
|
||||
return_norm=True)
|
||||
else:
|
||||
X_scale = np.ones(X.shape[1], dtype=X.dtype)
|
||||
y_offset = np.average(y, axis=0, weights=sample_weight)
|
||||
y = y - y_offset
|
||||
else:
|
||||
X_offset = np.zeros(X.shape[1], dtype=X.dtype)
|
||||
X_scale = np.ones(X.shape[1], dtype=X.dtype)
|
||||
if y.ndim == 1:
|
||||
y_offset = X.dtype.type(0)
|
||||
else:
|
||||
y_offset = np.zeros(y.shape[1], dtype=X.dtype)
|
||||
|
||||
return X, y, X_offset, y_offset, X_scale
|
||||
|
||||
|
||||
# TODO: _rescale_data should be factored into _preprocess_data.
|
||||
# Currently, the fact that sag implements its own way to deal with
|
||||
# sample_weight makes the refactoring tricky.
|
||||
|
||||
def _rescale_data(X, y, sample_weight):
|
||||
"""Rescale data sample-wise by square root of sample_weight.
|
||||
|
||||
For many linear models, this enables easy support for sample_weight.
|
||||
|
||||
Returns
|
||||
-------
|
||||
X_rescaled : {array-like, sparse matrix}
|
||||
|
||||
y_rescaled : {array-like, sparse matrix}
|
||||
"""
|
||||
n_samples = X.shape[0]
|
||||
sample_weight = np.asarray(sample_weight)
|
||||
if sample_weight.ndim == 0:
|
||||
sample_weight = np.full(n_samples, sample_weight,
|
||||
dtype=sample_weight.dtype)
|
||||
sample_weight = np.sqrt(sample_weight)
|
||||
sw_matrix = sparse.dia_matrix((sample_weight, 0),
|
||||
shape=(n_samples, n_samples))
|
||||
X = safe_sparse_dot(sw_matrix, X)
|
||||
y = safe_sparse_dot(sw_matrix, y)
|
||||
return X, y
|
||||
|
||||
|
||||
class LinearModel(BaseEstimator, metaclass=ABCMeta):
|
||||
"""Base class for Linear Models"""
|
||||
|
||||
@abstractmethod
|
||||
def fit(self, X, y):
|
||||
"""Fit model."""
|
||||
|
||||
def _decision_function(self, X):
|
||||
check_is_fitted(self)
|
||||
|
||||
X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
|
||||
return safe_sparse_dot(X, self.coef_.T,
|
||||
dense_output=True) + self.intercept_
|
||||
|
||||
def predict(self, X):
|
||||
"""
|
||||
Predict using the linear model.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array_like or sparse matrix, shape (n_samples, n_features)
|
||||
Samples.
|
||||
|
||||
Returns
|
||||
-------
|
||||
C : array, shape (n_samples,)
|
||||
Returns predicted values.
|
||||
"""
|
||||
return self._decision_function(X)
|
||||
|
||||
_preprocess_data = staticmethod(_preprocess_data)
|
||||
|
||||
def _set_intercept(self, X_offset, y_offset, X_scale):
|
||||
"""Set the intercept_
|
||||
"""
|
||||
if self.fit_intercept:
|
||||
self.coef_ = self.coef_ / X_scale
|
||||
self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T)
|
||||
else:
|
||||
self.intercept_ = 0.
|
||||
|
||||
def _more_tags(self):
|
||||
return {'requires_y': True}
|
||||
|
||||
|
||||
# XXX Should this derive from LinearModel? It should be a mixin, not an ABC.
|
||||
# Maybe the n_features checking can be moved to LinearModel.
|
||||
class LinearClassifierMixin(ClassifierMixin):
|
||||
"""Mixin for linear classifiers.
|
||||
|
||||
Handles prediction for sparse and dense X.
|
||||
"""
|
||||
|
||||
def decision_function(self, X):
|
||||
"""
|
||||
Predict confidence scores for samples.
|
||||
|
||||
The confidence score for a sample is the signed distance of that
|
||||
sample to the hyperplane.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array_like or sparse matrix, shape (n_samples, n_features)
|
||||
Samples.
|
||||
|
||||
Returns
|
||||
-------
|
||||
array, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)
|
||||
Confidence scores per (sample, class) combination. In the binary
|
||||
case, confidence score for self.classes_[1] where >0 means this
|
||||
class would be predicted.
|
||||
"""
|
||||
check_is_fitted(self)
|
||||
|
||||
X = check_array(X, accept_sparse='csr')
|
||||
|
||||
n_features = self.coef_.shape[1]
|
||||
if X.shape[1] != n_features:
|
||||
raise ValueError("X has %d features per sample; expecting %d"
|
||||
% (X.shape[1], n_features))
|
||||
|
||||
scores = safe_sparse_dot(X, self.coef_.T,
|
||||
dense_output=True) + self.intercept_
|
||||
return scores.ravel() if scores.shape[1] == 1 else scores
|
||||
|
||||
def predict(self, X):
|
||||
"""
|
||||
Predict class labels for samples in X.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array_like or sparse matrix, shape (n_samples, n_features)
|
||||
Samples.
|
||||
|
||||
Returns
|
||||
-------
|
||||
C : array, shape [n_samples]
|
||||
Predicted class label per sample.
|
||||
"""
|
||||
scores = self.decision_function(X)
|
||||
if len(scores.shape) == 1:
|
||||
indices = (scores > 0).astype(np.int)
|
||||
else:
|
||||
indices = scores.argmax(axis=1)
|
||||
return self.classes_[indices]
|
||||
|
||||
def _predict_proba_lr(self, X):
|
||||
"""Probability estimation for OvR logistic regression.
|
||||
|
||||
Positive class probabilities are computed as
|
||||
1. / (1. + np.exp(-self.decision_function(X)));
|
||||
multiclass is handled by normalizing that over all classes.
|
||||
"""
|
||||
prob = self.decision_function(X)
|
||||
expit(prob, out=prob)
|
||||
if prob.ndim == 1:
|
||||
return np.vstack([1 - prob, prob]).T
|
||||
else:
|
||||
# OvR normalization, like LibLinear's predict_probability
|
||||
prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
|
||||
return prob
|
||||
|
||||
|
||||
class SparseCoefMixin:
|
||||
"""Mixin for converting coef_ to and from CSR format.
|
||||
|
||||
L1-regularizing estimators should inherit this.
|
||||
"""
|
||||
|
||||
def densify(self):
|
||||
"""
|
||||
Convert coefficient matrix to dense array format.
|
||||
|
||||
Converts the ``coef_`` member (back) to a numpy.ndarray. This is the
|
||||
default format of ``coef_`` and is required for fitting, so calling
|
||||
this method is only required on models that have previously been
|
||||
sparsified; otherwise, it is a no-op.
|
||||
|
||||
Returns
|
||||
-------
|
||||
self
|
||||
Fitted estimator.
|
||||
"""
|
||||
msg = "Estimator, %(name)s, must be fitted before densifying."
|
||||
check_is_fitted(self, msg=msg)
|
||||
if sp.issparse(self.coef_):
|
||||
self.coef_ = self.coef_.toarray()
|
||||
return self
|
||||
|
||||
def sparsify(self):
|
||||
"""
|
||||
Convert coefficient matrix to sparse format.
|
||||
|
||||
Converts the ``coef_`` member to a scipy.sparse matrix, which for
|
||||
L1-regularized models can be much more memory- and storage-efficient
|
||||
than the usual numpy.ndarray representation.
|
||||
|
||||
The ``intercept_`` member is not converted.
|
||||
|
||||
Returns
|
||||
-------
|
||||
self
|
||||
Fitted estimator.
|
||||
|
||||
Notes
|
||||
-----
|
||||
For non-sparse models, i.e. when there are not many zeros in ``coef_``,
|
||||
this may actually *increase* memory usage, so use this method with
|
||||
care. A rule of thumb is that the number of zero elements, which can
|
||||
be computed with ``(coef_ == 0).sum()``, must be more than 50% for this
|
||||
to provide significant benefits.
|
||||
|
||||
After calling this method, further fitting with the partial_fit
|
||||
method (if any) will not work until you call densify.
|
||||
"""
|
||||
msg = "Estimator, %(name)s, must be fitted before sparsifying."
|
||||
check_is_fitted(self, msg=msg)
|
||||
self.coef_ = sp.csr_matrix(self.coef_)
|
||||
return self
|
||||
|
||||
|
||||
class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):
|
||||
"""
|
||||
Ordinary least squares Linear Regression.
|
||||
|
||||
LinearRegression fits a linear model with coefficients w = (w1, ..., wp)
|
||||
to minimize the residual sum of squares between the observed targets in
|
||||
the dataset, and the targets predicted by the linear approximation.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fit_intercept : bool, default=True
|
||||
Whether to calculate the intercept for this model. If set
|
||||
to False, no intercept will be used in calculations
|
||||
(i.e. data is expected to be centered).
|
||||
|
||||
normalize : bool, default=False
|
||||
This parameter is ignored when ``fit_intercept`` is set to False.
|
||||
If True, the regressors X will be normalized before regression by
|
||||
subtracting the mean and dividing by the l2-norm.
|
||||
If you wish to standardize, please use
|
||||
:class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on
|
||||
an estimator with ``normalize=False``.
|
||||
|
||||
copy_X : bool, default=True
|
||||
If True, X will be copied; else, it may be overwritten.
|
||||
|
||||
n_jobs : int, default=None
|
||||
The number of jobs to use for the computation. This will only provide
|
||||
speedup for n_targets > 1 and sufficient large problems.
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
coef_ : array of shape (n_features, ) or (n_targets, n_features)
|
||||
Estimated coefficients for the linear regression problem.
|
||||
If multiple targets are passed during the fit (y 2D), this
|
||||
is a 2D array of shape (n_targets, n_features), while if only
|
||||
one target is passed, this is a 1D array of length n_features.
|
||||
|
||||
rank_ : int
|
||||
Rank of matrix `X`. Only available when `X` is dense.
|
||||
|
||||
singular_ : array of shape (min(X, y),)
|
||||
Singular values of `X`. Only available when `X` is dense.
|
||||
|
||||
intercept_ : float or array of shape (n_targets,)
|
||||
Independent term in the linear model. Set to 0.0 if
|
||||
`fit_intercept = False`.
|
||||
|
||||
See Also
|
||||
--------
|
||||
sklearn.linear_model.Ridge : Ridge regression addresses some of the
|
||||
problems of Ordinary Least Squares by imposing a penalty on the
|
||||
size of the coefficients with l2 regularization.
|
||||
sklearn.linear_model.Lasso : The Lasso is a linear model that estimates
|
||||
sparse coefficients with l1 regularization.
|
||||
sklearn.linear_model.ElasticNet : Elastic-Net is a linear regression
|
||||
model trained with both l1 and l2 -norm regularization of the
|
||||
coefficients.
|
||||
|
||||
Notes
|
||||
-----
|
||||
From the implementation point of view, this is just plain Ordinary
|
||||
Least Squares (scipy.linalg.lstsq) wrapped as a predictor object.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> from sklearn.linear_model import LinearRegression
|
||||
>>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
|
||||
>>> # y = 1 * x_0 + 2 * x_1 + 3
|
||||
>>> y = np.dot(X, np.array([1, 2])) + 3
|
||||
>>> reg = LinearRegression().fit(X, y)
|
||||
>>> reg.score(X, y)
|
||||
1.0
|
||||
>>> reg.coef_
|
||||
array([1., 2.])
|
||||
>>> reg.intercept_
|
||||
3.0000...
|
||||
>>> reg.predict(np.array([[3, 5]]))
|
||||
array([16.])
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, *, fit_intercept=True, normalize=False, copy_X=True,
|
||||
n_jobs=None):
|
||||
self.fit_intercept = fit_intercept
|
||||
self.normalize = normalize
|
||||
self.copy_X = copy_X
|
||||
self.n_jobs = n_jobs
|
||||
|
||||
def fit(self, X, y, sample_weight=None):
|
||||
"""
|
||||
Fit linear model.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
||||
Training data
|
||||
|
||||
y : array-like of shape (n_samples,) or (n_samples, n_targets)
|
||||
Target values. Will be cast to X's dtype if necessary
|
||||
|
||||
sample_weight : array-like of shape (n_samples,), default=None
|
||||
Individual weights for each sample
|
||||
|
||||
.. versionadded:: 0.17
|
||||
parameter *sample_weight* support to LinearRegression.
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : returns an instance of self.
|
||||
"""
|
||||
|
||||
n_jobs_ = self.n_jobs
|
||||
X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'],
|
||||
y_numeric=True, multi_output=True)
|
||||
|
||||
if sample_weight is not None:
|
||||
sample_weight = _check_sample_weight(sample_weight, X,
|
||||
dtype=X.dtype)
|
||||
|
||||
X, y, X_offset, y_offset, X_scale = self._preprocess_data(
|
||||
X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
|
||||
copy=self.copy_X, sample_weight=sample_weight,
|
||||
return_mean=True)
|
||||
|
||||
if sample_weight is not None:
|
||||
# Sample weight can be implemented via a simple rescaling.
|
||||
X, y = _rescale_data(X, y, sample_weight)
|
||||
|
||||
if sp.issparse(X):
|
||||
X_offset_scale = X_offset / X_scale
|
||||
|
||||
def matvec(b):
|
||||
return X.dot(b) - b.dot(X_offset_scale)
|
||||
|
||||
def rmatvec(b):
|
||||
return X.T.dot(b) - X_offset_scale * np.sum(b)
|
||||
|
||||
X_centered = sparse.linalg.LinearOperator(shape=X.shape,
|
||||
matvec=matvec,
|
||||
rmatvec=rmatvec)
|
||||
|
||||
if y.ndim < 2:
|
||||
out = sparse_lsqr(X_centered, y)
|
||||
self.coef_ = out[0]
|
||||
self._residues = out[3]
|
||||
else:
|
||||
# sparse_lstsq cannot handle y with shape (M, K)
|
||||
outs = Parallel(n_jobs=n_jobs_)(
|
||||
delayed(sparse_lsqr)(X_centered, y[:, j].ravel())
|
||||
for j in range(y.shape[1]))
|
||||
self.coef_ = np.vstack([out[0] for out in outs])
|
||||
self._residues = np.vstack([out[3] for out in outs])
|
||||
else:
|
||||
self.coef_, self._residues, self.rank_, self.singular_ = \
|
||||
linalg.lstsq(X, y)
|
||||
self.coef_ = self.coef_.T
|
||||
|
||||
if y.ndim == 1:
|
||||
self.coef_ = np.ravel(self.coef_)
|
||||
self._set_intercept(X_offset, y_offset, X_scale)
|
||||
return self
|
||||
|
||||
|
||||
def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy,
|
||||
check_input=True, sample_weight=None):
|
||||
"""Aux function used at beginning of fit in linear models
|
||||
|
||||
Parameters
|
||||
----------
|
||||
order : 'F', 'C' or None, default=None
|
||||
Whether X and y will be forced to be fortran or c-style. Only relevant
|
||||
if sample_weight is not None.
|
||||
"""
|
||||
n_samples, n_features = X.shape
|
||||
|
||||
if sparse.isspmatrix(X):
|
||||
# copy is not needed here as X is not modified inplace when X is sparse
|
||||
precompute = False
|
||||
X, y, X_offset, y_offset, X_scale = _preprocess_data(
|
||||
X, y, fit_intercept=fit_intercept, normalize=normalize,
|
||||
copy=False, return_mean=True, check_input=check_input)
|
||||
else:
|
||||
# copy was done in fit if necessary
|
||||
X, y, X_offset, y_offset, X_scale = _preprocess_data(
|
||||
X, y, fit_intercept=fit_intercept, normalize=normalize, copy=copy,
|
||||
check_input=check_input, sample_weight=sample_weight)
|
||||
if sample_weight is not None:
|
||||
X, y = _rescale_data(X, y, sample_weight=sample_weight)
|
||||
if hasattr(precompute, '__array__') and (
|
||||
fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or
|
||||
normalize and not np.allclose(X_scale, np.ones(n_features))):
|
||||
warnings.warn("Gram matrix was provided but X was centered"
|
||||
" to fit intercept, "
|
||||
"or X was normalized : recomputing Gram matrix.",
|
||||
UserWarning)
|
||||
# recompute Gram
|
||||
precompute = 'auto'
|
||||
Xy = None
|
||||
|
||||
# precompute if n_samples > n_features
|
||||
if isinstance(precompute, str) and precompute == 'auto':
|
||||
precompute = (n_samples > n_features)
|
||||
|
||||
if precompute is True:
|
||||
# make sure that the 'precompute' array is contiguous.
|
||||
precompute = np.empty(shape=(n_features, n_features), dtype=X.dtype,
|
||||
order='C')
|
||||
np.dot(X.T, X, out=precompute)
|
||||
|
||||
if not hasattr(precompute, '__array__'):
|
||||
Xy = None # cannot use Xy if precompute is not Gram
|
||||
|
||||
if hasattr(precompute, '__array__') and Xy is None:
|
||||
common_dtype = np.find_common_type([X.dtype, y.dtype], [])
|
||||
if y.ndim == 1:
|
||||
# Xy is 1d, make sure it is contiguous.
|
||||
Xy = np.empty(shape=n_features, dtype=common_dtype, order='C')
|
||||
np.dot(X.T, y, out=Xy)
|
||||
else:
|
||||
# Make sure that Xy is always F contiguous even if X or y are not
|
||||
# contiguous: the goal is to make it fast to extract the data for a
|
||||
# specific target.
|
||||
n_targets = y.shape[1]
|
||||
Xy = np.empty(shape=(n_features, n_targets), dtype=common_dtype,
|
||||
order='F')
|
||||
np.dot(y.T, X, out=Xy.T)
|
||||
|
||||
return X, y, X_offset, y_offset, X_scale, precompute, Xy
|
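The `_rescale_data` helper above relies on the identity that weighted least squares on (X, y, w) is equivalent to ordinary least squares on (sqrt(w) * X, sqrt(w) * y). A minimal NumPy sketch of that idea (illustrative only, not part of the uploaded file):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(5, 2)
y = rng.randn(5)
w = np.array([1.0, 2.0, 0.5, 1.5, 1.0])  # sample weights

sw = np.sqrt(w)
X_rescaled = X * sw[:, np.newaxis]
y_rescaled = y * sw

# The normal equations of the weighted problem match those of the
# rescaled, unweighted problem.
assert np.allclose(X.T @ (w[:, np.newaxis] * X), X_rescaled.T @ X_rescaled)
assert np.allclose(X.T @ (w * y), X_rescaled.T @ y_rescaled)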
venv/Lib/site-packages/sklearn/linear_model/_bayes.py (new file, 674 lines added)
@@ -0,0 +1,674 @@
"""
|
||||
Various bayesian regression
|
||||
"""
|
||||
|
||||
# Authors: V. Michel, F. Pedregosa, A. Gramfort
|
||||
# License: BSD 3 clause
|
||||
|
||||
from math import log
|
||||
import numpy as np
|
||||
from scipy import linalg
|
||||
|
||||
from ._base import LinearModel, _rescale_data
|
||||
from ..base import RegressorMixin
|
||||
from ..utils.extmath import fast_logdet
|
||||
from scipy.linalg import pinvh
|
||||
from ..utils.validation import _check_sample_weight
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
|
||||
|
||||
###############################################################################
|
||||
# BayesianRidge regression
|
||||
|
||||
class BayesianRidge(RegressorMixin, LinearModel):
|
||||
"""Bayesian ridge regression.
|
||||
|
||||
Fit a Bayesian ridge model. See the Notes section for details on this
|
||||
implementation and the optimization of the regularization parameters
|
||||
lambda (precision of the weights) and alpha (precision of the noise).
|
||||
|
||||
Read more in the :ref:`User Guide <bayesian_regression>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_iter : int, default=300
|
||||
Maximum number of iterations. Should be greater than or equal to 1.
|
||||
|
||||
tol : float, default=1e-3
|
||||
Stop the algorithm if w has converged.
|
||||
|
||||
alpha_1 : float, default=1e-6
|
||||
Hyper-parameter : shape parameter for the Gamma distribution prior
|
||||
over the alpha parameter.
|
||||
|
||||
alpha_2 : float, default=1e-6
|
||||
Hyper-parameter : inverse scale parameter (rate parameter) for the
|
||||
Gamma distribution prior over the alpha parameter.
|
||||
|
||||
lambda_1 : float, default=1e-6
|
||||
Hyper-parameter : shape parameter for the Gamma distribution prior
|
||||
over the lambda parameter.
|
||||
|
||||
lambda_2 : float, default=1e-6
|
||||
Hyper-parameter : inverse scale parameter (rate parameter) for the
|
||||
Gamma distribution prior over the lambda parameter.
|
||||
|
||||
alpha_init : float, default=None
|
||||
Initial value for alpha (precision of the noise).
|
||||
If not set, alpha_init is 1/Var(y).
|
||||
|
||||
.. versionadded:: 0.22
|
||||
|
||||
lambda_init : float, default=None
|
||||
Initial value for lambda (precision of the weights).
|
||||
If not set, lambda_init is 1.
|
||||
|
||||
.. versionadded:: 0.22
|
||||
|
||||
compute_score : bool, default=False
|
||||
If True, compute the log marginal likelihood at each iteration of the
|
||||
optimization.
|
||||
|
||||
fit_intercept : bool, default=True
|
||||
Whether to calculate the intercept for this model.
|
||||
The intercept is not treated as a probabilistic parameter
|
||||
and thus has no associated variance. If set
|
||||
to False, no intercept will be used in calculations
|
||||
(i.e. data is expected to be centered).
|
||||
|
||||
normalize : bool, default=False
|
||||
This parameter is ignored when ``fit_intercept`` is set to False.
|
||||
If True, the regressors X will be normalized before regression by
|
||||
subtracting the mean and dividing by the l2-norm.
|
||||
If you wish to standardize, please use
|
||||
:class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
|
||||
on an estimator with ``normalize=False``.
|
||||
|
||||
copy_X : bool, default=True
|
||||
If True, X will be copied; else, it may be overwritten.
|
||||
|
||||
verbose : bool, default=False
|
||||
Verbose mode when fitting the model.
|
||||
|
||||
|
||||
Attributes
|
||||
----------
|
||||
coef_ : array-like of shape (n_features,)
|
||||
Coefficients of the regression model (mean of distribution)
|
||||
|
||||
intercept_ : float
|
||||
Independent term in decision function. Set to 0.0 if
|
||||
``fit_intercept = False``.
|
||||
|
||||
alpha_ : float
|
||||
Estimated precision of the noise.
|
||||
|
||||
lambda_ : float
|
||||
Estimated precision of the weights.
|
||||
|
||||
sigma_ : array-like of shape (n_features, n_features)
|
||||
Estimated variance-covariance matrix of the weights
|
||||
|
||||
scores_ : array-like of shape (n_iter_+1,)
|
||||
If computed_score is True, value of the log marginal likelihood (to be
|
||||
maximized) at each iteration of the optimization. The array starts
|
||||
with the value of the log marginal likelihood obtained for the initial
|
||||
values of alpha and lambda and ends with the value obtained for the
|
||||
estimated alpha and lambda.
|
||||
|
||||
n_iter_ : int
|
||||
The actual number of iterations to reach the stopping criterion.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn import linear_model
|
||||
>>> clf = linear_model.BayesianRidge()
|
||||
>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])
|
||||
BayesianRidge()
|
||||
>>> clf.predict([[1, 1]])
|
||||
array([1.])
|
||||
|
||||
Notes
|
||||
-----
|
||||
There exist several strategies to perform Bayesian ridge regression. This
|
||||
implementation is based on the algorithm described in Appendix A of
|
||||
(Tipping, 2001) where updates of the regularization parameters are done as
|
||||
suggested in (MacKay, 1992). Note that according to A New
|
||||
View of Automatic Relevance Determination (Wipf and Nagarajan, 2008) these
|
||||
update rules do not guarantee that the marginal likelihood is increasing
|
||||
between two consecutive iterations of the optimization.
|
||||
|
||||
References
|
||||
----------
|
||||
D. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems,
|
||||
Vol. 4, No. 3, 1992.
|
||||
|
||||
M. E. Tipping, Sparse Bayesian Learning and the Relevance Vector Machine,
|
||||
Journal of Machine Learning Research, Vol. 1, 2001.
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,
|
||||
lambda_1=1.e-6, lambda_2=1.e-6, alpha_init=None,
|
||||
lambda_init=None, compute_score=False, fit_intercept=True,
|
||||
normalize=False, copy_X=True, verbose=False):
|
||||
self.n_iter = n_iter
|
||||
self.tol = tol
|
||||
self.alpha_1 = alpha_1
|
||||
self.alpha_2 = alpha_2
|
||||
self.lambda_1 = lambda_1
|
||||
self.lambda_2 = lambda_2
|
||||
self.alpha_init = alpha_init
|
||||
self.lambda_init = lambda_init
|
||||
self.compute_score = compute_score
|
||||
self.fit_intercept = fit_intercept
|
||||
self.normalize = normalize
|
||||
self.copy_X = copy_X
|
||||
self.verbose = verbose
|
||||
|
||||
def fit(self, X, y, sample_weight=None):
|
||||
"""Fit the model
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : ndarray of shape (n_samples, n_features)
|
||||
Training data
|
||||
y : ndarray of shape (n_samples,)
|
||||
Target values. Will be cast to X's dtype if necessary
|
||||
|
||||
sample_weight : ndarray of shape (n_samples,), default=None
|
||||
Individual weights for each sample
|
||||
|
||||
.. versionadded:: 0.20
|
||||
parameter *sample_weight* support to BayesianRidge.
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : returns an instance of self.
|
||||
"""
|
||||
|
||||
if self.n_iter < 1:
|
||||
raise ValueError('n_iter should be greater than or equal to 1.'
|
||||
' Got {!r}.'.format(self.n_iter))
|
||||
|
||||
X, y = self._validate_data(X, y, dtype=np.float64, y_numeric=True)
|
||||
|
||||
if sample_weight is not None:
|
||||
sample_weight = _check_sample_weight(sample_weight, X,
|
||||
dtype=X.dtype)
|
||||
|
||||
X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data(
|
||||
X, y, self.fit_intercept, self.normalize, self.copy_X,
|
||||
sample_weight=sample_weight)
|
||||
|
||||
if sample_weight is not None:
|
||||
# Sample weight can be implemented via a simple rescaling.
|
||||
X, y = _rescale_data(X, y, sample_weight)
|
||||
|
||||
self.X_offset_ = X_offset_
|
||||
self.X_scale_ = X_scale_
|
||||
n_samples, n_features = X.shape
|
||||
|
||||
# Initialization of the values of the parameters
|
||||
eps = np.finfo(np.float64).eps
|
||||
# Add `eps` in the denominator to omit division by zero if `np.var(y)`
|
||||
# is zero
|
||||
alpha_ = self.alpha_init
|
||||
lambda_ = self.lambda_init
|
||||
if alpha_ is None:
|
||||
alpha_ = 1. / (np.var(y) + eps)
|
||||
if lambda_ is None:
|
||||
lambda_ = 1.
|
||||
|
||||
verbose = self.verbose
|
||||
lambda_1 = self.lambda_1
|
||||
lambda_2 = self.lambda_2
|
||||
alpha_1 = self.alpha_1
|
||||
alpha_2 = self.alpha_2
|
||||
|
||||
self.scores_ = list()
|
||||
coef_old_ = None
|
||||
|
||||
XT_y = np.dot(X.T, y)
|
||||
U, S, Vh = linalg.svd(X, full_matrices=False)
|
||||
eigen_vals_ = S ** 2
|
||||
|
||||
# Convergence loop of the bayesian ridge regression
|
||||
for iter_ in range(self.n_iter):
|
||||
|
||||
# update posterior mean coef_ based on alpha_ and lambda_ and
|
||||
# compute corresponding rmse
|
||||
coef_, rmse_ = self._update_coef_(X, y, n_samples, n_features,
|
||||
XT_y, U, Vh, eigen_vals_,
|
||||
alpha_, lambda_)
|
||||
if self.compute_score:
|
||||
# compute the log marginal likelihood
|
||||
s = self._log_marginal_likelihood(n_samples, n_features,
|
||||
eigen_vals_,
|
||||
alpha_, lambda_,
|
||||
coef_, rmse_)
|
||||
self.scores_.append(s)
|
||||
|
||||
# Update alpha and lambda according to (MacKay, 1992)
|
||||
gamma_ = np.sum((alpha_ * eigen_vals_) /
|
||||
(lambda_ + alpha_ * eigen_vals_))
|
||||
lambda_ = ((gamma_ + 2 * lambda_1) /
|
||||
(np.sum(coef_ ** 2) + 2 * lambda_2))
|
||||
alpha_ = ((n_samples - gamma_ + 2 * alpha_1) /
|
||||
(rmse_ + 2 * alpha_2))
|
||||
|
||||
# Check for convergence
|
||||
if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:
|
||||
if verbose:
|
||||
print("Convergence after ", str(iter_), " iterations")
|
||||
break
|
||||
coef_old_ = np.copy(coef_)
|
||||
|
||||
self.n_iter_ = iter_ + 1
|
||||
|
||||
# return regularization parameters and corresponding posterior mean,
|
||||
# log marginal likelihood and posterior covariance
|
||||
self.alpha_ = alpha_
|
||||
self.lambda_ = lambda_
|
||||
self.coef_, rmse_ = self._update_coef_(X, y, n_samples, n_features,
|
||||
XT_y, U, Vh, eigen_vals_,
|
||||
alpha_, lambda_)
|
||||
if self.compute_score:
|
||||
# compute the log marginal likelihood
|
||||
s = self._log_marginal_likelihood(n_samples, n_features,
|
||||
eigen_vals_,
|
||||
alpha_, lambda_,
|
||||
coef_, rmse_)
|
||||
self.scores_.append(s)
|
||||
self.scores_ = np.array(self.scores_)
|
||||
|
||||
# posterior covariance is given by 1/alpha_ * scaled_sigma_
|
||||
scaled_sigma_ = np.dot(Vh.T,
|
||||
Vh / (eigen_vals_ +
|
||||
lambda_ / alpha_)[:, np.newaxis])
|
||||
self.sigma_ = (1. / alpha_) * scaled_sigma_
|
||||
|
||||
self._set_intercept(X_offset_, y_offset_, X_scale_)
|
||||
|
||||
return self
|
||||
|
||||
def predict(self, X, return_std=False):
|
||||
"""Predict using the linear model.
|
||||
|
||||
In addition to the mean of the predictive distribution, also its
|
||||
standard deviation can be returned.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
||||
Samples.
|
||||
|
||||
return_std : bool, default=False
|
||||
Whether to return the standard deviation of posterior prediction.
|
||||
|
||||
Returns
|
||||
-------
|
||||
y_mean : array-like of shape (n_samples,)
|
||||
Mean of predictive distribution of query points.
|
||||
|
||||
y_std : array-like of shape (n_samples,)
|
||||
Standard deviation of predictive distribution of query points.
|
||||
"""
|
||||
y_mean = self._decision_function(X)
|
||||
if return_std is False:
|
||||
return y_mean
|
||||
else:
|
||||
if self.normalize:
|
||||
X = (X - self.X_offset_) / self.X_scale_
|
||||
sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)
|
||||
y_std = np.sqrt(sigmas_squared_data + (1. / self.alpha_))
|
||||
return y_mean, y_std
|
||||
|
||||
def _update_coef_(self, X, y, n_samples, n_features, XT_y, U, Vh,
|
||||
eigen_vals_, alpha_, lambda_):
|
||||
"""Update posterior mean and compute corresponding rmse.
|
||||
|
||||
Posterior mean is given by coef_ = scaled_sigma_ * X.T * y where
|
||||
scaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)
|
||||
+ np.dot(X.T, X))^-1
|
||||
"""
|
||||
|
||||
if n_samples > n_features:
|
||||
coef_ = np.dot(Vh.T,
|
||||
Vh / (eigen_vals_ +
|
||||
lambda_ / alpha_)[:, np.newaxis])
|
||||
coef_ = np.dot(coef_, XT_y)
|
||||
else:
|
||||
coef_ = np.dot(X.T, np.dot(
|
||||
U / (eigen_vals_ + lambda_ / alpha_)[None, :], U.T))
|
||||
coef_ = np.dot(coef_, y)
|
||||
|
||||
rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)
|
||||
|
||||
return coef_, rmse_
|
||||
|
||||
def _log_marginal_likelihood(self, n_samples, n_features, eigen_vals,
|
||||
alpha_, lambda_, coef, rmse):
|
||||
"""Log marginal likelihood."""
|
||||
alpha_1 = self.alpha_1
|
||||
alpha_2 = self.alpha_2
|
||||
lambda_1 = self.lambda_1
|
||||
lambda_2 = self.lambda_2
|
||||
|
||||
# compute the log of the determinant of the posterior covariance.
|
||||
# posterior covariance is given by
|
||||
# sigma = (lambda_ * np.eye(n_features) + alpha_ * np.dot(X.T, X))^-1
|
||||
if n_samples > n_features:
|
||||
logdet_sigma = - np.sum(np.log(lambda_ + alpha_ * eigen_vals))
|
||||
else:
|
||||
logdet_sigma = np.full(n_features, lambda_,
|
||||
dtype=np.array(lambda_).dtype)
|
||||
logdet_sigma[:n_samples] += alpha_ * eigen_vals
|
||||
logdet_sigma = - np.sum(np.log(logdet_sigma))
|
||||
|
||||
score = lambda_1 * log(lambda_) - lambda_2 * lambda_
|
||||
score += alpha_1 * log(alpha_) - alpha_2 * alpha_
|
||||
score += 0.5 * (n_features * log(lambda_) +
|
||||
n_samples * log(alpha_) -
|
||||
alpha_ * rmse -
|
||||
lambda_ * np.sum(coef ** 2) +
|
||||
logdet_sigma -
|
||||
n_samples * log(2 * np.pi))
|
||||
|
||||
return score
|
||||
|
||||
|
||||
###############################################################################
|
||||
# ARD (Automatic Relevance Determination) regression
|
||||
|
||||
|
||||
class ARDRegression(RegressorMixin, LinearModel):
|
||||
"""Bayesian ARD regression.
|
||||
|
||||
Fit the weights of a regression model, using an ARD prior. The weights of
|
||||
the regression model are assumed to be in Gaussian distributions.
|
||||
Also estimate the parameters lambda (precisions of the distributions of the
|
||||
weights) and alpha (precision of the distribution of the noise).
|
||||
The estimation is done by an iterative procedures (Evidence Maximization)
|
||||
|
||||
Read more in the :ref:`User Guide <bayesian_regression>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_iter : int, default=300
|
||||
Maximum number of iterations.
|
||||
|
||||
tol : float, default=1e-3
|
||||
Stop the algorithm if w has converged.
|
||||
|
||||
alpha_1 : float, default=1e-6
|
||||
Hyper-parameter : shape parameter for the Gamma distribution prior
|
||||
over the alpha parameter.
|
||||
|
||||
alpha_2 : float, default=1e-6
|
||||
Hyper-parameter : inverse scale parameter (rate parameter) for the
|
||||
Gamma distribution prior over the alpha parameter.
|
||||
|
||||
lambda_1 : float, default=1e-6
|
||||
Hyper-parameter : shape parameter for the Gamma distribution prior
|
||||
over the lambda parameter.
|
||||
|
||||
lambda_2 : float, default=1e-6
|
||||
Hyper-parameter : inverse scale parameter (rate parameter) for the
|
||||
Gamma distribution prior over the lambda parameter.
|
||||
|
||||
compute_score : bool, default=False
|
||||
If True, compute the objective function at each step of the model.
|
||||
|
||||
threshold_lambda : float, default=10 000
|
||||
threshold for removing (pruning) weights with high precision from
|
||||
the computation.
|
||||
|
||||
fit_intercept : bool, default=True
|
||||
whether to calculate the intercept for this model. If set
|
||||
to false, no intercept will be used in calculations
|
||||
(i.e. data is expected to be centered).
|
||||
|
||||
normalize : bool, default=False
|
||||
This parameter is ignored when ``fit_intercept`` is set to False.
|
||||
If True, the regressors X will be normalized before regression by
|
||||
subtracting the mean and dividing by the l2-norm.
|
||||
If you wish to standardize, please use
|
||||
:class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
|
||||
on an estimator with ``normalize=False``.
|
||||
|
||||
copy_X : bool, default=True
|
||||
If True, X will be copied; else, it may be overwritten.
|
||||
|
||||
verbose : bool, default=False
|
||||
Verbose mode when fitting the model.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
coef_ : array-like of shape (n_features,)
|
||||
Coefficients of the regression model (mean of distribution)
|
||||
|
||||
alpha_ : float
|
||||
estimated precision of the noise.
|
||||
|
||||
lambda_ : array-like of shape (n_features,)
|
||||
estimated precisions of the weights.
|
||||
|
||||
sigma_ : array-like of shape (n_features, n_features)
|
||||
estimated variance-covariance matrix of the weights
|
||||
|
||||
scores_ : float
|
||||
if computed, value of the objective function (to be maximized)
|
||||
|
||||
intercept_ : float
|
||||
Independent term in decision function. Set to 0.0 if
|
||||
``fit_intercept = False``.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn import linear_model
|
||||
>>> clf = linear_model.ARDRegression()
|
||||
>>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])
|
||||
ARDRegression()
|
||||
>>> clf.predict([[1, 1]])
|
||||
array([1.])
|
||||
|
||||
Notes
|
||||
-----
|
||||
For an example, see :ref:`examples/linear_model/plot_ard.py
|
||||
<sphx_glr_auto_examples_linear_model_plot_ard.py>`.
|
||||
|
||||
References
|
||||
----------
|
||||
D. J. C. MacKay, Bayesian nonlinear modeling for the prediction
|
||||
competition, ASHRAE Transactions, 1994.
|
||||
|
||||
R. Salakhutdinov, Lecture notes on Statistical Machine Learning,
|
||||
http://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15
|
||||
Their beta is our ``self.alpha_``
|
||||
Their alpha is our ``self.lambda_``
|
||||
ARD is a little different than the slide: only dimensions/features for
|
||||
which ``self.lambda_ < self.threshold_lambda`` are kept and the rest are
|
||||
discarded.
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,
|
||||
lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False,
|
||||
threshold_lambda=1.e+4, fit_intercept=True, normalize=False,
|
||||
copy_X=True, verbose=False):
|
||||
self.n_iter = n_iter
|
||||
self.tol = tol
|
||||
self.fit_intercept = fit_intercept
|
||||
self.normalize = normalize
|
||||
self.alpha_1 = alpha_1
|
||||
self.alpha_2 = alpha_2
|
||||
self.lambda_1 = lambda_1
|
||||
self.lambda_2 = lambda_2
|
||||
self.compute_score = compute_score
|
||||
self.threshold_lambda = threshold_lambda
|
||||
self.copy_X = copy_X
|
||||
self.verbose = verbose
|
||||
|
||||
def fit(self, X, y):
|
||||
"""Fit the ARDRegression model according to the given training data
|
||||
and parameters.
|
||||
|
||||
Iterative procedure to maximize the evidence
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Training vector, where n_samples in the number of samples and
|
||||
n_features is the number of features.
|
||||
y : array-like of shape (n_samples,)
|
||||
Target values (integers). Will be cast to X's dtype if necessary
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : returns an instance of self.
|
||||
"""
|
||||
X, y = self._validate_data(X, y, dtype=np.float64, y_numeric=True,
|
||||
ensure_min_samples=2)
|
||||
|
||||
n_samples, n_features = X.shape
|
||||
coef_ = np.zeros(n_features)
|
||||
|
||||
X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data(
|
||||
X, y, self.fit_intercept, self.normalize, self.copy_X)
|
||||
|
||||
# Launch the convergence loop
|
||||
keep_lambda = np.ones(n_features, dtype=bool)
|
||||
|
||||
lambda_1 = self.lambda_1
|
||||
lambda_2 = self.lambda_2
|
||||
alpha_1 = self.alpha_1
|
||||
alpha_2 = self.alpha_2
|
||||
verbose = self.verbose
|
||||
|
||||
# Initialization of the values of the parameters
|
||||
eps = np.finfo(np.float64).eps
|
||||
# Add `eps` in the denominator to omit division by zero if `np.var(y)`
|
||||
# is zero
|
||||
alpha_ = 1. / (np.var(y) + eps)
|
||||
lambda_ = np.ones(n_features)
|
||||
|
||||
self.scores_ = list()
|
||||
coef_old_ = None
|
||||
|
||||
def update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_):
|
||||
coef_[keep_lambda] = alpha_ * np.dot(
|
||||
sigma_, np.dot(X[:, keep_lambda].T, y))
|
||||
return coef_
|
||||
|
||||
update_sigma = (self._update_sigma if n_samples >= n_features
|
||||
else self._update_sigma_woodbury)
|
||||
# Iterative procedure of ARDRegression
|
||||
for iter_ in range(self.n_iter):
|
||||
sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)
|
||||
coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)
|
||||
|
||||
# Update alpha and lambda
|
||||
rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)
|
||||
gamma_ = 1. - lambda_[keep_lambda] * np.diag(sigma_)
|
||||
lambda_[keep_lambda] = ((gamma_ + 2. * lambda_1) /
|
||||
((coef_[keep_lambda]) ** 2 +
|
||||
2. * lambda_2))
|
||||
alpha_ = ((n_samples - gamma_.sum() + 2. * alpha_1) /
|
||||
(rmse_ + 2. * alpha_2))
|
||||
|
||||
# Prune the weights with a precision over a threshold
|
||||
keep_lambda = lambda_ < self.threshold_lambda
|
||||
coef_[~keep_lambda] = 0
|
||||
|
||||
# Compute the objective function
|
||||
if self.compute_score:
|
||||
s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum()
|
||||
s += alpha_1 * log(alpha_) - alpha_2 * alpha_
|
||||
s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_) +
|
||||
np.sum(np.log(lambda_)))
|
||||
s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_ ** 2).sum())
|
||||
self.scores_.append(s)
|
||||
|
||||
# Check for convergence
|
||||
if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:
|
||||
if verbose:
|
||||
print("Converged after %s iterations" % iter_)
|
||||
break
|
||||
coef_old_ = np.copy(coef_)
|
||||
|
||||
if not keep_lambda.any():
|
||||
break
|
||||
|
||||
if keep_lambda.any():
|
||||
# update sigma and mu using updated params from the last iteration
|
||||
sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)
|
||||
coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)
|
||||
else:
|
||||
sigma_ = np.array([]).reshape(0, 0)
|
||||
|
||||
self.coef_ = coef_
|
||||
self.alpha_ = alpha_
|
||||
self.sigma_ = sigma_
|
||||
self.lambda_ = lambda_
|
||||
self._set_intercept(X_offset_, y_offset_, X_scale_)
|
||||
return self
|
||||
|
||||
def _update_sigma_woodbury(self, X, alpha_, lambda_, keep_lambda):
|
||||
# See slides as referenced in the docstring note
|
||||
# this function is used when n_samples < n_features and will invert
|
||||
# a matrix of shape (n_samples, n_samples) making use of the
|
||||
# woodbury formula:
|
||||
# https://en.wikipedia.org/wiki/Woodbury_matrix_identity
|
||||
n_samples = X.shape[0]
|
||||
X_keep = X[:, keep_lambda]
|
||||
inv_lambda = 1 / lambda_[keep_lambda].reshape(1, -1)
|
||||
sigma_ = pinvh(
|
||||
np.eye(n_samples) / alpha_ + np.dot(X_keep * inv_lambda, X_keep.T)
|
||||
)
|
||||
sigma_ = np.dot(sigma_, X_keep * inv_lambda)
|
||||
sigma_ = - np.dot(inv_lambda.reshape(-1, 1) * X_keep.T, sigma_)
|
||||
sigma_[np.diag_indices(sigma_.shape[1])] += 1. / lambda_[keep_lambda]
|
||||
return sigma_
|
||||
|
||||
def _update_sigma(self, X, alpha_, lambda_, keep_lambda):
|
||||
# See slides as referenced in the docstring note
|
||||
# this function is used when n_samples >= n_features and will
|
||||
# invert a matrix of shape (n_features, n_features)
|
||||
X_keep = X[:, keep_lambda]
|
||||
gram = np.dot(X_keep.T, X_keep)
|
||||
eye = np.eye(gram.shape[0])
|
||||
sigma_inv = lambda_[keep_lambda] * eye + alpha_ * gram
|
||||
sigma_ = pinvh(sigma_inv)
|
||||
return sigma_
|
||||
|
||||
def predict(self, X, return_std=False):
|
||||
"""Predict using the linear model.
|
||||
|
||||
In addition to the mean of the predictive distribution, also its
|
||||
standard deviation can be returned.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
||||
Samples.
|
||||
|
||||
return_std : bool, default=False
|
||||
Whether to return the standard deviation of posterior prediction.
|
||||
|
||||
Returns
|
||||
-------
|
||||
y_mean : array-like of shape (n_samples,)
|
||||
Mean of predictive distribution of query points.
|
||||
|
||||
y_std : array-like of shape (n_samples,)
|
||||
Standard deviation of predictive distribution of query points.
|
||||
"""
|
||||
y_mean = self._decision_function(X)
|
||||
if return_std is False:
|
||||
return y_mean
|
||||
else:
|
||||
if self.normalize:
|
||||
X = (X - self.X_offset_) / self.X_scale_
|
||||
X = X[:, self.lambda_ < self.threshold_lambda]
|
||||
sigmas_squared_data = (np.dot(X, self.sigma_) * X).sum(axis=1)
|
||||
y_std = np.sqrt(sigmas_squared_data + (1. / self.alpha_))
|
||||
return y_mean, y_std
|
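A short usage sketch for the two Bayesian regressors defined in this file (illustrative only, not part of the uploaded file):

import numpy as np
from sklearn.linear_model import BayesianRidge, ARDRegression

X = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0], [3.0, 3.0]])
y = np.array([0.0, 1.0, 2.0, 3.0])

br = BayesianRidge(compute_score=True).fit(X, y)
# return_std=True exposes the posterior predictive standard deviation,
# computed from sigma_ and alpha_ as in predict() above.
mean, std = br.predict(np.array([[1.5, 1.5]]), return_std=True)

ard = ARDRegression().fit(X, y)
print(mean, std, ard.coef_, ard.lambda_)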
Binary file not shown.
venv/Lib/site-packages/sklearn/linear_model/_coordinate_descent.py (new file, 2391 lines added)
File diff suppressed because it is too large.
venv/Lib/site-packages/sklearn/linear_model/_glm/__init__.py (new file, 15 lines added)
@@ -0,0 +1,15 @@
# License: BSD 3 clause

from .glm import (
    GeneralizedLinearRegressor,
    PoissonRegressor,
    GammaRegressor,
    TweedieRegressor
)

__all__ = [
    "GeneralizedLinearRegressor",
    "PoissonRegressor",
    "GammaRegressor",
    "TweedieRegressor"
]
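A short usage sketch for the GLM estimators exported here (illustrative only, not part of the uploaded file); note that `TweedieRegressor(power=1, link='log')` fits the same model family as `PoissonRegressor`:

import numpy as np
from sklearn.linear_model import PoissonRegressor, TweedieRegressor

rng = np.random.RandomState(0)
X = rng.uniform(size=(50, 2))
# Poisson-distributed counts with a log link: E[y] = exp(X @ [1.0, 0.5])
y = rng.poisson(np.exp(X @ np.array([1.0, 0.5])))

pois = PoissonRegressor(alpha=1e-3, max_iter=300).fit(X, y)
tweedie = TweedieRegressor(power=1, link='log', alpha=1e-3, max_iter=300).fit(X, y)
print(pois.coef_, tweedie.coef_)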
3 binary files not shown.
venv/Lib/site-packages/sklearn/linear_model/_glm/glm.py (new file, 615 lines added)
@@ -0,0 +1,615 @@
"""
|
||||
Generalized Linear Models with Exponential Dispersion Family
|
||||
"""
|
||||
|
||||
# Author: Christian Lorentzen <lorentzen.ch@googlemail.com>
|
||||
# some parts and tricks stolen from other sklearn files.
|
||||
# License: BSD 3 clause
|
||||
|
||||
import numbers
|
||||
|
||||
import numpy as np
|
||||
import scipy.optimize
|
||||
|
||||
from ...base import BaseEstimator, RegressorMixin
|
||||
from ...utils import check_array, check_X_y
|
||||
from ...utils.optimize import _check_optimize_result
|
||||
from ...utils.validation import check_is_fitted, _check_sample_weight
|
||||
from ..._loss.glm_distribution import (
|
||||
ExponentialDispersionModel,
|
||||
TweedieDistribution,
|
||||
EDM_DISTRIBUTIONS
|
||||
)
|
||||
from .link import (
|
||||
BaseLink,
|
||||
IdentityLink,
|
||||
LogLink,
|
||||
)
|
||||
|
||||
|
||||
def _safe_lin_pred(X, coef):
|
||||
"""Compute the linear predictor taking care if intercept is present."""
|
||||
if coef.size == X.shape[1] + 1:
|
||||
return X @ coef[1:] + coef[0]
|
||||
else:
|
||||
return X @ coef
|
||||
|
||||
|
||||
def _y_pred_deviance_derivative(coef, X, y, weights, family, link):
|
||||
"""Compute y_pred and the derivative of the deviance w.r.t coef."""
|
||||
lin_pred = _safe_lin_pred(X, coef)
|
||||
y_pred = link.inverse(lin_pred)
|
||||
d1 = link.inverse_derivative(lin_pred)
|
||||
temp = d1 * family.deviance_derivative(y, y_pred, weights)
|
||||
if coef.size == X.shape[1] + 1:
|
||||
devp = np.concatenate(([temp.sum()], temp @ X))
|
||||
else:
|
||||
devp = temp @ X # same as X.T @ temp
|
||||
return y_pred, devp
|
||||
|
||||
|
||||
class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin):
|
||||
"""Regression via a penalized Generalized Linear Model (GLM).
|
||||
|
||||
GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at
|
||||
fitting and predicting the mean of the target y as y_pred=h(X*w).
|
||||
Therefore, the fit minimizes the following objective function with L2
|
||||
priors as regularizer::
|
||||
|
||||
1/(2*sum(s)) * deviance(y, h(X*w); s)
|
||||
+ 1/2 * alpha * |w|_2
|
||||
|
||||
with inverse link function h and s=sample_weight.
|
||||
The parameter ``alpha`` corresponds to the lambda parameter in glmnet.
|
||||
|
||||
Read more in the :ref:`User Guide <Generalized_linear_regression>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
alpha : float, default=1
|
||||
Constant that multiplies the penalty term and thus determines the
|
||||
regularization strength. ``alpha = 0`` is equivalent to unpenalized
|
||||
GLMs. In this case, the design matrix `X` must have full column rank
|
||||
(no collinearities).
|
||||
|
||||
fit_intercept : bool, default=True
|
||||
Specifies if a constant (a.k.a. bias or intercept) should be
|
||||
added to the linear predictor (X @ coef + intercept).
|
||||
|
||||
family : {'normal', 'poisson', 'gamma', 'inverse-gaussian'} \
|
||||
or an ExponentialDispersionModel instance, default='normal'
|
||||
The distributional assumption of the GLM, i.e. which distribution from
|
||||
the EDM, specifies the loss function to be minimized.
|
||||
|
||||
link : {'auto', 'identity', 'log'} or an instance of class BaseLink, \
|
||||
default='auto'
|
||||
The link function of the GLM, i.e. mapping from linear predictor
|
||||
`X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets
|
||||
the link depending on the chosen family as follows:
|
||||
|
||||
- 'identity' for Normal distribution
|
||||
- 'log' for Poisson, Gamma and Inverse Gaussian distributions
|
||||
|
||||
solver : 'lbfgs', default='lbfgs'
|
||||
Algorithm to use in the optimization problem:
|
||||
|
||||
'lbfgs'
|
||||
Calls scipy's L-BFGS-B optimizer.
|
||||
|
||||
max_iter : int, default=100
|
||||
The maximal number of iterations for the solver.
|
||||
|
||||
tol : float, default=1e-4
|
||||
Stopping criterion. For the lbfgs solver,
|
||||
the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``
|
||||
where ``g_j`` is the j-th component of the gradient (derivative) of
|
||||
the objective function.
|
||||
|
||||
warm_start : bool, default=False
|
||||
If set to ``True``, reuse the solution of the previous call to ``fit``
|
||||
as initialization for ``coef_`` and ``intercept_``.
|
||||
|
||||
verbose : int, default=0
|
||||
For the lbfgs solver set verbose to any positive number for verbosity.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
coef_ : array of shape (n_features,)
|
||||
Estimated coefficients for the linear predictor (`X @ coef_ +
|
||||
intercept_`) in the GLM.
|
||||
|
||||
intercept_ : float
|
||||
Intercept (a.k.a. bias) added to linear predictor.
|
||||
|
||||
n_iter_ : int
|
||||
Actual number of iterations used in the solver.
|
||||
"""
|
||||
def __init__(self, *, alpha=1.0,
|
||||
fit_intercept=True, family='normal', link='auto',
|
||||
solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False,
|
||||
verbose=0):
|
||||
self.alpha = alpha
|
||||
self.fit_intercept = fit_intercept
|
||||
self.family = family
|
||||
self.link = link
|
||||
self.solver = solver
|
||||
self.max_iter = max_iter
|
||||
self.tol = tol
|
||||
self.warm_start = warm_start
|
||||
self.verbose = verbose
|
||||
|
||||
def fit(self, X, y, sample_weight=None):
|
||||
"""Fit a Generalized Linear Model.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
||||
Training data.
|
||||
|
||||
y : array-like of shape (n_samples,)
|
||||
Target values.
|
||||
|
||||
sample_weight : array-like of shape (n_samples,), default=None
|
||||
Sample weights.
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : returns an instance of self.
|
||||
"""
|
||||
if isinstance(self.family, ExponentialDispersionModel):
|
||||
self._family_instance = self.family
|
||||
elif self.family in EDM_DISTRIBUTIONS:
|
||||
self._family_instance = EDM_DISTRIBUTIONS[self.family]()
|
||||
else:
|
||||
raise ValueError(
|
||||
"The family must be an instance of class"
|
||||
" ExponentialDispersionModel or an element of"
|
||||
" ['normal', 'poisson', 'gamma', 'inverse-gaussian']"
|
||||
"; got (family={0})".format(self.family))
|
||||
|
||||
# Guarantee that self._link_instance is set to an instance of
|
||||
# class BaseLink
|
||||
if isinstance(self.link, BaseLink):
|
||||
self._link_instance = self.link
|
||||
else:
|
||||
if self.link == 'auto':
|
||||
if isinstance(self._family_instance, TweedieDistribution):
|
||||
if self._family_instance.power <= 0:
|
||||
self._link_instance = IdentityLink()
|
||||
if self._family_instance.power >= 1:
|
||||
self._link_instance = LogLink()
|
||||
else:
|
||||
raise ValueError("No default link known for the "
|
||||
"specified distribution family. Please "
|
||||
"set link manually, i.e. not to 'auto'; "
|
||||
"got (link='auto', family={})"
|
||||
.format(self.family))
|
||||
elif self.link == 'identity':
|
||||
self._link_instance = IdentityLink()
|
||||
elif self.link == 'log':
|
||||
self._link_instance = LogLink()
|
||||
else:
|
||||
raise ValueError(
|
||||
"The link must be an instance of class Link or "
|
||||
"an element of ['auto', 'identity', 'log']; "
|
||||
"got (link={0})".format(self.link))
|
||||
|
||||
if not isinstance(self.alpha, numbers.Number) or self.alpha < 0:
|
||||
raise ValueError("Penalty term must be a non-negative number;"
|
||||
" got (alpha={0})".format(self.alpha))
|
||||
if not isinstance(self.fit_intercept, bool):
|
||||
raise ValueError("The argument fit_intercept must be bool;"
|
||||
" got {0}".format(self.fit_intercept))
|
||||
if self.solver not in ['lbfgs']:
|
||||
raise ValueError("GeneralizedLinearRegressor supports only solvers"
|
||||
"'lbfgs'; got {0}".format(self.solver))
|
||||
solver = self.solver
|
||||
if (not isinstance(self.max_iter, numbers.Integral)
|
||||
or self.max_iter <= 0):
|
||||
raise ValueError("Maximum number of iteration must be a positive "
|
||||
"integer;"
|
||||
" got (max_iter={0!r})".format(self.max_iter))
|
||||
if not isinstance(self.tol, numbers.Number) or self.tol <= 0:
|
||||
raise ValueError("Tolerance for stopping criteria must be "
|
||||
"positive; got (tol={0!r})".format(self.tol))
|
||||
if not isinstance(self.warm_start, bool):
|
||||
raise ValueError("The argument warm_start must be bool;"
|
||||
" got {0}".format(self.warm_start))
|
||||
|
||||
family = self._family_instance
|
||||
link = self._link_instance
|
||||
|
||||
X, y = check_X_y(X, y, accept_sparse=['csc', 'csr'],
|
||||
dtype=[np.float64, np.float32],
|
||||
y_numeric=True, multi_output=False)
|
||||
|
||||
weights = _check_sample_weight(sample_weight, X)
|
||||
|
||||
_, n_features = X.shape
|
||||
|
||||
if not np.all(family.in_y_range(y)):
|
||||
raise ValueError("Some value(s) of y are out of the valid "
|
||||
"range for family {0}"
|
||||
.format(family.__class__.__name__))
|
||||
# TODO: if alpha=0 check that X is not rank deficient
|
||||
|
||||
# rescaling of sample_weight
|
||||
#
|
||||
# IMPORTANT NOTE: Since we want to minimize
|
||||
# 1/(2*sum(sample_weight)) * deviance + L2,
|
||||
# deviance = sum(sample_weight * unit_deviance),
|
||||
# we rescale weights such that sum(weights) = 1 and this becomes
|
||||
# 1/2*deviance + L2 with deviance=sum(weights * unit_deviance)
|
||||
weights = weights / weights.sum()
|
||||
|
||||
if self.warm_start and hasattr(self, 'coef_'):
|
||||
if self.fit_intercept:
|
||||
coef = np.concatenate((np.array([self.intercept_]),
|
||||
self.coef_))
|
||||
else:
|
||||
coef = self.coef_
|
||||
else:
|
||||
if self.fit_intercept:
|
||||
coef = np.zeros(n_features+1)
|
||||
coef[0] = link(np.average(y, weights=weights))
|
||||
else:
|
||||
coef = np.zeros(n_features)
|
||||
|
||||
# algorithms for optimization
|
||||
|
||||
if solver == 'lbfgs':
|
||||
def func(coef, X, y, weights, alpha, family, link):
|
||||
y_pred, devp = _y_pred_deviance_derivative(
|
||||
coef, X, y, weights, family, link
|
||||
)
|
||||
dev = family.deviance(y, y_pred, weights)
|
||||
# offset if coef[0] is intercept
|
||||
offset = 1 if self.fit_intercept else 0
|
||||
coef_scaled = alpha * coef[offset:]
|
||||
obj = 0.5 * dev + 0.5 * (coef[offset:] @ coef_scaled)
|
||||
objp = 0.5 * devp
|
||||
objp[offset:] += coef_scaled
|
||||
return obj, objp
|
||||
|
||||
args = (X, y, weights, self.alpha, family, link)
|
||||
|
||||
opt_res = scipy.optimize.minimize(
|
||||
func, coef, method="L-BFGS-B", jac=True,
|
||||
options={
|
||||
"maxiter": self.max_iter,
|
||||
"iprint": (self.verbose > 0) - 1,
|
||||
"gtol": self.tol,
|
||||
"ftol": 1e3*np.finfo(float).eps,
|
||||
},
|
||||
args=args)
|
||||
self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
|
||||
coef = opt_res.x
|
||||
|
||||
if self.fit_intercept:
|
||||
self.intercept_ = coef[0]
|
||||
self.coef_ = coef[1:]
|
||||
else:
|
||||
# set intercept to zero as the other linear models do
|
||||
self.intercept_ = 0.
|
||||
self.coef_ = coef
|
||||
|
||||
return self
|
||||
|
||||
def _linear_predictor(self, X):
|
||||
"""Compute the linear_predictor = `X @ coef_ + intercept_`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
||||
Samples.
|
||||
|
||||
Returns
|
||||
-------
|
||||
y_pred : array of shape (n_samples,)
|
||||
Returns predicted values of linear predictor.
|
||||
"""
|
||||
check_is_fitted(self)
|
||||
X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
|
||||
dtype=[np.float64, np.float32], ensure_2d=True,
|
||||
allow_nd=False)
|
||||
return X @ self.coef_ + self.intercept_
|
||||
|
||||
def predict(self, X):
|
||||
"""Predict using GLM with feature matrix X.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
||||
Samples.
|
||||
|
||||
Returns
|
||||
-------
|
||||
y_pred : array of shape (n_samples,)
|
||||
Returns predicted values.
|
||||
"""
|
||||
# check_array is done in _linear_predictor
|
||||
eta = self._linear_predictor(X)
|
||||
y_pred = self._link_instance.inverse(eta)
|
||||
return y_pred
|
||||
|
||||
def score(self, X, y, sample_weight=None):
|
||||
"""Compute D^2, the percentage of deviance explained.
|
||||
|
||||
D^2 is a generalization of the coefficient of determination R^2.
|
||||
R^2 uses squared error and D^2 uses deviance. Note that those two are equal
|
||||
for ``family='normal'``.
|
||||
|
||||
D^2 is defined as
|
||||
:math:`D^2 = 1-\\frac{D(y_{true},y_{pred})}{D_{null}}`,
|
||||
where :math:`D_{null}` is the null deviance, i.e. the deviance of a model
|
||||
with intercept alone, which corresponds to :math:`y_{pred} = \\bar{y}`.
|
||||
The mean :math:`\\bar{y}` is averaged by sample_weight.
|
||||
Best possible score is 1.0 and it can be negative (because the model
|
||||
can be arbitrarily worse).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
||||
Test samples.
|
||||
|
||||
y : array-like of shape (n_samples,)
|
||||
True values of target.
|
||||
|
||||
sample_weight : array-like of shape (n_samples,), default=None
|
||||
Sample weights.
|
||||
|
||||
Returns
|
||||
-------
|
||||
score : float
|
||||
D^2 of self.predict(X) w.r.t. y.
|
||||
"""
|
||||
# Note, default score defined in RegressorMixin is R^2 score.
|
||||
# TODO: make D^2 a score function in module metrics (and thereby get
|
||||
# input validation and so on)
|
||||
weights = _check_sample_weight(sample_weight, X)
|
||||
y_pred = self.predict(X)
|
||||
dev = self._family_instance.deviance(y, y_pred, weights=weights)
|
||||
y_mean = np.average(y, weights=weights)
|
||||
dev_null = self._family_instance.deviance(y, y_mean, weights=weights)
|
||||
return 1 - dev / dev_null
|
||||
|
||||
def _more_tags(self):
|
||||
# create the _family_instance if fit wasn't called yet.
|
||||
if hasattr(self, '_family_instance'):
|
||||
_family_instance = self._family_instance
|
||||
elif isinstance(self.family, ExponentialDispersionModel):
|
||||
_family_instance = self.family
|
||||
elif self.family in EDM_DISTRIBUTIONS:
|
||||
_family_instance = EDM_DISTRIBUTIONS[self.family]()
|
||||
else:
|
||||
raise ValueError
|
||||
return {"requires_positive_y": not _family_instance.in_y_range(-1.0)}
|
||||
|
||||
|
||||
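# A hedged usage sketch (not part of scikit-learn); the toy counts and the
# alpha value below are illustrative assumptions.
def _example_generalized_linear_regressor():
    """Fit a Poisson GLM on toy counts and return its D^2 score."""
    import numpy as np

    X = np.array([[1.0, 0.0], [2.0, 1.0], [3.0, 0.0], [4.0, 1.0]])
    y = np.array([1.0, 2.0, 4.0, 7.0])
    glm = GeneralizedLinearRegressor(family='poisson', alpha=0.1)
    return glm.fit(X, y).score(X, y)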
class PoissonRegressor(GeneralizedLinearRegressor):
|
||||
"""Generalized Linear Model with a Poisson distribution.
|
||||
|
||||
Read more in the :ref:`User Guide <Generalized_linear_regression>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
alpha : float, default=1
|
||||
Constant that multiplies the penalty term and thus determines the
|
||||
regularization strength. ``alpha = 0`` is equivalent to unpenalized
|
||||
GLMs. In this case, the design matrix `X` must have full column rank
|
||||
(no collinearities).
|
||||
|
||||
fit_intercept : bool, default=True
|
||||
Specifies if a constant (a.k.a. bias or intercept) should be
|
||||
added to the linear predictor (X @ coef + intercept).
|
||||
|
||||
max_iter : int, default=100
|
||||
The maximal number of iterations for the solver.
|
||||
|
||||
tol : float, default=1e-4
|
||||
Stopping criterion. For the lbfgs solver,
|
||||
the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``
|
||||
where ``g_j`` is the j-th component of the gradient (derivative) of
|
||||
the objective function.
|
||||
|
||||
warm_start : bool, default=False
|
||||
If set to ``True``, reuse the solution of the previous call to ``fit``
|
||||
as initialization for ``coef_`` and ``intercept_`` .
|
||||
|
||||
verbose : int, default=0
|
||||
For the lbfgs solver set verbose to any positive number for verbosity.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
coef_ : array of shape (n_features,)
|
||||
Estimated coefficients for the linear predictor (`X @ coef_ +
|
||||
intercept_`) in the GLM.
|
||||
|
||||
intercept_ : float
|
||||
Intercept (a.k.a. bias) added to linear predictor.
|
||||
|
||||
n_iter_ : int
|
||||
Actual number of iterations used in the solver.
|
||||
"""
|
||||
def __init__(self, *, alpha=1.0, fit_intercept=True, max_iter=100,
|
||||
tol=1e-4, warm_start=False, verbose=0):
|
||||
|
||||
super().__init__(alpha=alpha, fit_intercept=fit_intercept,
|
||||
family="poisson", link='log', max_iter=max_iter,
|
||||
tol=tol, warm_start=warm_start, verbose=verbose)
|
||||
|
||||
@property
|
||||
def family(self):
|
||||
# Make this attribute read-only to avoid mis-uses e.g. in GridSearch.
|
||||
return "poisson"
|
||||
|
||||
@family.setter
|
||||
def family(self, value):
|
||||
if value != "poisson":
|
||||
raise ValueError("PoissonRegressor.family must be 'poisson'!")
|
||||
|
||||
|
||||
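# A hedged usage sketch (not part of scikit-learn): PoissonRegressor is just
# GeneralizedLinearRegressor with family='poisson' and link='log'; the toy
# data below are illustrative.
def _example_poisson_regressor():
    """Fit PoissonRegressor on toy counts and return the fitted parameters."""
    import numpy as np

    X = np.array([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 3.0]])
    y = np.array([12.0, 17.0, 22.0, 21.0])
    reg = PoissonRegressor(alpha=1.0, max_iter=300).fit(X, y)
    return reg.coef_, reg.intercept_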
class GammaRegressor(GeneralizedLinearRegressor):
|
||||
"""Generalized Linear Model with a Gamma distribution.
|
||||
|
||||
Read more in the :ref:`User Guide <Generalized_linear_regression>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
alpha : float, default=1
|
||||
Constant that multiplies the penalty term and thus determines the
|
||||
regularization strength. ``alpha = 0`` is equivalent to unpenalized
|
||||
GLMs. In this case, the design matrix `X` must have full column rank
|
||||
(no collinearities).
|
||||
|
||||
fit_intercept : bool, default=True
|
||||
Specifies if a constant (a.k.a. bias or intercept) should be
|
||||
added to the linear predictor (X @ coef + intercept).
|
||||
|
||||
max_iter : int, default=100
|
||||
The maximal number of iterations for the solver.
|
||||
|
||||
tol : float, default=1e-4
|
||||
Stopping criterion. For the lbfgs solver,
|
||||
the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``
|
||||
where ``g_j`` is the j-th component of the gradient (derivative) of
|
||||
the objective function.
|
||||
|
||||
warm_start : bool, default=False
|
||||
If set to ``True``, reuse the solution of the previous call to ``fit``
|
||||
as initialization for ``coef_`` and ``intercept_`` .
|
||||
|
||||
verbose : int, default=0
|
||||
For the lbfgs solver set verbose to any positive number for verbosity.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
coef_ : array of shape (n_features,)
|
||||
Estimated coefficients for the linear predictor (`X @ coef_ +
|
||||
intercept_`) in the GLM.
|
||||
|
||||
intercept_ : float
|
||||
Intercept (a.k.a. bias) added to linear predictor.
|
||||
|
||||
n_iter_ : int
|
||||
Actual number of iterations used in the solver.
|
||||
"""
|
||||
def __init__(self, *, alpha=1.0, fit_intercept=True, max_iter=100,
|
||||
tol=1e-4, warm_start=False, verbose=0):
|
||||
|
||||
super().__init__(alpha=alpha, fit_intercept=fit_intercept,
|
||||
family="gamma", link='log', max_iter=max_iter,
|
||||
tol=tol, warm_start=warm_start, verbose=verbose)
|
||||
|
||||
@property
|
||||
def family(self):
|
||||
# Make this attribute read-only to avoid mis-uses e.g. in GridSearch.
|
||||
return "gamma"
|
||||
|
||||
@family.setter
|
||||
def family(self, value):
|
||||
if value != "gamma":
|
||||
raise ValueError("GammaRegressor.family must be 'gamma'!")
|
||||
|
||||
|
||||
class TweedieRegressor(GeneralizedLinearRegressor):
|
||||
"""Generalized Linear Model with a Tweedie distribution.
|
||||
|
||||
This estimator can be used to model different GLMs depending on the
|
||||
``power`` parameter, which determines the underlying distribution.
|
||||
|
||||
Read more in the :ref:`User Guide <Generalized_linear_regression>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
power : float, default=0
|
||||
The power determines the underlying target distribution according
|
||||
to the following table:
|
||||
|
||||
+-------+------------------------+
|
||||
| Power | Distribution |
|
||||
+=======+========================+
|
||||
| 0 | Normal |
|
||||
+-------+------------------------+
|
||||
| 1 | Poisson |
|
||||
+-------+------------------------+
|
||||
| (1,2) | Compound Poisson Gamma |
|
||||
+-------+------------------------+
|
||||
| 2 | Gamma |
|
||||
+-------+------------------------+
|
||||
| 3 | Inverse Gaussian |
|
||||
+-------+------------------------+
|
||||
|
||||
For ``0 < power < 1``, no distribution exists.
|
||||
|
||||
alpha : float, default=1
|
||||
Constant that multiplies the penalty term and thus determines the
|
||||
regularization strength. ``alpha = 0`` is equivalent to unpenalized
|
||||
GLMs. In this case, the design matrix `X` must have full column rank
|
||||
(no collinearities).
|
||||
|
||||
link : {'auto', 'identity', 'log'}, default='auto'
|
||||
The link function of the GLM, i.e. mapping from linear predictor
|
||||
`X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets
|
||||
the link depending on the chosen family as follows:
|
||||
|
||||
- 'identity' for Normal distribution
|
||||
- 'log' for Poisson, Gamma and Inverse Gaussian distributions
|
||||
|
||||
fit_intercept : bool, default=True
|
||||
Specifies if a constant (a.k.a. bias or intercept) should be
|
||||
added to the linear predictor (X @ coef + intercept).
|
||||
|
||||
max_iter : int, default=100
|
||||
The maximal number of iterations for the solver.
|
||||
|
||||
tol : float, default=1e-4
|
||||
Stopping criterion. For the lbfgs solver,
|
||||
the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``
|
||||
where ``g_j`` is the j-th component of the gradient (derivative) of
|
||||
the objective function.
|
||||
|
||||
warm_start : bool, default=False
|
||||
If set to ``True``, reuse the solution of the previous call to ``fit``
|
||||
as initialization for ``coef_`` and ``intercept_`` .
|
||||
|
||||
verbose : int, default=0
|
||||
For the lbfgs solver set verbose to any positive number for verbosity.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
coef_ : array of shape (n_features,)
|
||||
Estimated coefficients for the linear predictor (`X @ coef_ +
|
||||
intercept_`) in the GLM.
|
||||
|
||||
intercept_ : float
|
||||
Intercept (a.k.a. bias) added to linear predictor.
|
||||
|
||||
n_iter_ : int
|
||||
Actual number of iterations used in the solver.
|
||||
"""
|
||||
def __init__(self, *, power=0.0, alpha=1.0, fit_intercept=True,
|
||||
link='auto', max_iter=100, tol=1e-4,
|
||||
warm_start=False, verbose=0):
|
||||
|
||||
super().__init__(alpha=alpha, fit_intercept=fit_intercept,
|
||||
family=TweedieDistribution(power=power), link=link,
|
||||
max_iter=max_iter, tol=tol,
|
||||
warm_start=warm_start, verbose=verbose)
|
||||
|
||||
@property
|
||||
def family(self):
|
||||
# We use a property with a setter to make sure that the family is
|
||||
# always a Tweedie distribution, and that self.power and
|
||||
# self.family.power are identical by construction.
|
||||
dist = TweedieDistribution(power=self.power)
|
||||
# TODO: make the returned object immutable
|
||||
return dist
|
||||
|
||||
@family.setter
|
||||
def family(self, value):
|
||||
if isinstance(value, TweedieDistribution):
|
||||
self.power = value.power
|
||||
else:
|
||||
raise TypeError("TweedieRegressor.family must be of type "
|
||||
"TweedieDistribution!")
|
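# A hedged sketch (not part of scikit-learn) showing how `power` and the
# read-only `family` property stay in sync by construction.
def _example_tweedie_power_family():
    """Return the family power of a compound Poisson-Gamma TweedieRegressor."""
    est = TweedieRegressor(power=1.5, link='log')
    # est.family is rebuilt from est.power, so both report 1.5
    return est.family.power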
110
venv/Lib/site-packages/sklearn/linear_model/_glm/link.py
Normal file
110
venv/Lib/site-packages/sklearn/linear_model/_glm/link.py
Normal file
|
@ -0,0 +1,110 @@
|
|||
"""
|
||||
Link functions used in GLM
|
||||
"""
|
||||
|
||||
# Author: Christian Lorentzen <lorentzen.ch@googlemail.com>
|
||||
# License: BSD 3 clause
|
||||
|
||||
from abc import ABCMeta, abstractmethod
|
||||
|
||||
import numpy as np
|
||||
from scipy.special import expit, logit
|
||||
|
||||
|
||||
class BaseLink(metaclass=ABCMeta):
|
||||
"""Abstract base class for Link functions."""
|
||||
|
||||
@abstractmethod
|
||||
def __call__(self, y_pred):
|
||||
"""Compute the link function g(y_pred).
|
||||
|
||||
The link function links the mean y_pred=E[Y] to the so-called linear
|
||||
predictor (X*w), i.e. g(y_pred) = linear predictor.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_pred : array of shape (n_samples,)
|
||||
Usually the (predicted) mean.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def derivative(self, y_pred):
|
||||
"""Compute the derivative of the link g'(y_pred).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
y_pred : array of shape (n_samples,)
|
||||
Usually the (predicted) mean.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def inverse(self, lin_pred):
|
||||
"""Compute the inverse link function h(lin_pred).
|
||||
|
||||
Gives the inverse relationship between linear predictor and the mean
|
||||
y_pred=E[Y], i.e. h(linear predictor) = y_pred.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
lin_pred : array of shape (n_samples,)
|
||||
Usually the (fitted) linear predictor.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def inverse_derivative(self, lin_pred):
|
||||
"""Compute the derivative of the inverse link function h'(lin_pred).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
lin_pred : array of shape (n_samples,)
|
||||
Usually the (fitted) linear predictor.
|
||||
"""
|
||||
|
||||
|
||||
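# A hedged sketch (not part of scikit-learn) of how a custom link is written:
# subclass BaseLink and implement the four methods above. A square-root link
# is used purely as an illustration.
class _ExampleSqrtLink(BaseLink):
    """g(x) = sqrt(x) with inverse h(z) = z**2 (illustrative only)."""

    def __call__(self, y_pred):
        return np.sqrt(y_pred)

    def derivative(self, y_pred):
        return 0.5 / np.sqrt(y_pred)

    def inverse(self, lin_pred):
        return lin_pred ** 2

    def inverse_derivative(self, lin_pred):
        return 2 * lin_pred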
class IdentityLink(BaseLink):
|
||||
"""The identity link function g(x)=x."""
|
||||
|
||||
def __call__(self, y_pred):
|
||||
return y_pred
|
||||
|
||||
def derivative(self, y_pred):
|
||||
return np.ones_like(y_pred)
|
||||
|
||||
def inverse(self, lin_pred):
|
||||
return lin_pred
|
||||
|
||||
def inverse_derivative(self, lin_pred):
|
||||
return np.ones_like(lin_pred)
|
||||
|
||||
|
||||
class LogLink(BaseLink):
|
||||
"""The log link function g(x)=log(x)."""
|
||||
|
||||
def __call__(self, y_pred):
|
||||
return np.log(y_pred)
|
||||
|
||||
def derivative(self, y_pred):
|
||||
return 1 / y_pred
|
||||
|
||||
def inverse(self, lin_pred):
|
||||
return np.exp(lin_pred)
|
||||
|
||||
def inverse_derivative(self, lin_pred):
|
||||
return np.exp(lin_pred)
|
||||
|
||||
|
||||
class LogitLink(BaseLink):
|
||||
"""The logit link function g(x)=logit(x)."""
|
||||
|
||||
def __call__(self, y_pred):
|
||||
return logit(y_pred)
|
||||
|
||||
def derivative(self, y_pred):
|
||||
return 1 / (y_pred * (1 - y_pred))
|
||||
|
||||
def inverse(self, lin_pred):
|
||||
return expit(lin_pred)
|
||||
|
||||
def inverse_derivative(self, lin_pred):
|
||||
ep = expit(lin_pred)
|
||||
return ep * (1 - ep)
|
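# A hedged sketch (not part of scikit-learn) of the identities the GLM code
# relies on: g(h(x)) = x and g'(h(x)) = 1 / h'(x), shown here for LogLink.
def _example_link_identities():
    """Check the round-trip and derivative identities for LogLink."""
    import numpy as np

    link = LogLink()
    x = np.array([0.5, 1.0, 2.0])
    round_trip_ok = np.allclose(link(link.inverse(x)), x)
    derivative_ok = np.allclose(link.derivative(link.inverse(x)),
                                1 / link.inverse_derivative(x))
    return round_trip_ok and derivative_ok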
|
@ -0,0 +1 @@
|
|||
# License: BSD 3 clause
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,431 @@
|
|||
# Authors: Christian Lorentzen <lorentzen.ch@gmail.com>
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
import pytest
|
||||
import warnings
|
||||
|
||||
from sklearn.datasets import make_regression
|
||||
from sklearn.linear_model._glm import GeneralizedLinearRegressor
|
||||
from sklearn.linear_model import (
|
||||
TweedieRegressor,
|
||||
PoissonRegressor,
|
||||
GammaRegressor
|
||||
)
|
||||
from sklearn.linear_model._glm.link import (
|
||||
IdentityLink,
|
||||
LogLink,
|
||||
)
|
||||
from sklearn._loss.glm_distribution import (
|
||||
TweedieDistribution,
|
||||
NormalDistribution, PoissonDistribution,
|
||||
GammaDistribution, InverseGaussianDistribution,
|
||||
)
|
||||
from sklearn.linear_model import Ridge
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def regression_data():
|
||||
X, y = make_regression(n_samples=107,
|
||||
n_features=10,
|
||||
n_informative=80, noise=0.5,
|
||||
random_state=2)
|
||||
return X, y
|
||||
|
||||
|
||||
def test_sample_weights_validation():
|
||||
"""Test the raised errors in the validation of sample_weight."""
|
||||
# scalar value but not positive
|
||||
X = [[1]]
|
||||
y = [1]
|
||||
weights = 0
|
||||
glm = GeneralizedLinearRegressor()
|
||||
|
||||
# Positive weights are accepted
|
||||
glm.fit(X, y, sample_weight=1)
|
||||
|
||||
# 2d array
|
||||
weights = [[0]]
|
||||
with pytest.raises(ValueError, match="must be 1D array or scalar"):
|
||||
glm.fit(X, y, weights)
|
||||
|
||||
# 1d but wrong length
|
||||
weights = [1, 0]
|
||||
msg = r"sample_weight.shape == \(2,\), expected \(1,\)!"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
glm.fit(X, y, weights)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('name, instance',
|
||||
[('normal', NormalDistribution()),
|
||||
('poisson', PoissonDistribution()),
|
||||
('gamma', GammaDistribution()),
|
||||
('inverse-gaussian', InverseGaussianDistribution())])
|
||||
def test_glm_family_argument(name, instance):
|
||||
"""Test GLM family argument set as string."""
|
||||
y = np.array([0.1, 0.5]) # in range of all distributions
|
||||
X = np.array([[1], [2]])
|
||||
glm = GeneralizedLinearRegressor(family=name, alpha=0).fit(X, y)
|
||||
assert isinstance(glm._family_instance, instance.__class__)
|
||||
|
||||
glm = GeneralizedLinearRegressor(family='not a family')
|
||||
with pytest.raises(ValueError, match="family must be"):
|
||||
glm.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('name, instance',
|
||||
[('identity', IdentityLink()),
|
||||
('log', LogLink())])
|
||||
def test_glm_link_argument(name, instance):
|
||||
"""Test GLM link argument set as string."""
|
||||
y = np.array([0.1, 0.5]) # in range of all distributions
|
||||
X = np.array([[1], [2]])
|
||||
glm = GeneralizedLinearRegressor(family='normal', link=name).fit(X, y)
|
||||
assert isinstance(glm._link_instance, instance.__class__)
|
||||
|
||||
glm = GeneralizedLinearRegressor(family='normal', link='not a link')
|
||||
with pytest.raises(ValueError, match="link must be"):
|
||||
glm.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('family, expected_link_class', [
|
||||
('normal', IdentityLink),
|
||||
('poisson', LogLink),
|
||||
('gamma', LogLink),
|
||||
('inverse-gaussian', LogLink),
|
||||
])
|
||||
def test_glm_link_auto(family, expected_link_class):
|
||||
# Make sure link='auto' delivers the expected link function
|
||||
y = np.array([0.1, 0.5]) # in range of all distributions
|
||||
X = np.array([[1], [2]])
|
||||
glm = GeneralizedLinearRegressor(family=family, link='auto').fit(X, y)
|
||||
assert isinstance(glm._link_instance, expected_link_class)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('alpha', ['not a number', -4.2])
|
||||
def test_glm_alpha_argument(alpha):
|
||||
"""Test GLM for invalid alpha argument."""
|
||||
y = np.array([1, 2])
|
||||
X = np.array([[1], [2]])
|
||||
glm = GeneralizedLinearRegressor(family='normal', alpha=alpha)
|
||||
with pytest.raises(ValueError,
|
||||
match="Penalty term must be a non-negative"):
|
||||
glm.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('fit_intercept', ['not bool', 1, 0, [True]])
|
||||
def test_glm_fit_intercept_argument(fit_intercept):
|
||||
"""Test GLM for invalid fit_intercept argument."""
|
||||
y = np.array([1, 2])
|
||||
X = np.array([[1], [1]])
|
||||
glm = GeneralizedLinearRegressor(fit_intercept=fit_intercept)
|
||||
with pytest.raises(ValueError, match="fit_intercept must be bool"):
|
||||
glm.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('solver',
|
||||
['not a solver', 1, [1]])
|
||||
def test_glm_solver_argument(solver):
|
||||
"""Test GLM for invalid solver argument."""
|
||||
y = np.array([1, 2])
|
||||
X = np.array([[1], [2]])
|
||||
glm = GeneralizedLinearRegressor(solver=solver)
|
||||
with pytest.raises(ValueError):
|
||||
glm.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('max_iter', ['not a number', 0, -1, 5.5, [1]])
|
||||
def test_glm_max_iter_argument(max_iter):
|
||||
"""Test GLM for invalid max_iter argument."""
|
||||
y = np.array([1, 2])
|
||||
X = np.array([[1], [2]])
|
||||
glm = GeneralizedLinearRegressor(max_iter=max_iter)
|
||||
with pytest.raises(ValueError, match="must be a positive integer"):
|
||||
glm.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('tol', ['not a number', 0, -1.0, [1e-3]])
|
||||
def test_glm_tol_argument(tol):
|
||||
"""Test GLM for invalid tol argument."""
|
||||
y = np.array([1, 2])
|
||||
X = np.array([[1], [2]])
|
||||
glm = GeneralizedLinearRegressor(tol=tol)
|
||||
with pytest.raises(ValueError, match="stopping criteria must be positive"):
|
||||
glm.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('warm_start', ['not bool', 1, 0, [True]])
|
||||
def test_glm_warm_start_argument(warm_start):
|
||||
"""Test GLM for invalid warm_start argument."""
|
||||
y = np.array([1, 2])
|
||||
X = np.array([[1], [1]])
|
||||
glm = GeneralizedLinearRegressor(warm_start=warm_start)
|
||||
with pytest.raises(ValueError, match="warm_start must be bool"):
|
||||
glm.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('fit_intercept', [False, True])
|
||||
def test_glm_identity_regression(fit_intercept):
|
||||
"""Test GLM regression with identity link on a simple dataset."""
|
||||
coef = [1., 2.]
|
||||
X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T
|
||||
y = np.dot(X, coef)
|
||||
glm = GeneralizedLinearRegressor(alpha=0, family='normal', link='identity',
|
||||
fit_intercept=fit_intercept, tol=1e-12)
|
||||
if fit_intercept:
|
||||
glm.fit(X[:, 1:], y)
|
||||
assert_allclose(glm.coef_, coef[1:], rtol=1e-10)
|
||||
assert_allclose(glm.intercept_, coef[0], rtol=1e-10)
|
||||
else:
|
||||
glm.fit(X, y)
|
||||
assert_allclose(glm.coef_, coef, rtol=1e-12)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('fit_intercept', [False, True])
|
||||
@pytest.mark.parametrize('alpha', [0.0, 1.0])
|
||||
@pytest.mark.parametrize('family', ['normal', 'poisson', 'gamma'])
|
||||
def test_glm_sample_weight_consistency(fit_intercept, alpha, family):
|
||||
"""Test that the impact of sample_weight is consistent"""
|
||||
rng = np.random.RandomState(0)
|
||||
n_samples, n_features = 10, 5
|
||||
|
||||
X = rng.rand(n_samples, n_features)
|
||||
y = rng.rand(n_samples)
|
||||
glm_params = dict(alpha=alpha, family=family, link='auto',
|
||||
fit_intercept=fit_intercept)
|
||||
|
||||
glm = GeneralizedLinearRegressor(**glm_params).fit(X, y)
|
||||
coef = glm.coef_.copy()
|
||||
|
||||
# sample_weight=np.ones(..) should be equivalent to sample_weight=None
|
||||
sample_weight = np.ones(y.shape)
|
||||
glm.fit(X, y, sample_weight=sample_weight)
|
||||
assert_allclose(glm.coef_, coef, rtol=1e-12)
|
||||
|
||||
# sample_weight are normalized to 1 so, scaling them has no effect
|
||||
sample_weight = 2*np.ones(y.shape)
|
||||
glm.fit(X, y, sample_weight=sample_weight)
|
||||
assert_allclose(glm.coef_, coef, rtol=1e-12)
|
||||
|
||||
# setting one element of sample_weight to 0 is equivalent to removing
|
||||
# the corresponding sample
|
||||
sample_weight = np.ones(y.shape)
|
||||
sample_weight[-1] = 0
|
||||
glm.fit(X, y, sample_weight=sample_weight)
|
||||
coef1 = glm.coef_.copy()
|
||||
glm.fit(X[:-1], y[:-1])
|
||||
assert_allclose(glm.coef_, coef1, rtol=1e-12)
|
||||
|
||||
# check that multiplying sample_weight by 2 is equivalent
|
||||
# to repeating the corresponding samples twice
|
||||
X2 = np.concatenate([X, X[:n_samples//2]], axis=0)
|
||||
y2 = np.concatenate([y, y[:n_samples//2]])
|
||||
sample_weight_1 = np.ones(len(y))
|
||||
sample_weight_1[:n_samples//2] = 2
|
||||
|
||||
glm1 = GeneralizedLinearRegressor(**glm_params).fit(
|
||||
X, y, sample_weight=sample_weight_1
|
||||
)
|
||||
|
||||
glm2 = GeneralizedLinearRegressor(**glm_params).fit(
|
||||
X2, y2, sample_weight=None
|
||||
)
|
||||
assert_allclose(glm1.coef_, glm2.coef_)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('fit_intercept', [True, False])
|
||||
@pytest.mark.parametrize(
|
||||
'family',
|
||||
[NormalDistribution(), PoissonDistribution(),
|
||||
GammaDistribution(), InverseGaussianDistribution(),
|
||||
TweedieDistribution(power=1.5), TweedieDistribution(power=4.5)])
|
||||
def test_glm_log_regression(fit_intercept, family):
|
||||
"""Test GLM regression with log link on a simple dataset."""
|
||||
coef = [0.2, -0.1]
|
||||
X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T
|
||||
y = np.exp(np.dot(X, coef))
|
||||
glm = GeneralizedLinearRegressor(
|
||||
alpha=0, family=family, link='log',
|
||||
fit_intercept=fit_intercept, tol=1e-7)
|
||||
if fit_intercept:
|
||||
res = glm.fit(X[:, 1:], y)
|
||||
assert_allclose(res.coef_, coef[1:], rtol=1e-6)
|
||||
assert_allclose(res.intercept_, coef[0], rtol=1e-6)
|
||||
else:
|
||||
res = glm.fit(X, y)
|
||||
assert_allclose(res.coef_, coef, rtol=2e-6)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('fit_intercept', [True, False])
|
||||
def test_warm_start(fit_intercept):
|
||||
n_samples, n_features = 110, 10
|
||||
X, y = make_regression(n_samples=n_samples, n_features=n_features,
|
||||
n_informative=n_features-2, noise=0.5,
|
||||
random_state=42)
|
||||
|
||||
glm1 = GeneralizedLinearRegressor(
|
||||
warm_start=False,
|
||||
fit_intercept=fit_intercept,
|
||||
max_iter=1000
|
||||
)
|
||||
glm1.fit(X, y)
|
||||
|
||||
glm2 = GeneralizedLinearRegressor(
|
||||
warm_start=True,
|
||||
fit_intercept=fit_intercept,
|
||||
max_iter=1
|
||||
)
|
||||
# As we intentionally set max_iter=1, L-BFGS-B will issue a
|
||||
# ConvergenceWarning which we here simply ignore.
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings('ignore', category=ConvergenceWarning)
|
||||
glm2.fit(X, y)
|
||||
assert glm1.score(X, y) > glm2.score(X, y)
|
||||
glm2.set_params(max_iter=1000)
|
||||
glm2.fit(X, y)
|
||||
# The two models are not exactly identical since the lbfgs solver
|
||||
# computes the approximate hessian from previous iterations, which
|
||||
# will not be strictly identical in the case of a warm start.
|
||||
assert_allclose(glm1.coef_, glm2.coef_, rtol=1e-5)
|
||||
assert_allclose(glm1.score(X, y), glm2.score(X, y), rtol=1e-4)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('n_samples, n_features', [(100, 10), (10, 100)])
|
||||
@pytest.mark.parametrize('fit_intercept', [True, False])
|
||||
@pytest.mark.parametrize('sample_weight', [None, True])
|
||||
def test_normal_ridge_comparison(n_samples, n_features, fit_intercept,
|
||||
sample_weight, request):
|
||||
"""Compare with Ridge regression for Normal distributions."""
|
||||
test_size = 10
|
||||
X, y = make_regression(n_samples=n_samples + test_size,
|
||||
n_features=n_features,
|
||||
n_informative=n_features-2, noise=0.5,
|
||||
random_state=42)
|
||||
|
||||
if n_samples > n_features:
|
||||
ridge_params = {"solver": "svd"}
|
||||
else:
|
||||
ridge_params = {"solver": "saga", "max_iter": 1000000, "tol": 1e-7}
|
||||
|
||||
X_train, X_test, y_train, y_test, = train_test_split(
|
||||
X, y, test_size=test_size, random_state=0
|
||||
)
|
||||
|
||||
alpha = 1.0
|
||||
if sample_weight is None:
|
||||
sw_train = None
|
||||
alpha_ridge = alpha * n_samples
|
||||
else:
|
||||
sw_train = np.random.RandomState(0).rand(len(y_train))
|
||||
alpha_ridge = alpha * sw_train.sum()
|
||||
|
||||
# GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2
|
||||
ridge = Ridge(alpha=alpha_ridge, normalize=False,
|
||||
random_state=42, fit_intercept=fit_intercept,
|
||||
**ridge_params)
|
||||
ridge.fit(X_train, y_train, sample_weight=sw_train)
|
||||
|
||||
glm = GeneralizedLinearRegressor(alpha=alpha, family='normal',
|
||||
link='identity',
|
||||
fit_intercept=fit_intercept,
|
||||
max_iter=300,
|
||||
tol=1e-5)
|
||||
glm.fit(X_train, y_train, sample_weight=sw_train)
|
||||
assert glm.coef_.shape == (X.shape[1], )
|
||||
assert_allclose(glm.coef_, ridge.coef_, atol=5e-5)
|
||||
assert_allclose(glm.intercept_, ridge.intercept_, rtol=1e-5)
|
||||
assert_allclose(glm.predict(X_train), ridge.predict(X_train), rtol=2e-4)
|
||||
assert_allclose(glm.predict(X_test), ridge.predict(X_test), rtol=2e-4)
|
||||
|
||||
|
||||
def test_poisson_glmnet():
|
||||
"""Compare Poisson regression with L2 regularization and LogLink to glmnet
|
||||
"""
|
||||
# library("glmnet")
|
||||
# options(digits=10)
|
||||
# df <- data.frame(a=c(-2,-1,1,2), b=c(0,0,1,1), y=c(0,1,1,2))
|
||||
# x <- data.matrix(df[,c("a", "b")])
|
||||
# y <- df$y
|
||||
# fit <- glmnet(x=x, y=y, alpha=0, intercept=T, family="poisson",
|
||||
# standardize=F, thresh=1e-10, nlambda=10000)
|
||||
# coef(fit, s=1)
|
||||
# (Intercept) -0.12889386979
|
||||
# a 0.29019207995
|
||||
# b 0.03741173122
|
||||
X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T
|
||||
y = np.array([0, 1, 1, 2])
|
||||
glm = GeneralizedLinearRegressor(alpha=1,
|
||||
fit_intercept=True, family='poisson',
|
||||
link='log', tol=1e-7,
|
||||
max_iter=300)
|
||||
glm.fit(X, y)
|
||||
assert_allclose(glm.intercept_, -0.12889386979, rtol=1e-5)
|
||||
assert_allclose(glm.coef_, [0.29019207995, 0.03741173122], rtol=1e-5)
|
||||
|
||||
|
||||
def test_convergence_warning(regression_data):
|
||||
X, y = regression_data
|
||||
|
||||
est = GeneralizedLinearRegressor(max_iter=1, tol=1e-20)
|
||||
with pytest.warns(ConvergenceWarning):
|
||||
est.fit(X, y)
|
||||
|
||||
|
||||
def test_poisson_regression_family(regression_data):
|
||||
# Make sure the family attribute is read-only to prevent searching over it
|
||||
# e.g. in a grid search
|
||||
est = PoissonRegressor()
|
||||
est.family == "poisson"
|
||||
|
||||
msg = "PoissonRegressor.family must be 'poisson'!"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
est.family = 0
|
||||
|
||||
|
||||
def test_gamma_regression_family(regression_data):
|
||||
# Make sure the family attribute is read-only to prevent searching over it
|
||||
# e.g. in a grid search
|
||||
est = GammaRegressor()
|
||||
est.family == "gamma"
|
||||
|
||||
msg = "GammaRegressor.family must be 'gamma'!"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
est.family = 0
|
||||
|
||||
|
||||
def test_tweedie_regression_family(regression_data):
|
||||
# Make sure the family attribute is always a TweedieDistribution and that
|
||||
# the power attribute is properly updated
|
||||
power = 2.0
|
||||
est = TweedieRegressor(power=power)
|
||||
assert isinstance(est.family, TweedieDistribution)
|
||||
assert est.family.power == power
|
||||
assert est.power == power
|
||||
|
||||
new_power = 0
|
||||
new_family = TweedieDistribution(power=new_power)
|
||||
est.family = new_family
|
||||
assert isinstance(est.family, TweedieDistribution)
|
||||
assert est.family.power == new_power
|
||||
assert est.power == new_power
|
||||
|
||||
msg = "TweedieRegressor.family must be of type TweedieDistribution!"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
est.family = None
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'estimator, value',
|
||||
[
|
||||
(PoissonRegressor(), True),
|
||||
(GammaRegressor(), True),
|
||||
(TweedieRegressor(power=1.5), True),
|
||||
(TweedieRegressor(power=0), False)
|
||||
],
|
||||
)
|
||||
def test_tags(estimator, value):
|
||||
assert estimator._get_tags()['requires_positive_y'] is value
|
|
@ -0,0 +1,45 @@
|
|||
# Authors: Christian Lorentzen <lorentzen.ch@gmail.com>
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
import pytest
|
||||
from scipy.optimize import check_grad
|
||||
|
||||
from sklearn.linear_model._glm.link import (
|
||||
IdentityLink,
|
||||
LogLink,
|
||||
LogitLink,
|
||||
)
|
||||
|
||||
|
||||
LINK_FUNCTIONS = [IdentityLink, LogLink, LogitLink]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('Link', LINK_FUNCTIONS)
|
||||
def test_link_properties(Link):
|
||||
"""Test link inverse and derivative."""
|
||||
rng = np.random.RandomState(42)
|
||||
x = rng.rand(100) * 100
|
||||
link = Link()
|
||||
if isinstance(link, LogitLink):
|
||||
# careful for large x, note expit(36) = 1
|
||||
# limit max eta to 15
|
||||
x = x / 100 * 15
|
||||
assert_allclose(link(link.inverse(x)), x)
|
||||
# if g(h(x)) = x, then g'(h(x)) = 1/h'(x)
|
||||
# g = link, h = link.inverse
|
||||
assert_allclose(link.derivative(link.inverse(x)),
|
||||
1 / link.inverse_derivative(x))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('Link', LINK_FUNCTIONS)
|
||||
def test_link_derivative(Link):
|
||||
link = Link()
|
||||
x = np.random.RandomState(0).rand(1)
|
||||
err = check_grad(link, link.derivative, x) / link.derivative(x)
|
||||
assert abs(err) < 1e-6
|
||||
|
||||
err = (check_grad(link.inverse, link.inverse_derivative, x)
|
||||
/ link.derivative(x))
|
||||
assert abs(err) < 1e-6
|
307
venv/Lib/site-packages/sklearn/linear_model/_huber.py
Normal file
307
venv/Lib/site-packages/sklearn/linear_model/_huber.py
Normal file
|
@ -0,0 +1,307 @@
|
|||
# Authors: Manoj Kumar mks542@nyu.edu
|
||||
# License: BSD 3 clause
|
||||
|
||||
import numpy as np
|
||||
|
||||
from scipy import optimize
|
||||
|
||||
from ..base import BaseEstimator, RegressorMixin
|
||||
from ._base import LinearModel
|
||||
from ..utils import axis0_safe_slice
|
||||
from ..utils.validation import _check_sample_weight
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ..utils.extmath import safe_sparse_dot
|
||||
from ..utils.optimize import _check_optimize_result
|
||||
|
||||
|
||||
def _huber_loss_and_gradient(w, X, y, epsilon, alpha, sample_weight=None):
|
||||
"""Returns the Huber loss and the gradient.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
w : ndarray, shape (n_features + 1,) or (n_features + 2,)
|
||||
Feature vector.
|
||||
w[:n_features] gives the coefficients
|
||||
w[-1] gives the scale factor and if the intercept is fit w[-2]
|
||||
gives the intercept factor.
|
||||
|
||||
X : ndarray, shape (n_samples, n_features)
|
||||
Input data.
|
||||
|
||||
y : ndarray, shape (n_samples,)
|
||||
Target vector.
|
||||
|
||||
epsilon : float
|
||||
Robustness of the Huber estimator.
|
||||
|
||||
alpha : float
|
||||
Regularization parameter.
|
||||
|
||||
sample_weight : ndarray, shape (n_samples,), optional
|
||||
Weight assigned to each sample.
|
||||
|
||||
Returns
|
||||
-------
|
||||
loss : float
|
||||
Huber loss.
|
||||
|
||||
gradient : ndarray, shape (len(w))
|
||||
Returns the derivative of the Huber loss with respect to each
|
||||
coefficient, intercept and the scale as a vector.
|
||||
"""
|
||||
_, n_features = X.shape
|
||||
fit_intercept = (n_features + 2 == w.shape[0])
|
||||
if fit_intercept:
|
||||
intercept = w[-2]
|
||||
sigma = w[-1]
|
||||
w = w[:n_features]
|
||||
n_samples = np.sum(sample_weight)
|
||||
|
||||
# Calculate the values where |(y - X'w - c) / sigma| > epsilon
|
||||
# The values above this threshold are outliers.
|
||||
linear_loss = y - safe_sparse_dot(X, w)
|
||||
if fit_intercept:
|
||||
linear_loss -= intercept
|
||||
abs_linear_loss = np.abs(linear_loss)
|
||||
outliers_mask = abs_linear_loss > epsilon * sigma
|
||||
|
||||
# Calculate the linear loss due to the outliers.
|
||||
# This is equal to (2 * M * |(y - X'w - c) / sigma| - M**2) * sigma
|
||||
outliers = abs_linear_loss[outliers_mask]
|
||||
num_outliers = np.count_nonzero(outliers_mask)
|
||||
n_non_outliers = X.shape[0] - num_outliers
|
||||
|
||||
# n_sw_outliers includes the weight given to the outliers while
|
||||
# num_outliers is just the number of outliers.
|
||||
outliers_sw = sample_weight[outliers_mask]
|
||||
n_sw_outliers = np.sum(outliers_sw)
|
||||
outlier_loss = (2. * epsilon * np.sum(outliers_sw * outliers) -
|
||||
sigma * n_sw_outliers * epsilon ** 2)
|
||||
|
||||
# Calculate the quadratic loss due to the non-outliers.
|
||||
# This is equal to ((y - X'w - c)**2 / sigma**2) * sigma
|
||||
non_outliers = linear_loss[~outliers_mask]
|
||||
weighted_non_outliers = sample_weight[~outliers_mask] * non_outliers
|
||||
weighted_loss = np.dot(weighted_non_outliers.T, non_outliers)
|
||||
squared_loss = weighted_loss / sigma
|
||||
|
||||
if fit_intercept:
|
||||
grad = np.zeros(n_features + 2)
|
||||
else:
|
||||
grad = np.zeros(n_features + 1)
|
||||
|
||||
# Gradient due to the squared loss.
|
||||
X_non_outliers = -axis0_safe_slice(X, ~outliers_mask, n_non_outliers)
|
||||
grad[:n_features] = (
|
||||
2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))
|
||||
|
||||
# Gradient due to the linear loss.
|
||||
signed_outliers = np.ones_like(outliers)
|
||||
signed_outliers_mask = linear_loss[outliers_mask] < 0
|
||||
signed_outliers[signed_outliers_mask] = -1.0
|
||||
X_outliers = axis0_safe_slice(X, outliers_mask, num_outliers)
|
||||
sw_outliers = sample_weight[outliers_mask] * signed_outliers
|
||||
grad[:n_features] -= 2. * epsilon * (
|
||||
safe_sparse_dot(sw_outliers, X_outliers))
|
||||
|
||||
# Gradient due to the penalty.
|
||||
grad[:n_features] += alpha * 2. * w
|
||||
|
||||
# Gradient due to sigma.
|
||||
grad[-1] = n_samples
|
||||
grad[-1] -= n_sw_outliers * epsilon ** 2
|
||||
grad[-1] -= squared_loss / sigma
|
||||
|
||||
# Gradient due to the intercept.
|
||||
if fit_intercept:
|
||||
grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma
|
||||
grad[-2] -= 2. * epsilon * np.sum(sw_outliers)
|
||||
|
||||
loss = n_samples * sigma + squared_loss + outlier_loss
|
||||
loss += alpha * np.dot(w, w)
|
||||
return loss, grad
|
||||
|
||||
|
||||
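# A hedged sketch (not part of scikit-learn): numerically cross-check the
# analytic Huber gradient above. The toy data, epsilon and alpha values are
# illustrative assumptions.
def _example_check_huber_gradient():
    """Return the numeric-vs-analytic gradient discrepancy (should be tiny)."""
    import numpy as np
    from scipy.optimize import check_grad

    rng = np.random.RandomState(0)
    X, y = rng.rand(30, 4), rng.rand(30)
    sample_weight = np.ones(30)
    # coefficients, intercept and scale (scale must stay strictly positive)
    w0 = np.ones(X.shape[1] + 2)

    def loss(w):
        return _huber_loss_and_gradient(w, X, y, 1.35, 1e-4, sample_weight)[0]

    def grad(w):
        return _huber_loss_and_gradient(w, X, y, 1.35, 1e-4, sample_weight)[1]

    return check_grad(loss, grad, w0)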
class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator):
|
||||
"""Linear regression model that is robust to outliers.
|
||||
|
||||
The Huber Regressor optimizes the squared loss for the samples where
|
||||
``|(y - X'w) / sigma| < epsilon`` and the absolute loss for the samples
|
||||
where ``|(y - X'w) / sigma| > epsilon``, where w and sigma are parameters
|
||||
to be optimized. The parameter sigma makes sure that if y is scaled up
|
||||
or down by a certain factor, one does not need to rescale epsilon to
|
||||
achieve the same robustness. Note that this does not take into account
|
||||
the fact that the different features of X may be of different scales.
|
||||
|
||||
This makes sure that the loss function is not heavily influenced by the
|
||||
outliers while not completely ignoring their effect.
|
||||
|
||||
Read more in the :ref:`User Guide <huber_regression>`
|
||||
|
||||
.. versionadded:: 0.18
|
||||
|
||||
Parameters
|
||||
----------
|
||||
epsilon : float, greater than or equal to 1.0, default 1.35
|
||||
The parameter epsilon controls the number of samples that should be
|
||||
classified as outliers. The smaller the epsilon, the more robust it is
|
||||
to outliers.
|
||||
|
||||
max_iter : int, default 100
|
||||
Maximum number of iterations that
|
||||
``scipy.optimize.minimize(method="L-BFGS-B")`` should run for.
|
||||
|
||||
alpha : float, default 0.0001
|
||||
Regularization parameter.
|
||||
|
||||
warm_start : bool, default False
|
||||
This is useful if the stored attributes of a previously used model
|
||||
have to be reused. If set to False, then the coefficients will
|
||||
be rewritten for every call to fit.
|
||||
See :term:`the Glossary <warm_start>`.
|
||||
|
||||
fit_intercept : bool, default True
|
||||
Whether or not to fit the intercept. This can be set to False
|
||||
if the data is already centered around the origin.
|
||||
|
||||
tol : float, default 1e-5
|
||||
The iteration will stop when
|
||||
``max{|proj g_i | i = 1, ..., n}`` <= ``tol``
|
||||
where pg_i is the i-th component of the projected gradient.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
coef_ : array, shape (n_features,)
|
||||
Coefficients obtained by optimizing the Huber loss.
|
||||
|
||||
intercept_ : float
|
||||
Bias.
|
||||
|
||||
scale_ : float
|
||||
The value by which ``|y - X'w - c|`` is scaled down.
|
||||
|
||||
n_iter_ : int
|
||||
Number of iterations that
|
||||
``scipy.optimize.minimize(method="L-BFGS-B")`` has run for.
|
||||
|
||||
.. versionchanged:: 0.20
|
||||
|
||||
In SciPy <= 1.0.0 the number of lbfgs iterations may exceed
|
||||
``max_iter``. ``n_iter_`` will now report at most ``max_iter``.
|
||||
|
||||
outliers_ : array, shape (n_samples,)
|
||||
A boolean mask which is set to True where the samples are identified
|
||||
as outliers.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> from sklearn.linear_model import HuberRegressor, LinearRegression
|
||||
>>> from sklearn.datasets import make_regression
|
||||
>>> rng = np.random.RandomState(0)
|
||||
>>> X, y, coef = make_regression(
|
||||
... n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)
|
||||
>>> X[:4] = rng.uniform(10, 20, (4, 2))
|
||||
>>> y[:4] = rng.uniform(10, 20, 4)
|
||||
>>> huber = HuberRegressor().fit(X, y)
|
||||
>>> huber.score(X, y)
|
||||
-7.284...
|
||||
>>> huber.predict(X[:1,])
|
||||
array([806.7200...])
|
||||
>>> linear = LinearRegression().fit(X, y)
|
||||
>>> print("True coefficients:", coef)
|
||||
True coefficients: [20.4923... 34.1698...]
|
||||
>>> print("Huber coefficients:", huber.coef_)
|
||||
Huber coefficients: [17.7906... 31.0106...]
|
||||
>>> print("Linear Regression coefficients:", linear.coef_)
|
||||
Linear Regression coefficients: [-1.9221... 7.0226...]
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Peter J. Huber, Elvezio M. Ronchetti, Robust Statistics
|
||||
Concomitant scale estimates, pg 172
|
||||
.. [2] Art B. Owen (2006), A robust hybrid of lasso and ridge regression.
|
||||
https://statweb.stanford.edu/~owen/reports/hhu.pdf
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, *, epsilon=1.35, max_iter=100, alpha=0.0001,
|
||||
warm_start=False, fit_intercept=True, tol=1e-05):
|
||||
self.epsilon = epsilon
|
||||
self.max_iter = max_iter
|
||||
self.alpha = alpha
|
||||
self.warm_start = warm_start
|
||||
self.fit_intercept = fit_intercept
|
||||
self.tol = tol
|
||||
|
||||
def fit(self, X, y, sample_weight=None):
|
||||
"""Fit the model according to the given training data.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like, shape (n_samples, n_features)
|
||||
Training vector, where n_samples is the number of samples and
|
||||
n_features is the number of features.
|
||||
|
||||
y : array-like, shape (n_samples,)
|
||||
Target vector relative to X.
|
||||
|
||||
sample_weight : array-like, shape (n_samples,)
|
||||
Weight given to each sample.
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : object
|
||||
"""
|
||||
X, y = self._validate_data(
|
||||
X, y, copy=False, accept_sparse=['csr'], y_numeric=True,
|
||||
dtype=[np.float64, np.float32])
|
||||
|
||||
sample_weight = _check_sample_weight(sample_weight, X)
|
||||
|
||||
if self.epsilon < 1.0:
|
||||
raise ValueError(
|
||||
"epsilon should be greater than or equal to 1.0, got %f"
|
||||
% self.epsilon)
|
||||
|
||||
if self.warm_start and hasattr(self, 'coef_'):
|
||||
parameters = np.concatenate(
|
||||
(self.coef_, [self.intercept_, self.scale_]))
|
||||
else:
|
||||
if self.fit_intercept:
|
||||
parameters = np.zeros(X.shape[1] + 2)
|
||||
else:
|
||||
parameters = np.zeros(X.shape[1] + 1)
|
||||
# Make sure to initialize the scale parameter to a strictly
|
||||
# positive value:
|
||||
parameters[-1] = 1
|
||||
|
||||
# Sigma or the scale factor should be non-negative.
|
||||
# Setting it to be zero might cause undefined bounds hence we set it
|
||||
# to a value close to zero.
|
||||
bounds = np.tile([-np.inf, np.inf], (parameters.shape[0], 1))
|
||||
bounds[-1][0] = np.finfo(np.float64).eps * 10
|
||||
|
||||
opt_res = optimize.minimize(
|
||||
_huber_loss_and_gradient, parameters, method="L-BFGS-B", jac=True,
|
||||
args=(X, y, self.epsilon, self.alpha, sample_weight),
|
||||
options={"maxiter": self.max_iter, "gtol": self.tol, "iprint": -1},
|
||||
bounds=bounds)
|
||||
|
||||
parameters = opt_res.x
|
||||
|
||||
if opt_res.status == 2:
|
||||
raise ValueError("HuberRegressor convergence failed:"
|
||||
" l-BFGS-b solver terminated with %s"
|
||||
% opt_res.message)
|
||||
self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
|
||||
self.scale_ = parameters[-1]
|
||||
if self.fit_intercept:
|
||||
self.intercept_ = parameters[-2]
|
||||
else:
|
||||
self.intercept_ = 0.0
|
||||
self.coef_ = parameters[:X.shape[1]]
|
||||
|
||||
residual = np.abs(
|
||||
y - safe_sparse_dot(X, self.coef_) - self.intercept_)
|
||||
self.outliers_ = residual > self.scale_ * self.epsilon
|
||||
return self
|
1838
venv/Lib/site-packages/sklearn/linear_model/_least_angle.py
Normal file
1838
venv/Lib/site-packages/sklearn/linear_model/_least_angle.py
Normal file
File diff suppressed because it is too large
2086
venv/Lib/site-packages/sklearn/linear_model/_logistic.py
Normal file
2086
venv/Lib/site-packages/sklearn/linear_model/_logistic.py
Normal file
File diff suppressed because it is too large
913
venv/Lib/site-packages/sklearn/linear_model/_omp.py
Normal file
913
venv/Lib/site-packages/sklearn/linear_model/_omp.py
Normal file
|
@ -0,0 +1,913 @@
|
|||
"""Orthogonal matching pursuit algorithms
|
||||
"""
|
||||
|
||||
# Author: Vlad Niculae
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
|
||||
import warnings
|
||||
from math import sqrt
|
||||
|
||||
import numpy as np
|
||||
from scipy import linalg
|
||||
from scipy.linalg.lapack import get_lapack_funcs
|
||||
from joblib import Parallel, delayed
|
||||
|
||||
from ._base import LinearModel, _pre_fit
|
||||
from ..base import RegressorMixin, MultiOutputMixin
|
||||
from ..utils import as_float_array, check_array
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ..model_selection import check_cv
|
||||
|
||||
premature = """ Orthogonal matching pursuit ended prematurely due to linear
|
||||
dependence in the dictionary. The requested precision might not have been met.
|
||||
"""
|
||||
|
||||
|
||||
def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True,
|
||||
return_path=False):
|
||||
"""Orthogonal Matching Pursuit step using the Cholesky decomposition.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array, shape (n_samples, n_features)
|
||||
Input dictionary. Columns are assumed to have unit norm.
|
||||
|
||||
y : array, shape (n_samples,)
|
||||
Input targets
|
||||
|
||||
n_nonzero_coefs : int
|
||||
Targeted number of non-zero elements
|
||||
|
||||
tol : float
|
||||
Targeted squared error, if not None overrides n_nonzero_coefs.
|
||||
|
||||
copy_X : bool, optional
|
||||
Whether the design matrix X must be copied by the algorithm. A false
|
||||
value is only helpful if X is already Fortran-ordered, otherwise a
|
||||
copy is made anyway.
|
||||
|
||||
return_path : bool, optional. Default: False
|
||||
Whether to return every value of the nonzero coefficients along the
|
||||
forward path. Useful for cross-validation.
|
||||
|
||||
Returns
|
||||
-------
|
||||
gamma : array, shape (n_nonzero_coefs,)
|
||||
Non-zero elements of the solution
|
||||
|
||||
idx : array, shape (n_nonzero_coefs,)
|
||||
Indices of the positions of the elements in gamma within the solution
|
||||
vector
|
||||
|
||||
coef : array, shape (n_features, n_nonzero_coefs)
|
||||
The first k values of column k correspond to the coefficient value
|
||||
for the active features at that step. The lower left triangle contains
|
||||
garbage. Only returned if ``return_path=True``.
|
||||
|
||||
n_active : int
|
||||
Number of active features at convergence.
|
||||
"""
|
||||
if copy_X:
|
||||
X = X.copy('F')
|
||||
else: # even if we are allowed to overwrite, still copy it if bad order
|
||||
X = np.asfortranarray(X)
|
||||
|
||||
min_float = np.finfo(X.dtype).eps
|
||||
nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (X,))
|
||||
potrs, = get_lapack_funcs(('potrs',), (X,))
|
||||
|
||||
alpha = np.dot(X.T, y)
|
||||
residual = y
|
||||
gamma = np.empty(0)
|
||||
n_active = 0
|
||||
indices = np.arange(X.shape[1]) # keeping track of swapping
|
||||
|
||||
max_features = X.shape[1] if tol is not None else n_nonzero_coefs
|
||||
|
||||
L = np.empty((max_features, max_features), dtype=X.dtype)
|
||||
|
||||
if return_path:
|
||||
coefs = np.empty_like(L)
|
||||
|
||||
while True:
|
||||
lam = np.argmax(np.abs(np.dot(X.T, residual)))
|
||||
if lam < n_active or alpha[lam] ** 2 < min_float:
|
||||
# atom already selected or inner product too small
|
||||
warnings.warn(premature, RuntimeWarning, stacklevel=2)
|
||||
break
|
||||
|
||||
if n_active > 0:
|
||||
# Updates the Cholesky decomposition of X' X
|
||||
L[n_active, :n_active] = np.dot(X[:, :n_active].T, X[:, lam])
|
||||
linalg.solve_triangular(L[:n_active, :n_active],
|
||||
L[n_active, :n_active],
|
||||
trans=0, lower=1,
|
||||
overwrite_b=True,
|
||||
check_finite=False)
|
||||
v = nrm2(L[n_active, :n_active]) ** 2
|
||||
Lkk = linalg.norm(X[:, lam]) ** 2 - v
|
||||
if Lkk <= min_float: # selected atoms are dependent
|
||||
warnings.warn(premature, RuntimeWarning, stacklevel=2)
|
||||
break
|
||||
L[n_active, n_active] = sqrt(Lkk)
|
||||
else:
|
||||
L[0, 0] = linalg.norm(X[:, lam])
|
||||
|
||||
X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam])
|
||||
alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active]
|
||||
indices[n_active], indices[lam] = indices[lam], indices[n_active]
|
||||
n_active += 1
|
||||
|
||||
# solves LL'x = X'y as a composition of two triangular systems
|
||||
gamma, _ = potrs(L[:n_active, :n_active], alpha[:n_active], lower=True,
|
||||
overwrite_b=False)
|
||||
|
||||
if return_path:
|
||||
coefs[:n_active, n_active - 1] = gamma
|
||||
residual = y - np.dot(X[:, :n_active], gamma)
|
||||
if tol is not None and nrm2(residual) ** 2 <= tol:
|
||||
break
|
||||
elif n_active == max_features:
|
||||
break
|
||||
|
||||
if return_path:
|
||||
return gamma, indices[:n_active], coefs[:, :n_active], n_active
|
||||
else:
|
||||
return gamma, indices[:n_active], n_active
|
||||
|
||||
|
||||
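# A hedged sketch (not part of scikit-learn): recover a 2-sparse code with the
# Cholesky OMP step above. The dictionary and support below are illustrative.
def _example_cholesky_omp_recovery():
    """Return the atom indices selected for an exactly 2-sparse toy target."""
    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.randn(50, 10)
    X /= np.linalg.norm(X, axis=0)   # columns must have unit norm
    true_coef = np.zeros(10)
    true_coef[[2, 7]] = [1.5, -2.0]
    y = X @ true_coef

    gamma, idx, n_active = _cholesky_omp(X, y, n_nonzero_coefs=2)
    # idx is expected to contain the true support {2, 7} for this easy setup
    return sorted(idx)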
def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None,
|
||||
copy_Gram=True, copy_Xy=True, return_path=False):
|
||||
"""Orthogonal Matching Pursuit step on a precomputed Gram matrix.
|
||||
|
||||
This function uses the Cholesky decomposition method.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
Gram : array, shape (n_features, n_features)
|
||||
Gram matrix of the input data matrix
|
||||
|
||||
Xy : array, shape (n_features,)
|
||||
Input targets
|
||||
|
||||
n_nonzero_coefs : int
|
||||
Targeted number of non-zero elements
|
||||
|
||||
tol_0 : float
|
||||
Squared norm of y, required if tol is not None.
|
||||
|
||||
tol : float
|
||||
Targeted squared error, if not None overrides n_nonzero_coefs.
|
||||
|
||||
copy_Gram : bool, optional
|
||||
Whether the gram matrix must be copied by the algorithm. A false
|
||||
value is only helpful if it is already Fortran-ordered, otherwise a
|
||||
copy is made anyway.
|
||||
|
||||
copy_Xy : bool, optional
|
||||
Whether the covariance vector Xy must be copied by the algorithm.
|
||||
If False, it may be overwritten.
|
||||
|
||||
return_path : bool, optional. Default: False
|
||||
Whether to return every value of the nonzero coefficients along the
|
||||
forward path. Useful for cross-validation.
|
||||
|
||||
Returns
|
||||
-------
|
||||
gamma : array, shape (n_nonzero_coefs,)
|
||||
Non-zero elements of the solution
|
||||
|
||||
idx : array, shape (n_nonzero_coefs,)
|
||||
Indices of the positions of the elements in gamma within the solution
|
||||
vector
|
||||
|
||||
coefs : array, shape (n_features, n_nonzero_coefs)
|
||||
The first k values of column k correspond to the coefficient value
|
||||
for the active features at that step. The lower left triangle contains
|
||||
garbage. Only returned if ``return_path=True``.
|
||||
|
||||
n_active : int
|
||||
Number of active features at convergence.
|
||||
"""
|
||||
Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram)
|
||||
|
||||
if copy_Xy or not Xy.flags.writeable:
|
||||
Xy = Xy.copy()
|
||||
|
||||
min_float = np.finfo(Gram.dtype).eps
|
||||
nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram,))
|
||||
potrs, = get_lapack_funcs(('potrs',), (Gram,))
|
||||
|
||||
indices = np.arange(len(Gram)) # keeping track of swapping
|
||||
alpha = Xy
|
||||
tol_curr = tol_0
|
||||
delta = 0
|
||||
gamma = np.empty(0)
|
||||
n_active = 0
|
||||
|
||||
max_features = len(Gram) if tol is not None else n_nonzero_coefs
|
||||
|
||||
L = np.empty((max_features, max_features), dtype=Gram.dtype)
|
||||
|
||||
L[0, 0] = 1.
|
||||
if return_path:
|
||||
coefs = np.empty_like(L)
|
||||
|
||||
while True:
|
||||
lam = np.argmax(np.abs(alpha))
|
||||
if lam < n_active or alpha[lam] ** 2 < min_float:
|
||||
# selected same atom twice, or inner product too small
|
||||
warnings.warn(premature, RuntimeWarning, stacklevel=3)
|
||||
break
|
||||
if n_active > 0:
|
||||
L[n_active, :n_active] = Gram[lam, :n_active]
|
||||
linalg.solve_triangular(L[:n_active, :n_active],
|
||||
L[n_active, :n_active],
|
||||
trans=0, lower=1,
|
||||
overwrite_b=True,
|
||||
check_finite=False)
|
||||
v = nrm2(L[n_active, :n_active]) ** 2
|
||||
Lkk = Gram[lam, lam] - v
|
||||
if Lkk <= min_float: # selected atoms are dependent
|
||||
warnings.warn(premature, RuntimeWarning, stacklevel=3)
|
||||
break
|
||||
L[n_active, n_active] = sqrt(Lkk)
|
||||
else:
|
||||
L[0, 0] = sqrt(Gram[lam, lam])
|
||||
|
||||
Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam])
|
||||
Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam])
|
||||
indices[n_active], indices[lam] = indices[lam], indices[n_active]
|
||||
Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active]
|
||||
n_active += 1
|
||||
# solves LL'x = X'y as a composition of two triangular systems
|
||||
gamma, _ = potrs(L[:n_active, :n_active], Xy[:n_active], lower=True,
|
||||
overwrite_b=False)
|
||||
if return_path:
|
||||
coefs[:n_active, n_active - 1] = gamma
|
||||
beta = np.dot(Gram[:, :n_active], gamma)
|
||||
alpha = Xy - beta
|
||||
if tol is not None:
|
||||
tol_curr += delta
|
||||
delta = np.inner(gamma, beta[:n_active])
|
||||
tol_curr -= delta
|
||||
if abs(tol_curr) <= tol:
|
||||
break
|
||||
elif n_active == max_features:
|
||||
break
|
||||
|
||||
if return_path:
|
||||
return gamma, indices[:n_active], coefs[:, :n_active], n_active
|
||||
else:
|
||||
return gamma, indices[:n_active], n_active
|
||||
|
||||
|
||||
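# ---------------------------------------------------------------------------
# Hedged identity check behind the Gram update used in _gram_omp above: the
# working vector alpha = X'y - G[:, active] @ gamma equals the correlation of
# every atom with the current residual, so the same greedy choice is made
# without ever touching X again.  Variable names here are illustrative only.
import numpy as np

_rng = np.random.RandomState(0)
_X = _rng.randn(40, 10)
_y = _rng.randn(40)
_G, _Xy = _X.T @ _X, _X.T @ _y
_active = [4, 7]                                    # an arbitrary active set
_gamma, *_ = np.linalg.lstsq(_X[:, _active], _y, rcond=None)
assert np.allclose(_Xy - _G[:, _active] @ _gamma,
                   _X.T @ (_y - _X[:, _active] @ _gamma))
# ---------------------------------------------------------------------------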
@_deprecate_positional_args
|
||||
def orthogonal_mp(X, y, *, n_nonzero_coefs=None, tol=None, precompute=False,
|
||||
copy_X=True, return_path=False,
|
||||
return_n_iter=False):
|
||||
r"""Orthogonal Matching Pursuit (OMP)
|
||||
|
||||
Solves n_targets Orthogonal Matching Pursuit problems.
|
||||
An instance of the problem has the form:
|
||||
|
||||
When parametrized by the number of non-zero coefficients using
|
||||
`n_nonzero_coefs`:
|
||||
argmin ||y - X\gamma||^2 subject to ||\gamma||_0 <= n_{nonzero coefs}
|
||||
|
||||
When parametrized by error using the parameter `tol`:
|
||||
argmin ||\gamma||_0 subject to ||y - X\gamma||^2 <= tol
|
||||
|
||||
Read more in the :ref:`User Guide <omp>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array, shape (n_samples, n_features)
|
||||
Input data. Columns are assumed to have unit norm.
|
||||
|
||||
y : array, shape (n_samples,) or (n_samples, n_targets)
|
||||
Input targets
|
||||
|
||||
n_nonzero_coefs : int
|
||||
Desired number of non-zero entries in the solution. If None (by
|
||||
default) this value is set to 10% of n_features.
|
||||
|
||||
tol : float
|
||||
Maximum norm of the residual. If not None, overrides n_nonzero_coefs.
|
||||
|
||||
precompute : {True, False, 'auto'}, default False
|
||||
Whether to perform precomputations. Improves performance when n_targets
|
||||
or n_samples is very large.
|
||||
|
||||
copy_X : bool, optional
|
||||
Whether the design matrix X must be copied by the algorithm. A false
|
||||
value is only helpful if X is already Fortran-ordered, otherwise a
|
||||
copy is made anyway.
|
||||
|
||||
return_path : bool, optional. Default: False
|
||||
Whether to return every value of the nonzero coefficients along the
|
||||
forward path. Useful for cross-validation.
|
||||
|
||||
return_n_iter : bool, optional default False
|
||||
Whether or not to return the number of iterations.
|
||||
|
||||
Returns
|
||||
-------
|
||||
coef : array, shape (n_features,) or (n_features, n_targets)
|
||||
Coefficients of the OMP solution. If `return_path=True`, this contains
|
||||
the whole coefficient path. In this case its shape is
|
||||
(n_features, n_features) or (n_features, n_targets, n_features) and
|
||||
iterating over the last axis yields coefficients in increasing order
|
||||
of active features.
|
||||
|
||||
n_iters : array-like or int
|
||||
Number of active features across every target. Returned only if
|
||||
`return_n_iter` is set to True.
|
||||
|
||||
See also
|
||||
--------
|
||||
OrthogonalMatchingPursuit
|
||||
orthogonal_mp_gram
|
||||
lars_path
|
||||
decomposition.sparse_encode
|
||||
|
||||
Notes
|
||||
-----
|
||||
Orthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,
|
||||
Matching pursuits with time-frequency dictionaries, IEEE Transactions on
|
||||
Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
|
||||
(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)
|
||||
|
||||
This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
|
||||
M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
|
||||
Matching Pursuit Technical Report - CS Technion, April 2008.
|
||||
https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf
|
||||
|
||||
"""
|
||||
X = check_array(X, order='F', copy=copy_X)
|
||||
copy_X = False
|
||||
if y.ndim == 1:
|
||||
y = y.reshape(-1, 1)
|
||||
y = check_array(y)
|
||||
if y.shape[1] > 1: # subsequent targets will be affected
|
||||
copy_X = True
|
||||
if n_nonzero_coefs is None and tol is None:
|
||||
# default for n_nonzero_coefs is 0.1 * n_features
|
||||
# but at least one.
|
||||
n_nonzero_coefs = max(int(0.1 * X.shape[1]), 1)
|
||||
if tol is not None and tol < 0:
|
||||
raise ValueError("Epsilon cannot be negative")
|
||||
if tol is None and n_nonzero_coefs <= 0:
|
||||
raise ValueError("The number of atoms must be positive")
|
||||
if tol is None and n_nonzero_coefs > X.shape[1]:
|
||||
raise ValueError("The number of atoms cannot be more than the number "
|
||||
"of features")
|
||||
if precompute == 'auto':
|
||||
precompute = X.shape[0] > X.shape[1]
|
||||
if precompute:
|
||||
G = np.dot(X.T, X)
|
||||
G = np.asfortranarray(G)
|
||||
Xy = np.dot(X.T, y)
|
||||
if tol is not None:
|
||||
norms_squared = np.sum((y ** 2), axis=0)
|
||||
else:
|
||||
norms_squared = None
|
||||
return orthogonal_mp_gram(G, Xy, n_nonzero_coefs=n_nonzero_coefs,
|
||||
tol=tol, norms_squared=norms_squared,
|
||||
copy_Gram=copy_X, copy_Xy=False,
|
||||
return_path=return_path)
|
||||
|
||||
if return_path:
|
||||
coef = np.zeros((X.shape[1], y.shape[1], X.shape[1]))
|
||||
else:
|
||||
coef = np.zeros((X.shape[1], y.shape[1]))
|
||||
n_iters = []
|
||||
|
||||
for k in range(y.shape[1]):
|
||||
out = _cholesky_omp(
|
||||
X, y[:, k], n_nonzero_coefs, tol,
|
||||
copy_X=copy_X, return_path=return_path)
|
||||
if return_path:
|
||||
_, idx, coefs, n_iter = out
|
||||
coef = coef[:, :, :len(idx)]
|
||||
for n_active, x in enumerate(coefs.T):
|
||||
coef[idx[:n_active + 1], k, n_active] = x[:n_active + 1]
|
||||
else:
|
||||
x, idx, n_iter = out
|
||||
coef[idx, k] = x
|
||||
n_iters.append(n_iter)
|
||||
|
||||
if y.shape[1] == 1:
|
||||
n_iters = n_iters[0]
|
||||
|
||||
if return_n_iter:
|
||||
return np.squeeze(coef), n_iters
|
||||
else:
|
||||
return np.squeeze(coef)
|
||||
|
||||
|
||||
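# ---------------------------------------------------------------------------
# Hedged usage sketch for orthogonal_mp defined above (uses this module's
# namespace): recover a 3-sparse signal from noiseless measurements.  The data
# below are made up for illustration; with exact measurements OMP typically
# recovers the support.
import numpy as np

_rng = np.random.RandomState(42)
_X = _rng.randn(50, 20)
_X /= np.linalg.norm(_X, axis=0)            # unit-norm columns, as documented
_w_true = np.zeros(20)
_w_true[[3, 9, 14]] = [2.0, -1.0, 0.5]
_y = _X @ _w_true

_w_hat = orthogonal_mp(_X, _y, n_nonzero_coefs=3)
print(np.flatnonzero(_w_hat))               # typically array([ 3,  9, 14])
# ---------------------------------------------------------------------------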
@_deprecate_positional_args
|
||||
def orthogonal_mp_gram(Gram, Xy, *, n_nonzero_coefs=None, tol=None,
|
||||
norms_squared=None, copy_Gram=True,
|
||||
copy_Xy=True, return_path=False,
|
||||
return_n_iter=False):
|
||||
"""Gram Orthogonal Matching Pursuit (OMP)
|
||||
|
||||
Solves n_targets Orthogonal Matching Pursuit problems using only
|
||||
the Gram matrix X.T * X and the product X.T * y.
|
||||
|
||||
Read more in the :ref:`User Guide <omp>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
Gram : array, shape (n_features, n_features)
|
||||
Gram matrix of the input data: X.T * X
|
||||
|
||||
Xy : array, shape (n_features,) or (n_features, n_targets)
|
||||
Input targets multiplied by X: X.T * y
|
||||
|
||||
n_nonzero_coefs : int
|
||||
Desired number of non-zero entries in the solution. If None (by
|
||||
default) this value is set to 10% of n_features.
|
||||
|
||||
tol : float
|
||||
Maximum norm of the residual. If not None, overrides n_nonzero_coefs.
|
||||
|
||||
norms_squared : array-like, shape (n_targets,)
|
||||
Squared L2 norms of the lines of y. Required if tol is not None.
|
||||
|
||||
copy_Gram : bool, optional
|
||||
Whether the gram matrix must be copied by the algorithm. A false
|
||||
value is only helpful if it is already Fortran-ordered, otherwise a
|
||||
copy is made anyway.
|
||||
|
||||
copy_Xy : bool, optional
|
||||
Whether the covariance vector Xy must be copied by the algorithm.
|
||||
If False, it may be overwritten.
|
||||
|
||||
return_path : bool, optional. Default: False
|
||||
Whether to return every value of the nonzero coefficients along the
|
||||
forward path. Useful for cross-validation.
|
||||
|
||||
return_n_iter : bool, optional default False
|
||||
Whether or not to return the number of iterations.
|
||||
|
||||
Returns
|
||||
-------
|
||||
coef : array, shape (n_features,) or (n_features, n_targets)
|
||||
Coefficients of the OMP solution. If `return_path=True`, this contains
|
||||
the whole coefficient path. In this case its shape is
|
||||
(n_features, n_features) or (n_features, n_targets, n_features) and
|
||||
iterating over the last axis yields coefficients in increasing order
|
||||
of active features.
|
||||
|
||||
n_iters : array-like or int
|
||||
Number of active features across every target. Returned only if
|
||||
`return_n_iter` is set to True.
|
||||
|
||||
See also
|
||||
--------
|
||||
OrthogonalMatchingPursuit
|
||||
orthogonal_mp
|
||||
lars_path
|
||||
decomposition.sparse_encode
|
||||
|
||||
Notes
|
||||
-----
|
||||
Orthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,
|
||||
Matching pursuits with time-frequency dictionaries, IEEE Transactions on
|
||||
Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
|
||||
(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)
|
||||
|
||||
This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
|
||||
M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
|
||||
Matching Pursuit Technical Report - CS Technion, April 2008.
|
||||
https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf
|
||||
|
||||
"""
|
||||
Gram = check_array(Gram, order='F', copy=copy_Gram)
|
||||
Xy = np.asarray(Xy)
|
||||
if Xy.ndim > 1 and Xy.shape[1] > 1:
|
||||
# or subsequent target will be affected
|
||||
copy_Gram = True
|
||||
if Xy.ndim == 1:
|
||||
Xy = Xy[:, np.newaxis]
|
||||
if tol is not None:
|
||||
norms_squared = [norms_squared]
|
||||
if copy_Xy or not Xy.flags.writeable:
|
||||
# Make the copy once instead of many times in _gram_omp itself.
|
||||
Xy = Xy.copy()
|
||||
|
||||
if n_nonzero_coefs is None and tol is None:
|
||||
n_nonzero_coefs = int(0.1 * len(Gram))
|
||||
if tol is not None and norms_squared is None:
|
||||
raise ValueError('Gram OMP needs the precomputed norms in order '
|
||||
'to evaluate the error sum of squares.')
|
||||
if tol is not None and tol < 0:
|
||||
raise ValueError("Epsilon cannot be negative")
|
||||
if tol is None and n_nonzero_coefs <= 0:
|
||||
raise ValueError("The number of atoms must be positive")
|
||||
if tol is None and n_nonzero_coefs > len(Gram):
|
||||
raise ValueError("The number of atoms cannot be more than the number "
|
||||
"of features")
|
||||
|
||||
if return_path:
|
||||
coef = np.zeros((len(Gram), Xy.shape[1], len(Gram)))
|
||||
else:
|
||||
coef = np.zeros((len(Gram), Xy.shape[1]))
|
||||
|
||||
n_iters = []
|
||||
for k in range(Xy.shape[1]):
|
||||
out = _gram_omp(
|
||||
Gram, Xy[:, k], n_nonzero_coefs,
|
||||
norms_squared[k] if tol is not None else None, tol,
|
||||
copy_Gram=copy_Gram, copy_Xy=False,
|
||||
return_path=return_path)
|
||||
if return_path:
|
||||
_, idx, coefs, n_iter = out
|
||||
coef = coef[:, :, :len(idx)]
|
||||
for n_active, x in enumerate(coefs.T):
|
||||
coef[idx[:n_active + 1], k, n_active] = x[:n_active + 1]
|
||||
else:
|
||||
x, idx, n_iter = out
|
||||
coef[idx, k] = x
|
||||
n_iters.append(n_iter)
|
||||
|
||||
if Xy.shape[1] == 1:
|
||||
n_iters = n_iters[0]
|
||||
|
||||
if return_n_iter:
|
||||
return np.squeeze(coef), n_iters
|
||||
else:
|
||||
return np.squeeze(coef)
|
||||
|
||||
|
||||
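# ---------------------------------------------------------------------------
# Hedged equivalence sketch: feeding orthogonal_mp_gram the precomputed
# products X'X and X'y reproduces orthogonal_mp on the same data, which is
# exactly what orthogonal_mp does internally when precompute=True.  Uses the
# two functions defined above; data are illustrative.
import numpy as np

_rng = np.random.RandomState(0)
_X = _rng.randn(30, 15)
_X /= np.linalg.norm(_X, axis=0)
_y = _X[:, [2, 7, 11]] @ np.array([1.5, -2.0, 0.5])

_coef_dense = orthogonal_mp(_X, _y, n_nonzero_coefs=3)
_coef_gram = orthogonal_mp_gram(_X.T @ _X, _X.T @ _y, n_nonzero_coefs=3)
assert np.allclose(_coef_dense, _coef_gram)
# ---------------------------------------------------------------------------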
class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel):
|
||||
"""Orthogonal Matching Pursuit model (OMP)
|
||||
|
||||
Read more in the :ref:`User Guide <omp>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_nonzero_coefs : int, optional
|
||||
Desired number of non-zero entries in the solution. If None (by
|
||||
default) this value is set to 10% of n_features.
|
||||
|
||||
tol : float, optional
|
||||
Maximum norm of the residual. If not None, overrides n_nonzero_coefs.
|
||||
|
||||
fit_intercept : boolean, optional
|
||||
whether to calculate the intercept for this model. If set
|
||||
to false, no intercept will be used in calculations
|
||||
(i.e. data is expected to be centered).
|
||||
|
||||
normalize : boolean, optional, default True
|
||||
This parameter is ignored when ``fit_intercept`` is set to False.
|
||||
If True, the regressors X will be normalized before regression by
|
||||
subtracting the mean and dividing by the l2-norm.
|
||||
If you wish to standardize, please use
|
||||
:class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
|
||||
on an estimator with ``normalize=False``.
|
||||
|
||||
precompute : {True, False, 'auto'}, default 'auto'
|
||||
Whether to use a precomputed Gram and Xy matrix to speed up
|
||||
calculations. Improves performance when :term:`n_targets` or
|
||||
:term:`n_samples` is very large. Note that if you already have such
|
||||
matrices, you can pass them directly to the fit method.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
coef_ : array, shape (n_features,) or (n_targets, n_features)
|
||||
parameter vector (w in the formula)
|
||||
|
||||
intercept_ : float or array, shape (n_targets,)
|
||||
independent term in decision function.
|
||||
|
||||
n_iter_ : int or array-like
|
||||
Number of active features across every target.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.linear_model import OrthogonalMatchingPursuit
|
||||
>>> from sklearn.datasets import make_regression
|
||||
>>> X, y = make_regression(noise=4, random_state=0)
|
||||
>>> reg = OrthogonalMatchingPursuit().fit(X, y)
|
||||
>>> reg.score(X, y)
|
||||
0.9991...
|
||||
>>> reg.predict(X[:1,])
|
||||
array([-78.3854...])
|
||||
|
||||
Notes
|
||||
-----
|
||||
Orthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,
|
||||
Matching pursuits with time-frequency dictionaries, IEEE Transactions on
|
||||
Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
|
||||
(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)
|
||||
|
||||
This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
|
||||
M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
|
||||
Matching Pursuit Technical Report - CS Technion, April 2008.
|
||||
https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf
|
||||
|
||||
See also
|
||||
--------
|
||||
orthogonal_mp
|
||||
orthogonal_mp_gram
|
||||
lars_path
|
||||
Lars
|
||||
LassoLars
|
||||
decomposition.sparse_encode
|
||||
OrthogonalMatchingPursuitCV
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, *, n_nonzero_coefs=None, tol=None, fit_intercept=True,
|
||||
normalize=True, precompute='auto'):
|
||||
self.n_nonzero_coefs = n_nonzero_coefs
|
||||
self.tol = tol
|
||||
self.fit_intercept = fit_intercept
|
||||
self.normalize = normalize
|
||||
self.precompute = precompute
|
||||
|
||||
def fit(self, X, y):
|
||||
"""Fit the model using X, y as training data.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like, shape (n_samples, n_features)
|
||||
Training data.
|
||||
|
||||
y : array-like, shape (n_samples,) or (n_samples, n_targets)
|
||||
Target values. Will be cast to X's dtype if necessary
|
||||
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : object
|
||||
returns an instance of self.
|
||||
"""
|
||||
X, y = self._validate_data(X, y, multi_output=True, y_numeric=True)
|
||||
n_features = X.shape[1]
|
||||
|
||||
X, y, X_offset, y_offset, X_scale, Gram, Xy = \
|
||||
_pre_fit(X, y, None, self.precompute, self.normalize,
|
||||
self.fit_intercept, copy=True)
|
||||
|
||||
if y.ndim == 1:
|
||||
y = y[:, np.newaxis]
|
||||
|
||||
if self.n_nonzero_coefs is None and self.tol is None:
|
||||
# default for n_nonzero_coefs is 0.1 * n_features
|
||||
# but at least one.
|
||||
self.n_nonzero_coefs_ = max(int(0.1 * n_features), 1)
|
||||
else:
|
||||
self.n_nonzero_coefs_ = self.n_nonzero_coefs
|
||||
|
||||
if Gram is False:
|
||||
coef_, self.n_iter_ = orthogonal_mp(
|
||||
X, y, n_nonzero_coefs=self.n_nonzero_coefs_, tol=self.tol,
|
||||
precompute=False, copy_X=True,
|
||||
return_n_iter=True)
|
||||
else:
|
||||
norms_sq = np.sum(y ** 2, axis=0) if self.tol is not None else None
|
||||
|
||||
coef_, self.n_iter_ = orthogonal_mp_gram(
|
||||
Gram, Xy=Xy, n_nonzero_coefs=self.n_nonzero_coefs_,
|
||||
tol=self.tol, norms_squared=norms_sq,
|
||||
copy_Gram=True, copy_Xy=True,
|
||||
return_n_iter=True)
|
||||
self.coef_ = coef_.T
|
||||
self._set_intercept(X_offset, y_offset, X_scale)
|
||||
return self
|
||||
|
||||
|
||||
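# ---------------------------------------------------------------------------
# Hedged illustration of the default sparsity rule in fit() above: with
# neither n_nonzero_coefs nor tol given, the estimator targets 10 % of
# n_features, but at least one coefficient.  Small self-check (illustrative):
from sklearn.datasets import make_regression

_X, _y = make_regression(n_features=40, n_informative=4, random_state=0)
_omp = OrthogonalMatchingPursuit().fit(_X, _y)
assert _omp.n_nonzero_coefs_ == 4           # max(int(0.1 * 40), 1)
# ---------------------------------------------------------------------------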
def _omp_path_residues(X_train, y_train, X_test, y_test, copy=True,
|
||||
fit_intercept=True, normalize=True, max_iter=100):
|
||||
"""Compute the residues on left-out data for a full LARS path
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X_train : array, shape (n_samples, n_features)
|
||||
The data to fit the LARS on
|
||||
|
||||
y_train : array, shape (n_samples)
|
||||
The target variable to fit LARS on
|
||||
|
||||
X_test : array, shape (n_samples, n_features)
|
||||
The data to compute the residues on
|
||||
|
||||
y_test : array, shape (n_samples)
|
||||
The target variable to compute the residues on
|
||||
|
||||
copy : boolean, optional
|
||||
Whether X_train, X_test, y_train and y_test should be copied. If
|
||||
False, they may be overwritten.
|
||||
|
||||
fit_intercept : boolean
|
||||
whether to calculate the intercept for this model. If set
|
||||
to false, no intercept will be used in calculations
|
||||
(i.e. data is expected to be centered).
|
||||
|
||||
normalize : boolean, optional, default True
|
||||
This parameter is ignored when ``fit_intercept`` is set to False.
|
||||
If True, the regressors X will be normalized before regression by
|
||||
subtracting the mean and dividing by the l2-norm.
|
||||
If you wish to standardize, please use
|
||||
:class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
|
||||
on an estimator with ``normalize=False``.
|
||||
|
||||
max_iter : integer, optional
|
||||
Maximum number of iterations to perform, therefore maximum features
|
||||
to include. 100 by default.
|
||||
|
||||
Returns
|
||||
-------
|
||||
residues : array, shape (max_features, n_samples)
|
||||
Residues of the prediction on the test data
|
||||
"""
|
||||
|
||||
if copy:
|
||||
X_train = X_train.copy()
|
||||
y_train = y_train.copy()
|
||||
X_test = X_test.copy()
|
||||
y_test = y_test.copy()
|
||||
|
||||
if fit_intercept:
|
||||
X_mean = X_train.mean(axis=0)
|
||||
X_train -= X_mean
|
||||
X_test -= X_mean
|
||||
y_mean = y_train.mean(axis=0)
|
||||
y_train = as_float_array(y_train, copy=False)
|
||||
y_train -= y_mean
|
||||
y_test = as_float_array(y_test, copy=False)
|
||||
y_test -= y_mean
|
||||
|
||||
if normalize:
|
||||
norms = np.sqrt(np.sum(X_train ** 2, axis=0))
|
||||
nonzeros = np.flatnonzero(norms)
|
||||
X_train[:, nonzeros] /= norms[nonzeros]
|
||||
|
||||
coefs = orthogonal_mp(X_train, y_train, n_nonzero_coefs=max_iter, tol=None,
|
||||
precompute=False, copy_X=False,
|
||||
return_path=True)
|
||||
if coefs.ndim == 1:
|
||||
coefs = coefs[:, np.newaxis]
|
||||
if normalize:
|
||||
coefs[nonzeros] /= norms[nonzeros][:, np.newaxis]
|
||||
|
||||
return np.dot(coefs.T, X_test.T) - y_test
|
||||
|
||||
|
||||
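# ---------------------------------------------------------------------------
# Hedged sketch of how the residue paths above are consumed by
# OrthogonalMatchingPursuitCV.fit below: row i of a fold's residue array holds
# the test residuals obtained with i + 1 active features, so the per-sparsity
# mean squared error is a row-wise reduction.  Helper name is hypothetical.
import numpy as np


def _mse_per_sparsity(residues):
    # residues: array of shape (n_steps, n_test_samples)
    return (residues ** 2).mean(axis=1)
# The CV estimator averages these curves over folds and uses argmin + 1 as
# n_nonzero_coefs_.
# ---------------------------------------------------------------------------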
class OrthogonalMatchingPursuitCV(RegressorMixin, LinearModel):
|
||||
"""Cross-validated Orthogonal Matching Pursuit model (OMP).
|
||||
|
||||
See glossary entry for :term:`cross-validation estimator`.
|
||||
|
||||
Read more in the :ref:`User Guide <omp>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
copy : bool, optional
|
||||
Whether the design matrix X must be copied by the algorithm. A false
|
||||
value is only helpful if X is already Fortran-ordered, otherwise a
|
||||
copy is made anyway.
|
||||
|
||||
fit_intercept : boolean, optional
|
||||
whether to calculate the intercept for this model. If set
|
||||
to false, no intercept will be used in calculations
|
||||
(i.e. data is expected to be centered).
|
||||
|
||||
normalize : boolean, optional, default True
|
||||
This parameter is ignored when ``fit_intercept`` is set to False.
|
||||
If True, the regressors X will be normalized before regression by
|
||||
subtracting the mean and dividing by the l2-norm.
|
||||
If you wish to standardize, please use
|
||||
:class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
|
||||
on an estimator with ``normalize=False``.
|
||||
|
||||
max_iter : integer, optional
|
||||
Maximum number of iterations to perform, therefore maximum features
|
||||
to include. 10% of ``n_features`` but at least 5 if available.
|
||||
|
||||
cv : int, cross-validation generator or an iterable, optional
|
||||
Determines the cross-validation splitting strategy.
|
||||
Possible inputs for cv are:
|
||||
|
||||
- None, to use the default 5-fold cross-validation,
|
||||
- integer, to specify the number of folds.
|
||||
- :term:`CV splitter`,
|
||||
- An iterable yielding (train, test) splits as arrays of indices.
|
||||
|
||||
For integer/None inputs, :class:`KFold` is used.
|
||||
|
||||
Refer to :ref:`User Guide <cross_validation>` for the various
|
||||
cross-validation strategies that can be used here.
|
||||
|
||||
.. versionchanged:: 0.22
|
||||
``cv`` default value if None changed from 3-fold to 5-fold.
|
||||
|
||||
n_jobs : int or None, optional (default=None)
|
||||
Number of CPUs to use during the cross validation.
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
verbose : boolean or integer, optional
|
||||
Sets the verbosity amount
|
||||
|
||||
Attributes
|
||||
----------
|
||||
intercept_ : float or array, shape (n_targets,)
|
||||
Independent term in decision function.
|
||||
|
||||
coef_ : array, shape (n_features,) or (n_targets, n_features)
|
||||
Parameter vector (w in the problem formulation).
|
||||
|
||||
n_nonzero_coefs_ : int
|
||||
Estimated number of non-zero coefficients giving the best mean squared
|
||||
error over the cross-validation folds.
|
||||
|
||||
n_iter_ : int or array-like
|
||||
Number of active features across every target for the model refit with
|
||||
the best hyperparameters obtained by cross-validating across all folds.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.linear_model import OrthogonalMatchingPursuitCV
|
||||
>>> from sklearn.datasets import make_regression
|
||||
>>> X, y = make_regression(n_features=100, n_informative=10,
|
||||
... noise=4, random_state=0)
|
||||
>>> reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)
|
||||
>>> reg.score(X, y)
|
||||
0.9991...
|
||||
>>> reg.n_nonzero_coefs_
|
||||
10
|
||||
>>> reg.predict(X[:1,])
|
||||
array([-78.3854...])
|
||||
|
||||
See also
|
||||
--------
|
||||
orthogonal_mp
|
||||
orthogonal_mp_gram
|
||||
lars_path
|
||||
Lars
|
||||
LassoLars
|
||||
OrthogonalMatchingPursuit
|
||||
LarsCV
|
||||
LassoLarsCV
|
||||
decomposition.sparse_encode
|
||||
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, *, copy=True, fit_intercept=True, normalize=True,
|
||||
max_iter=None, cv=None, n_jobs=None, verbose=False):
|
||||
self.copy = copy
|
||||
self.fit_intercept = fit_intercept
|
||||
self.normalize = normalize
|
||||
self.max_iter = max_iter
|
||||
self.cv = cv
|
||||
self.n_jobs = n_jobs
|
||||
self.verbose = verbose
|
||||
|
||||
def fit(self, X, y):
|
||||
"""Fit the model using X, y as training data.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like, shape [n_samples, n_features]
|
||||
Training data.
|
||||
|
||||
y : array-like, shape [n_samples]
|
||||
Target values. Will be cast to X's dtype if necessary
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : object
|
||||
returns an instance of self.
|
||||
"""
|
||||
X, y = self._validate_data(X, y, y_numeric=True, ensure_min_features=2,
|
||||
estimator=self)
|
||||
X = as_float_array(X, copy=False, force_all_finite=False)
|
||||
cv = check_cv(self.cv, classifier=False)
|
||||
max_iter = (min(max(int(0.1 * X.shape[1]), 5), X.shape[1])
|
||||
if not self.max_iter
|
||||
else self.max_iter)
|
||||
cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
|
||||
delayed(_omp_path_residues)(
|
||||
X[train], y[train], X[test], y[test], self.copy,
|
||||
self.fit_intercept, self.normalize, max_iter)
|
||||
for train, test in cv.split(X))
|
||||
|
||||
min_early_stop = min(fold.shape[0] for fold in cv_paths)
|
||||
mse_folds = np.array([(fold[:min_early_stop] ** 2).mean(axis=1)
|
||||
for fold in cv_paths])
|
||||
best_n_nonzero_coefs = np.argmin(mse_folds.mean(axis=0)) + 1
|
||||
self.n_nonzero_coefs_ = best_n_nonzero_coefs
|
||||
omp = OrthogonalMatchingPursuit(n_nonzero_coefs=best_n_nonzero_coefs,
|
||||
fit_intercept=self.fit_intercept,
|
||||
normalize=self.normalize)
|
||||
omp.fit(X, y)
|
||||
self.coef_ = omp.coef_
|
||||
self.intercept_ = omp.intercept_
|
||||
self.n_iter_ = omp.n_iter_
|
||||
return self
@@ -0,0 +1,471 @@
# Authors: Rob Zinkov, Mathieu Blondel
|
||||
# License: BSD 3 clause
|
||||
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ._stochastic_gradient import BaseSGDClassifier
|
||||
from ._stochastic_gradient import BaseSGDRegressor
|
||||
from ._stochastic_gradient import DEFAULT_EPSILON
|
||||
|
||||
|
||||
class PassiveAggressiveClassifier(BaseSGDClassifier):
|
||||
"""Passive Aggressive Classifier
|
||||
|
||||
Read more in the :ref:`User Guide <passive_aggressive>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
C : float
|
||||
Maximum step size (regularization). Defaults to 1.0.
|
||||
|
||||
fit_intercept : bool, default=False
|
||||
Whether the intercept should be estimated or not. If False, the
|
||||
data is assumed to be already centered.
|
||||
|
||||
max_iter : int, optional (default=1000)
|
||||
The maximum number of passes over the training data (aka epochs).
|
||||
It only impacts the behavior in the ``fit`` method, and not the
|
||||
:meth:`partial_fit` method.
|
||||
|
||||
.. versionadded:: 0.19
|
||||
|
||||
tol : float or None, optional (default=1e-3)
|
||||
The stopping criterion. If it is not None, the iterations will stop
|
||||
when (loss > previous_loss - tol).
|
||||
|
||||
.. versionadded:: 0.19
|
||||
|
||||
early_stopping : bool, default=False
|
||||
Whether to use early stopping to terminate training when validation
|
||||
score is not improving. If set to True, it will automatically set aside
|
||||
a stratified fraction of training data as validation and terminate
|
||||
training when validation score is not improving by at least tol for
|
||||
n_iter_no_change consecutive epochs.
|
||||
|
||||
.. versionadded:: 0.20
|
||||
|
||||
validation_fraction : float, default=0.1
|
||||
The proportion of training data to set aside as validation set for
|
||||
early stopping. Must be between 0 and 1.
|
||||
Only used if early_stopping is True.
|
||||
|
||||
.. versionadded:: 0.20
|
||||
|
||||
n_iter_no_change : int, default=5
|
||||
Number of iterations with no improvement to wait before early stopping.
|
||||
|
||||
.. versionadded:: 0.20
|
||||
|
||||
shuffle : bool, default=True
|
||||
Whether or not the training data should be shuffled after each epoch.
|
||||
|
||||
verbose : integer, optional
|
||||
The verbosity level
|
||||
|
||||
loss : string, optional
|
||||
The loss function to be used:
|
||||
hinge: equivalent to PA-I in the reference paper.
|
||||
squared_hinge: equivalent to PA-II in the reference paper.
|
||||
|
||||
n_jobs : int or None, optional (default=None)
|
||||
The number of CPUs to use to do the OVA (One Versus All, for
|
||||
multi-class problems) computation.
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
Used to shuffle the training data, when ``shuffle`` is set to
|
||||
``True``. Pass an int for reproducible output across multiple
|
||||
function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
warm_start : bool, optional
|
||||
When set to True, reuse the solution of the previous call to fit as
|
||||
initialization, otherwise, just erase the previous solution.
|
||||
See :term:`the Glossary <warm_start>`.
|
||||
|
||||
Repeatedly calling fit or partial_fit when warm_start is True can
|
||||
result in a different solution than when calling fit a single time
|
||||
because of the way the data is shuffled.
|
||||
|
||||
class_weight : dict, {class_label: weight} or "balanced" or None, optional
|
||||
Preset for the class_weight fit parameter.
|
||||
|
||||
Weights associated with classes. If not given, all classes
|
||||
are supposed to have weight one.
|
||||
|
||||
The "balanced" mode uses the values of y to automatically adjust
|
||||
weights inversely proportional to class frequencies in the input data
|
||||
as ``n_samples / (n_classes * np.bincount(y))``
|
||||
|
||||
.. versionadded:: 0.17
|
||||
parameter *class_weight* to automatically weight samples.
|
||||
|
||||
average : bool or int, optional
|
||||
When set to True, computes the averaged SGD weights and stores the
|
||||
result in the ``coef_`` attribute. If set to an int greater than 1,
|
||||
averaging will begin once the total number of samples seen reaches
|
||||
average. So average=10 will begin averaging after seeing 10 samples.
|
||||
|
||||
.. versionadded:: 0.19
|
||||
parameter *average* to use weights averaging in SGD
|
||||
|
||||
Attributes
|
||||
----------
|
||||
coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\
|
||||
n_features]
|
||||
Weights assigned to the features.
|
||||
|
||||
intercept_ : array, shape = [1] if n_classes == 2 else [n_classes]
|
||||
Constants in decision function.
|
||||
|
||||
n_iter_ : int
|
||||
The actual number of iterations to reach the stopping criterion.
|
||||
For multiclass fits, it is the maximum over every binary fit.
|
||||
|
||||
classes_ : array of shape (n_classes,)
|
||||
The unique classes labels.
|
||||
|
||||
t_ : int
|
||||
Number of weight updates performed during training.
|
||||
Same as ``(n_iter_ * n_samples)``.
|
||||
|
||||
loss_function_ : callable
|
||||
Loss function used by the algorithm.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.linear_model import PassiveAggressiveClassifier
|
||||
>>> from sklearn.datasets import make_classification
|
||||
|
||||
>>> X, y = make_classification(n_features=4, random_state=0)
|
||||
>>> clf = PassiveAggressiveClassifier(max_iter=1000, random_state=0,
|
||||
... tol=1e-3)
|
||||
>>> clf.fit(X, y)
|
||||
PassiveAggressiveClassifier(random_state=0)
|
||||
>>> print(clf.coef_)
|
||||
[[0.26642044 0.45070924 0.67251877 0.64185414]]
|
||||
>>> print(clf.intercept_)
|
||||
[1.84127814]
|
||||
>>> print(clf.predict([[0, 0, 0, 0]]))
|
||||
[1]
|
||||
|
||||
See also
|
||||
--------
|
||||
|
||||
SGDClassifier
|
||||
Perceptron
|
||||
|
||||
References
|
||||
----------
|
||||
Online Passive-Aggressive Algorithms
|
||||
<http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>
|
||||
K. Crammer, O. Dekel, J. Keshet, S. Shalev-Shwartz, Y. Singer - JMLR (2006)
|
||||
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, *, C=1.0, fit_intercept=True, max_iter=1000, tol=1e-3,
|
||||
early_stopping=False, validation_fraction=0.1,
|
||||
n_iter_no_change=5, shuffle=True, verbose=0, loss="hinge",
|
||||
n_jobs=None, random_state=None, warm_start=False,
|
||||
class_weight=None, average=False):
|
||||
super().__init__(
|
||||
penalty=None,
|
||||
fit_intercept=fit_intercept,
|
||||
max_iter=max_iter,
|
||||
tol=tol,
|
||||
early_stopping=early_stopping,
|
||||
validation_fraction=validation_fraction,
|
||||
n_iter_no_change=n_iter_no_change,
|
||||
shuffle=shuffle,
|
||||
verbose=verbose,
|
||||
random_state=random_state,
|
||||
eta0=1.0,
|
||||
warm_start=warm_start,
|
||||
class_weight=class_weight,
|
||||
average=average,
|
||||
n_jobs=n_jobs)
|
||||
|
||||
self.C = C
|
||||
self.loss = loss
|
||||
|
||||
def partial_fit(self, X, y, classes=None):
|
||||
"""Fit linear model with Passive Aggressive algorithm.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
||||
Subset of the training data
|
||||
|
||||
y : numpy array of shape [n_samples]
|
||||
Subset of the target values
|
||||
|
||||
classes : array, shape = [n_classes]
|
||||
Classes across all calls to partial_fit.
|
||||
Can be obtained via `np.unique(y_all)`, where y_all is the
|
||||
target vector of the entire dataset.
|
||||
This argument is required for the first call to partial_fit
|
||||
and can be omitted in the subsequent calls.
|
||||
Note that y doesn't need to contain all labels in `classes`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : returns an instance of self.
|
||||
"""
|
||||
self._validate_params(for_partial_fit=True)
|
||||
if self.class_weight == 'balanced':
|
||||
raise ValueError("class_weight 'balanced' is not supported for "
|
||||
"partial_fit. For 'balanced' weights, use "
|
||||
"`sklearn.utils.compute_class_weight` with "
|
||||
"`class_weight='balanced'`. In place of y you "
|
||||
"can use a large enough subset of the full "
|
||||
"training set target to properly estimate the "
|
||||
"class frequency distributions. Pass the "
|
||||
"resulting weights as the class_weight "
|
||||
"parameter.")
|
||||
lr = "pa1" if self.loss == "hinge" else "pa2"
|
||||
return self._partial_fit(X, y, alpha=1.0, C=self.C,
|
||||
loss="hinge", learning_rate=lr, max_iter=1,
|
||||
classes=classes, sample_weight=None,
|
||||
coef_init=None, intercept_init=None)
|
||||
|
||||
def fit(self, X, y, coef_init=None, intercept_init=None):
|
||||
"""Fit linear model with Passive Aggressive algorithm.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
||||
Training data
|
||||
|
||||
y : numpy array of shape [n_samples]
|
||||
Target values
|
||||
|
||||
coef_init : array, shape = [n_classes,n_features]
|
||||
The initial coefficients to warm-start the optimization.
|
||||
|
||||
intercept_init : array, shape = [n_classes]
|
||||
The initial intercept to warm-start the optimization.
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : returns an instance of self.
|
||||
"""
|
||||
self._validate_params()
|
||||
lr = "pa1" if self.loss == "hinge" else "pa2"
|
||||
return self._fit(X, y, alpha=1.0, C=self.C,
|
||||
loss="hinge", learning_rate=lr,
|
||||
coef_init=coef_init, intercept_init=intercept_init)
|
||||
|
||||
|
||||
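# ---------------------------------------------------------------------------
# Hedged sketch of the PA-I / PA-II updates from Crammer et al. (2006), the
# paper cited in the docstring above, for the binary case with y in {-1, +1}.
# The estimator's real update lives in the Cython SGD code and additionally
# handles intercepts, averaging and one-vs-all multi-class; this helper is
# purely illustrative.
import numpy as np


def _pa_step(w, x, y, C=1.0, variant="pa1"):
    loss = max(0.0, 1.0 - y * np.dot(w, x))     # hinge loss
    sq_norm = np.dot(x, x)
    if loss == 0.0 or sq_norm == 0.0:
        return w                                # already correct: no update
    if variant == "pa1":                        # loss="hinge"
        tau = min(C, loss / sq_norm)
    else:                                       # "pa2", loss="squared_hinge"
        tau = loss / (sq_norm + 1.0 / (2.0 * C))
    return w + tau * y * x
# ---------------------------------------------------------------------------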
class PassiveAggressiveRegressor(BaseSGDRegressor):
|
||||
"""Passive Aggressive Regressor
|
||||
|
||||
Read more in the :ref:`User Guide <passive_aggressive>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
C : float
|
||||
Maximum step size (regularization). Defaults to 1.0.
|
||||
|
||||
fit_intercept : bool
|
||||
Whether the intercept should be estimated or not. If False, the
|
||||
data is assumed to be already centered. Defaults to True.
|
||||
|
||||
max_iter : int, optional (default=1000)
|
||||
The maximum number of passes over the training data (aka epochs).
|
||||
It only impacts the behavior in the ``fit`` method, and not the
|
||||
:meth:`partial_fit` method.
|
||||
|
||||
.. versionadded:: 0.19
|
||||
|
||||
tol : float or None, optional (default=1e-3)
|
||||
The stopping criterion. If it is not None, the iterations will stop
|
||||
when (loss > previous_loss - tol).
|
||||
|
||||
.. versionadded:: 0.19
|
||||
|
||||
early_stopping : bool, default=False
|
||||
Whether to use early stopping to terminate training when validation
|
||||
score is not improving. If set to True, it will automatically set aside
|
||||
a fraction of training data as validation and terminate
|
||||
training when validation score is not improving by at least tol for
|
||||
n_iter_no_change consecutive epochs.
|
||||
|
||||
.. versionadded:: 0.20
|
||||
|
||||
validation_fraction : float, default=0.1
|
||||
The proportion of training data to set aside as validation set for
|
||||
early stopping. Must be between 0 and 1.
|
||||
Only used if early_stopping is True.
|
||||
|
||||
.. versionadded:: 0.20
|
||||
|
||||
n_iter_no_change : int, default=5
|
||||
Number of iterations with no improvement to wait before early stopping.
|
||||
|
||||
.. versionadded:: 0.20
|
||||
|
||||
shuffle : bool, default=True
|
||||
Whether or not the training data should be shuffled after each epoch.
|
||||
|
||||
verbose : integer, optional
|
||||
The verbosity level
|
||||
|
||||
loss : string, optional
|
||||
The loss function to be used:
|
||||
epsilon_insensitive: equivalent to PA-I in the reference paper.
|
||||
squared_epsilon_insensitive: equivalent to PA-II in the reference
|
||||
paper.
|
||||
|
||||
epsilon : float
|
||||
If the difference between the current prediction and the correct label
|
||||
is below this threshold, the model is not updated.
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
Used to shuffle the training data, when ``shuffle`` is set to
|
||||
``True``. Pass an int for reproducible output across multiple
|
||||
function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
warm_start : bool, optional
|
||||
When set to True, reuse the solution of the previous call to fit as
|
||||
initialization, otherwise, just erase the previous solution.
|
||||
See :term:`the Glossary <warm_start>`.
|
||||
|
||||
Repeatedly calling fit or partial_fit when warm_start is True can
|
||||
result in a different solution than when calling fit a single time
|
||||
because of the way the data is shuffled.
|
||||
|
||||
average : bool or int, optional
|
||||
When set to True, computes the averaged SGD weights and stores the
|
||||
result in the ``coef_`` attribute. If set to an int greater than 1,
|
||||
averaging will begin once the total number of samples seen reaches
|
||||
average. So average=10 will begin averaging after seeing 10 samples.
|
||||
|
||||
.. versionadded:: 0.19
|
||||
parameter *average* to use weights averaging in SGD
|
||||
|
||||
Attributes
|
||||
----------
|
||||
coef_ : array, shape = [n_features]
|
||||
Weights assigned to the features.
|
||||
|
||||
intercept_ : array, shape = [1]
|
||||
Constants in decision function.
|
||||
|
||||
n_iter_ : int
|
||||
The actual number of iterations to reach the stopping criterion.
|
||||
|
||||
t_ : int
|
||||
Number of weight updates performed during training.
|
||||
Same as ``(n_iter_ * n_samples)``.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.linear_model import PassiveAggressiveRegressor
|
||||
>>> from sklearn.datasets import make_regression
|
||||
|
||||
>>> X, y = make_regression(n_features=4, random_state=0)
|
||||
>>> regr = PassiveAggressiveRegressor(max_iter=100, random_state=0,
|
||||
... tol=1e-3)
|
||||
>>> regr.fit(X, y)
|
||||
PassiveAggressiveRegressor(max_iter=100, random_state=0)
|
||||
>>> print(regr.coef_)
|
||||
[20.48736655 34.18818427 67.59122734 87.94731329]
|
||||
>>> print(regr.intercept_)
|
||||
[-0.02306214]
|
||||
>>> print(regr.predict([[0, 0, 0, 0]]))
|
||||
[-0.02306214]
|
||||
|
||||
See also
|
||||
--------
|
||||
|
||||
SGDRegressor
|
||||
|
||||
References
|
||||
----------
|
||||
Online Passive-Aggressive Algorithms
|
||||
<http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>
|
||||
K. Crammer, O. Dekel, J. Keshet, S. Shalev-Shwartz, Y. Singer - JMLR (2006)
|
||||
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, *, C=1.0, fit_intercept=True, max_iter=1000, tol=1e-3,
|
||||
early_stopping=False, validation_fraction=0.1,
|
||||
n_iter_no_change=5, shuffle=True, verbose=0,
|
||||
loss="epsilon_insensitive", epsilon=DEFAULT_EPSILON,
|
||||
random_state=None, warm_start=False,
|
||||
average=False):
|
||||
super().__init__(
|
||||
penalty=None,
|
||||
l1_ratio=0,
|
||||
epsilon=epsilon,
|
||||
eta0=1.0,
|
||||
fit_intercept=fit_intercept,
|
||||
max_iter=max_iter,
|
||||
tol=tol,
|
||||
early_stopping=early_stopping,
|
||||
validation_fraction=validation_fraction,
|
||||
n_iter_no_change=n_iter_no_change,
|
||||
shuffle=shuffle,
|
||||
verbose=verbose,
|
||||
random_state=random_state,
|
||||
warm_start=warm_start,
|
||||
average=average)
|
||||
self.C = C
|
||||
self.loss = loss
|
||||
|
||||
def partial_fit(self, X, y):
|
||||
"""Fit linear model with Passive Aggressive algorithm.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
||||
Subset of training data
|
||||
|
||||
y : numpy array of shape [n_samples]
|
||||
Subset of target values
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : returns an instance of self.
|
||||
"""
|
||||
self._validate_params(for_partial_fit=True)
|
||||
lr = "pa1" if self.loss == "epsilon_insensitive" else "pa2"
|
||||
return self._partial_fit(X, y, alpha=1.0, C=self.C,
|
||||
loss="epsilon_insensitive",
|
||||
learning_rate=lr, max_iter=1,
|
||||
sample_weight=None,
|
||||
coef_init=None, intercept_init=None)
|
||||
|
||||
def fit(self, X, y, coef_init=None, intercept_init=None):
|
||||
"""Fit linear model with Passive Aggressive algorithm.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
||||
Training data
|
||||
|
||||
y : numpy array of shape [n_samples]
|
||||
Target values
|
||||
|
||||
coef_init : array, shape = [n_features]
|
||||
The initial coefficients to warm-start the optimization.
|
||||
|
||||
intercept_init : array, shape = [1]
|
||||
The initial intercept to warm-start the optimization.
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : returns an instance of self.
|
||||
"""
|
||||
self._validate_params()
|
||||
lr = "pa1" if self.loss == "epsilon_insensitive" else "pa2"
|
||||
return self._fit(X, y, alpha=1.0, C=self.C,
|
||||
loss="epsilon_insensitive",
|
||||
learning_rate=lr,
|
||||
coef_init=coef_init,
|
||||
intercept_init=intercept_init)
|
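# ---------------------------------------------------------------------------
# Hedged sketch of the epsilon-insensitive PA-I regression update from the
# same Crammer et al. (2006) reference; the shipped estimator performs this
# inside the SGD machinery.  Helper name and defaults are illustrative only.
import numpy as np


def _pa_regression_step(w, x, y, C=1.0, epsilon=0.1):
    err = y - np.dot(w, x)
    loss = max(0.0, abs(err) - epsilon)         # epsilon-insensitive loss
    sq_norm = np.dot(x, x)
    if loss == 0.0 or sq_norm == 0.0:
        return w
    tau = min(C, loss / sq_norm)                # PA-I step size
    return w + np.sign(err) * tau * x
# ---------------------------------------------------------------------------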
160
venv/Lib/site-packages/sklearn/linear_model/_perceptron.py
Normal file
@@ -0,0 +1,160 @@
# Author: Mathieu Blondel
|
||||
# License: BSD 3 clause
|
||||
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ._stochastic_gradient import BaseSGDClassifier
|
||||
|
||||
|
||||
class Perceptron(BaseSGDClassifier):
|
||||
"""Perceptron
|
||||
|
||||
Read more in the :ref:`User Guide <perceptron>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
penalty : {'l2','l1','elasticnet'}, default=None
|
||||
The penalty (aka regularization term) to be used.
|
||||
|
||||
alpha : float, default=0.0001
|
||||
Constant that multiplies the regularization term if regularization is
|
||||
used.
|
||||
|
||||
fit_intercept : bool, default=True
|
||||
Whether the intercept should be estimated or not. If False, the
|
||||
data is assumed to be already centered.
|
||||
|
||||
max_iter : int, default=1000
|
||||
The maximum number of passes over the training data (aka epochs).
|
||||
It only impacts the behavior in the ``fit`` method, and not the
|
||||
:meth:`partial_fit` method.
|
||||
|
||||
.. versionadded:: 0.19
|
||||
|
||||
tol : float, default=1e-3
|
||||
The stopping criterion. If it is not None, the iterations will stop
|
||||
when (loss > previous_loss - tol).
|
||||
|
||||
.. versionadded:: 0.19
|
||||
|
||||
shuffle : bool, default=True
|
||||
Whether or not the training data should be shuffled after each epoch.
|
||||
|
||||
verbose : int, default=0
|
||||
The verbosity level
|
||||
|
||||
eta0 : double, default=1
|
||||
Constant by which the updates are multiplied.
|
||||
|
||||
n_jobs : int, default=None
|
||||
The number of CPUs to use to do the OVA (One Versus All, for
|
||||
multi-class problems) computation.
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
Used to shuffle the training data, when ``shuffle`` is set to
|
||||
``True``. Pass an int for reproducible output across multiple
|
||||
function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
early_stopping : bool, default=False
|
||||
Whether to use early stopping to terminate training when validation
|
||||
score is not improving. If set to True, it will automatically set aside
|
||||
a stratified fraction of training data as validation and terminate
|
||||
training when validation score is not improving by at least tol for
|
||||
n_iter_no_change consecutive epochs.
|
||||
|
||||
.. versionadded:: 0.20
|
||||
|
||||
validation_fraction : float, default=0.1
|
||||
The proportion of training data to set aside as validation set for
|
||||
early stopping. Must be between 0 and 1.
|
||||
Only used if early_stopping is True.
|
||||
|
||||
.. versionadded:: 0.20
|
||||
|
||||
n_iter_no_change : int, default=5
|
||||
Number of iterations with no improvement to wait before early stopping.
|
||||
|
||||
.. versionadded:: 0.20
|
||||
|
||||
class_weight : dict, {class_label: weight} or "balanced", default=None
|
||||
Preset for the class_weight fit parameter.
|
||||
|
||||
Weights associated with classes. If not given, all classes
|
||||
are supposed to have weight one.
|
||||
|
||||
The "balanced" mode uses the values of y to automatically adjust
|
||||
weights inversely proportional to class frequencies in the input data
|
||||
as ``n_samples / (n_classes * np.bincount(y))``
|
||||
|
||||
warm_start : bool, default=False
|
||||
When set to True, reuse the solution of the previous call to fit as
|
||||
initialization, otherwise, just erase the previous solution. See
|
||||
:term:`the Glossary <warm_start>`.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
coef_ : ndarray of shape = [1, n_features] if n_classes == 2 else \
|
||||
[n_classes, n_features]
|
||||
Weights assigned to the features.
|
||||
|
||||
intercept_ : ndarray of shape = [1] if n_classes == 2 else [n_classes]
|
||||
Constants in decision function.
|
||||
|
||||
n_iter_ : int
|
||||
The actual number of iterations to reach the stopping criterion.
|
||||
For multiclass fits, it is the maximum over every binary fit.
|
||||
|
||||
classes_ : ndarray of shape (n_classes,)
|
||||
The unique classes labels.
|
||||
|
||||
t_ : int
|
||||
Number of weight updates performed during training.
|
||||
Same as ``(n_iter_ * n_samples)``.
|
||||
|
||||
Notes
|
||||
-----
|
||||
|
||||
``Perceptron`` is a classification algorithm which shares the same
|
||||
underlying implementation with ``SGDClassifier``. In fact,
|
||||
``Perceptron()`` is equivalent to `SGDClassifier(loss="perceptron",
|
||||
eta0=1, learning_rate="constant", penalty=None)`.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.datasets import load_digits
|
||||
>>> from sklearn.linear_model import Perceptron
|
||||
>>> X, y = load_digits(return_X_y=True)
|
||||
>>> clf = Perceptron(tol=1e-3, random_state=0)
|
||||
>>> clf.fit(X, y)
|
||||
Perceptron()
|
||||
>>> clf.score(X, y)
|
||||
0.939...
|
||||
|
||||
See also
|
||||
--------
|
||||
|
||||
SGDClassifier
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
https://en.wikipedia.org/wiki/Perceptron and references therein.
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, *, penalty=None, alpha=0.0001, fit_intercept=True,
|
||||
max_iter=1000, tol=1e-3, shuffle=True, verbose=0, eta0=1.0,
|
||||
n_jobs=None, random_state=0, early_stopping=False,
|
||||
validation_fraction=0.1, n_iter_no_change=5,
|
||||
class_weight=None, warm_start=False):
|
||||
super().__init__(
|
||||
loss="perceptron", penalty=penalty, alpha=alpha, l1_ratio=0,
|
||||
fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,
|
||||
shuffle=shuffle, verbose=verbose, random_state=random_state,
|
||||
learning_rate="constant", eta0=eta0, early_stopping=early_stopping,
|
||||
validation_fraction=validation_fraction,
|
||||
n_iter_no_change=n_iter_no_change, power_t=0.5,
|
||||
warm_start=warm_start, class_weight=class_weight, n_jobs=n_jobs)
|
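# ---------------------------------------------------------------------------
# Hedged check of the equivalence stated in the Notes above: Perceptron is
# SGDClassifier with loss="perceptron", eta0=1, learning_rate="constant" and
# penalty=None.  With shuffling disabled and identical settings the two fits
# are expected to coincide (standalone snippet, not meant to run at import).
from sklearn.datasets import load_digits
from sklearn.linear_model import Perceptron, SGDClassifier
import numpy as np

_X, _y = load_digits(return_X_y=True)
_p = Perceptron(max_iter=20, tol=None, shuffle=False, random_state=0)
_s = SGDClassifier(loss="perceptron", eta0=1, learning_rate="constant",
                   penalty=None, max_iter=20, tol=None, shuffle=False,
                   random_state=0)
print(np.abs(_p.fit(_X, _y).coef_ - _s.fit(_X, _y).coef_).max())  # expected 0.0
# ---------------------------------------------------------------------------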
504
venv/Lib/site-packages/sklearn/linear_model/_ransac.py
Normal file
@@ -0,0 +1,504 @@
# coding: utf-8
|
||||
|
||||
# Author: Johannes Schönberger
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
|
||||
import numpy as np
|
||||
import warnings
|
||||
|
||||
from ..base import BaseEstimator, MetaEstimatorMixin, RegressorMixin, clone
|
||||
from ..base import MultiOutputMixin
|
||||
from ..utils import check_random_state, check_consistent_length
|
||||
from ..utils.random import sample_without_replacement
|
||||
from ..utils.validation import check_is_fitted, _check_sample_weight
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ._base import LinearRegression
|
||||
from ..utils.validation import has_fit_parameter
|
||||
from ..exceptions import ConvergenceWarning
|
||||
|
||||
_EPSILON = np.spacing(1)
|
||||
|
||||
|
||||
def _dynamic_max_trials(n_inliers, n_samples, min_samples, probability):
|
||||
"""Determine number trials such that at least one outlier-free subset is
|
||||
sampled for the given inlier/outlier ratio.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_inliers : int
|
||||
Number of inliers in the data.
|
||||
|
||||
n_samples : int
|
||||
Total number of samples in the data.
|
||||
|
||||
min_samples : int
|
||||
Minimum number of samples chosen randomly from original data.
|
||||
|
||||
probability : float
|
||||
Probability (confidence) that one outlier-free sample is generated.
|
||||
|
||||
Returns
|
||||
-------
|
||||
trials : int
|
||||
Number of trials.
|
||||
|
||||
"""
|
||||
inlier_ratio = n_inliers / float(n_samples)
|
||||
nom = max(_EPSILON, 1 - probability)
|
||||
denom = max(_EPSILON, 1 - inlier_ratio ** min_samples)
|
||||
if nom == 1:
|
||||
return 0
|
||||
if denom == 1:
|
||||
return float('inf')
|
||||
return abs(float(np.ceil(np.log(nom) / np.log(denom))))
|
||||
|
||||
|
||||
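# ---------------------------------------------------------------------------
# Hedged worked example of the stop_probability formula implemented above,
#     N >= log(1 - probability) / log(1 - inlier_ratio ** min_samples):
# with half the data inliers, 2-point samples and 99 % confidence,
#     N = ceil(log(0.01) / log(1 - 0.5 ** 2)) = ceil(16.0...) = 17.
assert _dynamic_max_trials(n_inliers=50, n_samples=100,
                           min_samples=2, probability=0.99) == 17
# ---------------------------------------------------------------------------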
class RANSACRegressor(MetaEstimatorMixin, RegressorMixin,
|
||||
MultiOutputMixin, BaseEstimator):
|
||||
"""RANSAC (RANdom SAmple Consensus) algorithm.
|
||||
|
||||
RANSAC is an iterative algorithm for the robust estimation of parameters
|
||||
from a subset of inliers from the complete data set.
|
||||
|
||||
Read more in the :ref:`User Guide <ransac_regression>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
base_estimator : object, optional
|
||||
Base estimator object which implements the following methods:
|
||||
|
||||
* `fit(X, y)`: Fit model to given training data and target values.
|
||||
* `score(X, y)`: Returns the mean accuracy on the given test data,
|
||||
which is used for the stop criterion defined by `stop_score`.
|
||||
Additionally, the score is used to decide which of two equally
|
||||
large consensus sets is chosen as the better one.
|
||||
* `predict(X)`: Returns predicted values using the linear model,
|
||||
which is used to compute residual error using loss function.
|
||||
|
||||
If `base_estimator` is None, then
|
||||
``base_estimator=sklearn.linear_model.LinearRegression()`` is used for
|
||||
target values of dtype float.
|
||||
|
||||
Note that the current implementation only supports regression
|
||||
estimators.
|
||||
|
||||
min_samples : int (>= 1) or float ([0, 1]), optional
|
||||
Minimum number of samples chosen randomly from original data. Treated
|
||||
as an absolute number of samples for `min_samples >= 1`, treated as a
|
||||
relative number `ceil(min_samples * X.shape[0])` for
|
||||
`min_samples < 1`. This is typically chosen as the minimal number of
|
||||
samples necessary to estimate the given `base_estimator`. By default a
|
||||
``sklearn.linear_model.LinearRegression()`` estimator is assumed and
|
||||
`min_samples` is chosen as ``X.shape[1] + 1``.
|
||||
|
||||
residual_threshold : float, optional
|
||||
Maximum residual for a data sample to be classified as an inlier.
|
||||
By default the threshold is chosen as the MAD (median absolute
|
||||
deviation) of the target values `y`.
|
||||
|
||||
is_data_valid : callable, optional
|
||||
This function is called with the randomly selected data before the
|
||||
model is fitted to it: `is_data_valid(X, y)`. If its return value is
|
||||
False the current randomly chosen sub-sample is skipped.
|
||||
|
||||
is_model_valid : callable, optional
|
||||
This function is called with the estimated model and the randomly
|
||||
selected data: `is_model_valid(model, X, y)`. If its return value is
|
||||
False the current randomly chosen sub-sample is skipped.
|
||||
Rejecting samples with this function is computationally costlier than
|
||||
with `is_data_valid`. `is_model_valid` should therefore only be used if
|
||||
the estimated model is needed for making the rejection decision.
|
||||
|
||||
max_trials : int, optional
|
||||
Maximum number of iterations for random sample selection.
|
||||
|
||||
max_skips : int, optional
|
||||
Maximum number of iterations that can be skipped due to finding zero
|
||||
inliers or invalid data defined by ``is_data_valid`` or invalid models
|
||||
defined by ``is_model_valid``.
|
||||
|
||||
.. versionadded:: 0.19
|
||||
|
||||
stop_n_inliers : int, optional
|
||||
Stop iteration if at least this number of inliers are found.
|
||||
|
||||
stop_score : float, optional
|
||||
        Stop iteration if the score is greater than or equal to this threshold.
|
||||
|
||||
stop_probability : float in range [0, 1], optional
|
||||
RANSAC iteration stops if at least one outlier-free set of the training
|
||||
        data is sampled in RANSAC. This requires generating at least N
|
||||
samples (iterations)::
|
||||
|
||||
N >= log(1 - probability) / log(1 - e**m)
|
||||
|
||||
        where the probability (confidence) is typically set to a high value such
|
||||
as 0.99 (the default) and e is the current fraction of inliers w.r.t.
|
||||
the total number of samples.
|
||||
|
||||
loss : string, callable, optional, default "absolute_loss"
|
||||
        The string inputs "absolute_loss" and "squared_loss" are supported; they
|
||||
        compute the absolute loss and the squared loss per sample,
|
||||
        respectively.
|
||||
|
||||
If ``loss`` is a callable, then it should be a function that takes
|
||||
        two arrays as inputs, the true and predicted values, and returns a 1-D
|
||||
array with the i-th value of the array corresponding to the loss
|
||||
on ``X[i]``.
|
||||
|
||||
If the loss on a sample is greater than the ``residual_threshold``,
|
||||
then this sample is classified as an outlier.
|
||||
|
||||
.. versionadded:: 0.18
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
        The generator used for the random selection of data subsets.
|
||||
Pass an int for reproducible output across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
estimator_ : object
|
||||
Best fitted model (copy of the `base_estimator` object).
|
||||
|
||||
n_trials_ : int
|
||||
Number of random selection trials until one of the stop criteria is
|
||||
met. It is always ``<= max_trials``.
|
||||
|
||||
inlier_mask_ : bool array of shape [n_samples]
|
||||
Boolean mask of inliers classified as ``True``.
|
||||
|
||||
n_skips_no_inliers_ : int
|
||||
Number of iterations skipped due to finding zero inliers.
|
||||
|
||||
.. versionadded:: 0.19
|
||||
|
||||
n_skips_invalid_data_ : int
|
||||
Number of iterations skipped due to invalid data defined by
|
||||
``is_data_valid``.
|
||||
|
||||
.. versionadded:: 0.19
|
||||
|
||||
n_skips_invalid_model_ : int
|
||||
Number of iterations skipped due to an invalid model defined by
|
||||
``is_model_valid``.
|
||||
|
||||
.. versionadded:: 0.19
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.linear_model import RANSACRegressor
|
||||
>>> from sklearn.datasets import make_regression
|
||||
>>> X, y = make_regression(
|
||||
... n_samples=200, n_features=2, noise=4.0, random_state=0)
|
||||
>>> reg = RANSACRegressor(random_state=0).fit(X, y)
|
||||
>>> reg.score(X, y)
|
||||
0.9885...
|
||||
>>> reg.predict(X[:1,])
|
||||
array([-31.9417...])
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] https://en.wikipedia.org/wiki/RANSAC
|
||||
.. [2] https://www.sri.com/sites/default/files/publications/ransac-publication.pdf
|
||||
.. [3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, base_estimator=None, *, min_samples=None,
|
||||
residual_threshold=None, is_data_valid=None,
|
||||
is_model_valid=None, max_trials=100, max_skips=np.inf,
|
||||
stop_n_inliers=np.inf, stop_score=np.inf,
|
||||
stop_probability=0.99, loss='absolute_loss',
|
||||
random_state=None):
|
||||
|
||||
self.base_estimator = base_estimator
|
||||
self.min_samples = min_samples
|
||||
self.residual_threshold = residual_threshold
|
||||
self.is_data_valid = is_data_valid
|
||||
self.is_model_valid = is_model_valid
|
||||
self.max_trials = max_trials
|
||||
self.max_skips = max_skips
|
||||
self.stop_n_inliers = stop_n_inliers
|
||||
self.stop_score = stop_score
|
||||
self.stop_probability = stop_probability
|
||||
self.random_state = random_state
|
||||
self.loss = loss
|
||||
|
||||
def fit(self, X, y, sample_weight=None):
|
||||
"""Fit estimator using RANSAC algorithm.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like or sparse matrix, shape [n_samples, n_features]
|
||||
Training data.
|
||||
|
||||
y : array-like of shape (n_samples,) or (n_samples, n_targets)
|
||||
Target values.
|
||||
|
||||
sample_weight : array-like of shape (n_samples,), default=None
|
||||
            Individual weights for each sample. An error is raised if
|
||||
            sample_weight is passed and the base_estimator's fit method
|
||||
            does not support it.
|
||||
|
||||
.. versionadded:: 0.18
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If no valid consensus set could be found. This occurs if
|
||||
`is_data_valid` and `is_model_valid` return False for all
|
||||
`max_trials` randomly chosen sub-samples.
|
||||
|
||||
"""
|
||||
# Need to validate separately here.
|
||||
        # We can't pass multi_output=True because that would allow y to be csr.
|
||||
check_X_params = dict(accept_sparse='csr')
|
||||
check_y_params = dict(ensure_2d=False)
|
||||
X, y = self._validate_data(X, y, validate_separately=(check_X_params,
|
||||
check_y_params))
|
||||
check_consistent_length(X, y)
|
||||
|
||||
if self.base_estimator is not None:
|
||||
base_estimator = clone(self.base_estimator)
|
||||
else:
|
||||
base_estimator = LinearRegression()
|
||||
|
||||
if self.min_samples is None:
|
||||
# assume linear model by default
|
||||
min_samples = X.shape[1] + 1
|
||||
elif 0 < self.min_samples < 1:
|
||||
min_samples = np.ceil(self.min_samples * X.shape[0])
|
||||
elif self.min_samples >= 1:
|
||||
if self.min_samples % 1 != 0:
|
||||
raise ValueError("Absolute number of samples must be an "
|
||||
"integer value.")
|
||||
min_samples = self.min_samples
|
||||
else:
|
||||
raise ValueError("Value for `min_samples` must be scalar and "
|
||||
"positive.")
|
||||
if min_samples > X.shape[0]:
|
||||
raise ValueError("`min_samples` may not be larger than number "
|
||||
"of samples: n_samples = %d." % (X.shape[0]))
|
||||
|
||||
if self.stop_probability < 0 or self.stop_probability > 1:
|
||||
raise ValueError("`stop_probability` must be in range [0, 1].")
|
||||
|
||||
if self.residual_threshold is None:
|
||||
# MAD (median absolute deviation)
|
||||
residual_threshold = np.median(np.abs(y - np.median(y)))
|
||||
else:
|
||||
residual_threshold = self.residual_threshold
|
||||
|
||||
if self.loss == "absolute_loss":
|
||||
if y.ndim == 1:
|
||||
loss_function = lambda y_true, y_pred: np.abs(y_true - y_pred)
|
||||
else:
|
||||
loss_function = lambda \
|
||||
y_true, y_pred: np.sum(np.abs(y_true - y_pred), axis=1)
|
||||
|
||||
elif self.loss == "squared_loss":
|
||||
if y.ndim == 1:
|
||||
loss_function = lambda y_true, y_pred: (y_true - y_pred) ** 2
|
||||
else:
|
||||
loss_function = lambda \
|
||||
y_true, y_pred: np.sum((y_true - y_pred) ** 2, axis=1)
|
||||
|
||||
elif callable(self.loss):
|
||||
loss_function = self.loss
|
||||
|
||||
else:
|
||||
raise ValueError(
|
||||
"loss should be 'absolute_loss', 'squared_loss' or a callable."
|
||||
"Got %s. " % self.loss)
|
||||
|
||||
|
||||
random_state = check_random_state(self.random_state)
|
||||
|
||||
        try:  # Not all estimators accept a random_state
|
||||
base_estimator.set_params(random_state=random_state)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
estimator_fit_has_sample_weight = has_fit_parameter(base_estimator,
|
||||
"sample_weight")
|
||||
estimator_name = type(base_estimator).__name__
|
||||
if (sample_weight is not None and not
|
||||
estimator_fit_has_sample_weight):
|
||||
raise ValueError("%s does not support sample_weight. Samples"
|
||||
" weights are only used for the calibration"
|
||||
" itself." % estimator_name)
|
||||
if sample_weight is not None:
|
||||
sample_weight = _check_sample_weight(sample_weight, X)
|
||||
|
||||
n_inliers_best = 1
|
||||
score_best = -np.inf
|
||||
inlier_mask_best = None
|
||||
X_inlier_best = None
|
||||
y_inlier_best = None
|
||||
inlier_best_idxs_subset = None
|
||||
self.n_skips_no_inliers_ = 0
|
||||
self.n_skips_invalid_data_ = 0
|
||||
self.n_skips_invalid_model_ = 0
|
||||
|
||||
# number of data samples
|
||||
n_samples = X.shape[0]
|
||||
sample_idxs = np.arange(n_samples)
|
||||
|
||||
self.n_trials_ = 0
|
||||
max_trials = self.max_trials
|
||||
while self.n_trials_ < max_trials:
|
||||
self.n_trials_ += 1
|
||||
|
||||
if (self.n_skips_no_inliers_ + self.n_skips_invalid_data_ +
|
||||
self.n_skips_invalid_model_) > self.max_skips:
|
||||
break
|
||||
|
||||
# choose random sample set
|
||||
subset_idxs = sample_without_replacement(n_samples, min_samples,
|
||||
random_state=random_state)
|
||||
X_subset = X[subset_idxs]
|
||||
y_subset = y[subset_idxs]
|
||||
|
||||
# check if random sample set is valid
|
||||
if (self.is_data_valid is not None
|
||||
and not self.is_data_valid(X_subset, y_subset)):
|
||||
self.n_skips_invalid_data_ += 1
|
||||
continue
|
||||
|
||||
# fit model for current random sample set
|
||||
if sample_weight is None:
|
||||
base_estimator.fit(X_subset, y_subset)
|
||||
else:
|
||||
base_estimator.fit(X_subset, y_subset,
|
||||
sample_weight=sample_weight[subset_idxs])
|
||||
|
||||
# check if estimated model is valid
|
||||
if (self.is_model_valid is not None and not
|
||||
self.is_model_valid(base_estimator, X_subset, y_subset)):
|
||||
self.n_skips_invalid_model_ += 1
|
||||
continue
|
||||
|
||||
# residuals of all data for current random sample model
|
||||
y_pred = base_estimator.predict(X)
|
||||
residuals_subset = loss_function(y, y_pred)
|
||||
|
||||
# classify data into inliers and outliers
|
||||
inlier_mask_subset = residuals_subset < residual_threshold
|
||||
n_inliers_subset = np.sum(inlier_mask_subset)
|
||||
|
||||
            # fewer inliers than the current best -> skip current random sample
|
||||
if n_inliers_subset < n_inliers_best:
|
||||
self.n_skips_no_inliers_ += 1
|
||||
continue
|
||||
|
||||
# extract inlier data set
|
||||
inlier_idxs_subset = sample_idxs[inlier_mask_subset]
|
||||
X_inlier_subset = X[inlier_idxs_subset]
|
||||
y_inlier_subset = y[inlier_idxs_subset]
|
||||
|
||||
# score of inlier data set
|
||||
score_subset = base_estimator.score(X_inlier_subset,
|
||||
y_inlier_subset)
|
||||
|
||||
# same number of inliers but worse score -> skip current random
|
||||
# sample
|
||||
if (n_inliers_subset == n_inliers_best
|
||||
and score_subset < score_best):
|
||||
continue
|
||||
|
||||
# save current random sample as best sample
|
||||
n_inliers_best = n_inliers_subset
|
||||
score_best = score_subset
|
||||
inlier_mask_best = inlier_mask_subset
|
||||
X_inlier_best = X_inlier_subset
|
||||
y_inlier_best = y_inlier_subset
|
||||
inlier_best_idxs_subset = inlier_idxs_subset
|
||||
|
||||
max_trials = min(
|
||||
max_trials,
|
||||
_dynamic_max_trials(n_inliers_best, n_samples,
|
||||
min_samples, self.stop_probability))
|
||||
|
||||
# break if sufficient number of inliers or score is reached
|
||||
if n_inliers_best >= self.stop_n_inliers or \
|
||||
score_best >= self.stop_score:
|
||||
break
|
||||
|
||||
# if none of the iterations met the required criteria
|
||||
if inlier_mask_best is None:
|
||||
if ((self.n_skips_no_inliers_ + self.n_skips_invalid_data_ +
|
||||
self.n_skips_invalid_model_) > self.max_skips):
|
||||
raise ValueError(
|
||||
"RANSAC skipped more iterations than `max_skips` without"
|
||||
" finding a valid consensus set. Iterations were skipped"
|
||||
" because each randomly chosen sub-sample failed the"
|
||||
" passing criteria. See estimator attributes for"
|
||||
" diagnostics (n_skips*).")
|
||||
else:
|
||||
raise ValueError(
|
||||
"RANSAC could not find a valid consensus set. All"
|
||||
" `max_trials` iterations were skipped because each"
|
||||
" randomly chosen sub-sample failed the passing criteria."
|
||||
" See estimator attributes for diagnostics (n_skips*).")
|
||||
else:
|
||||
if (self.n_skips_no_inliers_ + self.n_skips_invalid_data_ +
|
||||
self.n_skips_invalid_model_) > self.max_skips:
|
||||
warnings.warn("RANSAC found a valid consensus set but exited"
|
||||
" early due to skipping more iterations than"
|
||||
" `max_skips`. See estimator attributes for"
|
||||
" diagnostics (n_skips*).",
|
||||
ConvergenceWarning)
|
||||
|
||||
# estimate final model using all inliers
|
||||
if sample_weight is None:
|
||||
base_estimator.fit(X_inlier_best, y_inlier_best)
|
||||
else:
|
||||
base_estimator.fit(
|
||||
X_inlier_best,
|
||||
y_inlier_best,
|
||||
sample_weight=sample_weight[inlier_best_idxs_subset])
|
||||
|
||||
self.estimator_ = base_estimator
|
||||
self.inlier_mask_ = inlier_mask_best
|
||||
return self
|
||||
|
||||
def predict(self, X):
|
||||
"""Predict using the estimated model.
|
||||
|
||||
This is a wrapper for `estimator_.predict(X)`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : numpy array of shape [n_samples, n_features]
|
||||
|
||||
Returns
|
||||
-------
|
||||
y : array, shape = [n_samples] or [n_samples, n_targets]
|
||||
Returns predicted values.
|
||||
"""
|
||||
check_is_fitted(self)
|
||||
|
||||
return self.estimator_.predict(X)
|
||||
|
||||
def score(self, X, y):
|
||||
"""Returns the score of the prediction.
|
||||
|
||||
This is a wrapper for `estimator_.score(X, y)`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : numpy array or sparse matrix of shape [n_samples, n_features]
|
||||
Training data.
|
||||
|
||||
y : array, shape = [n_samples] or [n_samples, n_targets]
|
||||
Target values.
|
||||
|
||||
Returns
|
||||
-------
|
||||
z : float
|
||||
Score of the prediction.
|
||||
"""
|
||||
check_is_fitted(self)
|
||||
|
||||
return self.estimator_.score(X, y)
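# A minimal usage sketch (not part of the library) illustrating the custom
# `loss` callable and the `inlier_mask_` attribute documented above.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import RANSACRegressor

X, y = make_regression(n_samples=200, n_features=2, noise=4.0, random_state=0)

def per_sample_absolute_loss(y_true, y_pred):
    # the callable must return one loss value per sample
    return np.abs(y_true - y_pred)

reg = RANSACRegressor(loss=per_sample_absolute_loss, random_state=0).fit(X, y)
n_inliers = reg.inlier_mask_.sum()  # number of samples classified as inliers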
|
1911
venv/Lib/site-packages/sklearn/linear_model/_ridge.py
Normal file
File diff suppressed because it is too large
345
venv/Lib/site-packages/sklearn/linear_model/_sag.py
Normal file
|
@ -0,0 +1,345 @@
|
|||
"""Solvers for Ridge and LogisticRegression using SAG algorithm"""
|
||||
|
||||
# Authors: Tom Dupre la Tour <tom.dupre-la-tour@m4x.org>
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ._base import make_dataset
|
||||
from ._sag_fast import sag32, sag64
|
||||
from ..exceptions import ConvergenceWarning
|
||||
from ..utils import check_array
|
||||
from ..utils.validation import _check_sample_weight
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ..utils.extmath import row_norms
|
||||
|
||||
|
||||
def get_auto_step_size(max_squared_sum, alpha_scaled, loss, fit_intercept,
|
||||
n_samples=None,
|
||||
is_saga=False):
|
||||
"""Compute automatic step size for SAG solver
|
||||
|
||||
The step size is set to 1 / (alpha_scaled + L + fit_intercept) where L is
|
||||
    the maximum sum of squares over all samples.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
max_squared_sum : float
|
||||
Maximum squared sum of X over samples.
|
||||
|
||||
alpha_scaled : float
|
||||
Constant that multiplies the regularization term, scaled by
|
||||
1. / n_samples, the number of samples.
|
||||
|
||||
loss : string, in {"log", "squared"}
|
||||
The loss function used in SAG solver.
|
||||
|
||||
fit_intercept : bool
|
||||
Specifies if a constant (a.k.a. bias or intercept) will be
|
||||
added to the decision function.
|
||||
|
||||
n_samples : int, optional
|
||||
Number of rows in X. Useful if is_saga=True.
|
||||
|
||||
is_saga : boolean, optional
|
||||
Whether to return step size for the SAGA algorithm or the SAG
|
||||
algorithm.
|
||||
|
||||
Returns
|
||||
-------
|
||||
step_size : float
|
||||
Step size used in SAG solver.
|
||||
|
||||
References
|
||||
----------
|
||||
Schmidt, M., Roux, N. L., & Bach, F. (2013).
|
||||
Minimizing finite sums with the stochastic average gradient
|
||||
https://hal.inria.fr/hal-00860051/document
|
||||
|
||||
Defazio, A., Bach F. & Lacoste-Julien S. (2014).
|
||||
SAGA: A Fast Incremental Gradient Method With Support
|
||||
for Non-Strongly Convex Composite Objectives
|
||||
https://arxiv.org/abs/1407.0202
|
||||
"""
|
||||
if loss in ('log', 'multinomial'):
|
||||
L = (0.25 * (max_squared_sum + int(fit_intercept)) + alpha_scaled)
|
||||
elif loss == 'squared':
|
||||
        # Lipschitz constant for squared loss (the step size is its inverse)
|
||||
L = max_squared_sum + int(fit_intercept) + alpha_scaled
|
||||
else:
|
||||
raise ValueError("Unknown loss function for SAG solver, got %s "
|
||||
"instead of 'log' or 'squared'" % loss)
|
||||
if is_saga:
|
||||
# SAGA theoretical step size is 1/3L or 1 / (2 * (L + mu n))
|
||||
# See Defazio et al. 2014
|
||||
mun = min(2 * n_samples * alpha_scaled, L)
|
||||
step = 1. / (2 * L + mun)
|
||||
else:
|
||||
# SAG theoretical step size is 1/16L but it is recommended to use 1 / L
|
||||
# see http://www.birs.ca//workshops//2014/14w5003/files/schmidt.pdf,
|
||||
# slide 65
|
||||
step = 1. / L
|
||||
return step
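# A minimal sketch (not part of the library) reproducing the step size chosen
# above for a small squared-loss problem; get_auto_step_size is a private
# helper of this module.
import numpy as np
from sklearn.utils.extmath import row_norms
from sklearn.linear_model._sag import get_auto_step_size

X = np.array([[1.0, 2.0], [3.0, 4.0], [0.5, 0.5]])
max_squared_sum = row_norms(X, squared=True).max()         # 25.0 here
alpha_scaled = 1.0 / X.shape[0]
step = get_auto_step_size(max_squared_sum, alpha_scaled, 'squared',
                          fit_intercept=True)              # 1 / (25 + 1 + 1/3)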
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0.,
|
||||
max_iter=1000, tol=0.001, verbose=0, random_state=None,
|
||||
check_input=True, max_squared_sum=None,
|
||||
warm_start_mem=None,
|
||||
is_saga=False):
|
||||
"""SAG solver for Ridge and LogisticRegression
|
||||
|
||||
SAG stands for Stochastic Average Gradient: the gradient of the loss is
|
||||
    estimated one sample at a time and the model is updated along the way with
|
||||
a constant learning rate.
|
||||
|
||||
IMPORTANT NOTE: 'sag' solver converges faster on columns that are on the
|
||||
same scale. You can normalize the data by using
|
||||
sklearn.preprocessing.StandardScaler on your data before passing it to the
|
||||
fit method.
|
||||
|
||||
This implementation works with data represented as dense numpy arrays or
|
||||
sparse scipy arrays of floating point values for the features. It will
|
||||
fit the data according to squared loss or log loss.
|
||||
|
||||
The regularizer is a penalty added to the loss function that shrinks model
|
||||
parameters towards the zero vector using the squared euclidean norm L2.
|
||||
|
||||
.. versionadded:: 0.17
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix}, shape (n_samples, n_features)
|
||||
Training data
|
||||
|
||||
y : numpy array, shape (n_samples,)
|
||||
Target values. With loss='multinomial', y must be label encoded
|
||||
(see preprocessing.LabelEncoder).
|
||||
|
||||
sample_weight : array-like, shape (n_samples,), optional
|
||||
Weights applied to individual samples (1. for unweighted).
|
||||
|
||||
loss : 'log' | 'squared' | 'multinomial'
|
||||
Loss function that will be optimized:
|
||||
        - 'log' is the binary logistic loss, as used in LogisticRegression.
|
||||
        - 'squared' is the squared loss, as used in Ridge.
|
||||
        - 'multinomial' is the multinomial logistic loss, as used in
|
||||
          LogisticRegression.
|
||||
|
||||
.. versionadded:: 0.18
|
||||
*loss='multinomial'*
|
||||
|
||||
alpha : float, optional
|
||||
L2 regularization term in the objective function
|
||||
``(0.5 * alpha * || W ||_F^2)``. Defaults to 1.
|
||||
|
||||
beta : float, optional
|
||||
L1 regularization term in the objective function
|
||||
``(beta * || W ||_1)``. Only applied if ``is_saga`` is set to True.
|
||||
Defaults to 0.
|
||||
|
||||
max_iter : int, optional
|
||||
The max number of passes over the training data if the stopping
|
||||
        criterion is not reached. Defaults to 1000.
|
||||
|
||||
tol : double, optional
|
||||
        The stopping criterion for the weights. The iterations will stop when
|
||||
max(change in weights) / max(weights) < tol. Defaults to .001
|
||||
|
||||
verbose : integer, optional
|
||||
The verbosity level.
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
Used when shuffling the data. Pass an int for reproducible output
|
||||
across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
check_input : bool, default True
|
||||
If False, the input arrays X and y will not be checked.
|
||||
|
||||
max_squared_sum : float, default None
|
||||
Maximum squared sum of X over samples. If None, it will be computed,
|
||||
going through all the samples. The value should be precomputed
|
||||
to speed up cross validation.
|
||||
|
||||
warm_start_mem : dict, optional
|
||||
The initialization parameters used for warm starting. Warm starting is
|
||||
currently used in LogisticRegression but not in Ridge.
|
||||
It contains:
|
||||
- 'coef': the weight vector, with the intercept in last line
|
||||
if the intercept is fitted.
|
||||
- 'gradient_memory': the scalar gradient for all seen samples.
|
||||
- 'sum_gradient': the sum of gradient over all seen samples,
|
||||
for each feature.
|
||||
- 'intercept_sum_gradient': the sum of gradient over all seen
|
||||
samples, for the intercept.
|
||||
- 'seen': array of boolean describing the seen samples.
|
||||
- 'num_seen': the number of seen samples.
|
||||
|
||||
is_saga : boolean, optional
|
||||
Whether to use the SAGA algorithm or the SAG algorithm. SAGA behaves
|
||||
        better in the first epochs, and allows for L1 regularisation.
|
||||
|
||||
Returns
|
||||
-------
|
||||
coef_ : array, shape (n_features)
|
||||
Weight vector.
|
||||
|
||||
n_iter_ : int
|
||||
        The number of full passes over all samples.
|
||||
|
||||
warm_start_mem : dict
|
||||
Contains a 'coef' key with the fitted result, and possibly the
|
||||
        fitted intercept at the end of the array. It also contains other keys
|
||||
used for warm starting.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> from sklearn import linear_model
|
||||
>>> n_samples, n_features = 10, 5
|
||||
>>> rng = np.random.RandomState(0)
|
||||
>>> X = rng.randn(n_samples, n_features)
|
||||
>>> y = rng.randn(n_samples)
|
||||
>>> clf = linear_model.Ridge(solver='sag')
|
||||
>>> clf.fit(X, y)
|
||||
Ridge(solver='sag')
|
||||
|
||||
>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
|
||||
>>> y = np.array([1, 1, 2, 2])
|
||||
>>> clf = linear_model.LogisticRegression(
|
||||
... solver='sag', multi_class='multinomial')
|
||||
>>> clf.fit(X, y)
|
||||
LogisticRegression(multi_class='multinomial', solver='sag')
|
||||
|
||||
References
|
||||
----------
|
||||
Schmidt, M., Roux, N. L., & Bach, F. (2013).
|
||||
Minimizing finite sums with the stochastic average gradient
|
||||
https://hal.inria.fr/hal-00860051/document
|
||||
|
||||
Defazio, A., Bach F. & Lacoste-Julien S. (2014).
|
||||
SAGA: A Fast Incremental Gradient Method With Support
|
||||
for Non-Strongly Convex Composite Objectives
|
||||
https://arxiv.org/abs/1407.0202
|
||||
|
||||
See also
|
||||
--------
|
||||
Ridge, SGDRegressor, ElasticNet, Lasso, SVR, and
|
||||
LogisticRegression, SGDClassifier, LinearSVC, Perceptron
|
||||
"""
|
||||
if warm_start_mem is None:
|
||||
warm_start_mem = {}
|
||||
# Ridge default max_iter is None
|
||||
if max_iter is None:
|
||||
max_iter = 1000
|
||||
|
||||
if check_input:
|
||||
_dtype = [np.float64, np.float32]
|
||||
X = check_array(X, dtype=_dtype, accept_sparse='csr', order='C')
|
||||
y = check_array(y, dtype=_dtype, ensure_2d=False, order='C')
|
||||
|
||||
n_samples, n_features = X.shape[0], X.shape[1]
|
||||
# As in SGD, the alpha is scaled by n_samples.
|
||||
alpha_scaled = float(alpha) / n_samples
|
||||
beta_scaled = float(beta) / n_samples
|
||||
|
||||
# if loss == 'multinomial', y should be label encoded.
|
||||
n_classes = int(y.max()) + 1 if loss == 'multinomial' else 1
|
||||
|
||||
# initialization
|
||||
sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
|
||||
|
||||
if 'coef' in warm_start_mem.keys():
|
||||
coef_init = warm_start_mem['coef']
|
||||
else:
|
||||
# assume fit_intercept is False
|
||||
coef_init = np.zeros((n_features, n_classes), dtype=X.dtype,
|
||||
order='C')
|
||||
|
||||
# coef_init contains possibly the intercept_init at the end.
|
||||
# Note that Ridge centers the data before fitting, so fit_intercept=False.
|
||||
fit_intercept = coef_init.shape[0] == (n_features + 1)
|
||||
if fit_intercept:
|
||||
intercept_init = coef_init[-1, :]
|
||||
coef_init = coef_init[:-1, :]
|
||||
else:
|
||||
intercept_init = np.zeros(n_classes, dtype=X.dtype)
|
||||
|
||||
if 'intercept_sum_gradient' in warm_start_mem.keys():
|
||||
intercept_sum_gradient = warm_start_mem['intercept_sum_gradient']
|
||||
else:
|
||||
intercept_sum_gradient = np.zeros(n_classes, dtype=X.dtype)
|
||||
|
||||
if 'gradient_memory' in warm_start_mem.keys():
|
||||
gradient_memory_init = warm_start_mem['gradient_memory']
|
||||
else:
|
||||
gradient_memory_init = np.zeros((n_samples, n_classes),
|
||||
dtype=X.dtype, order='C')
|
||||
if 'sum_gradient' in warm_start_mem.keys():
|
||||
sum_gradient_init = warm_start_mem['sum_gradient']
|
||||
else:
|
||||
sum_gradient_init = np.zeros((n_features, n_classes),
|
||||
dtype=X.dtype, order='C')
|
||||
|
||||
if 'seen' in warm_start_mem.keys():
|
||||
seen_init = warm_start_mem['seen']
|
||||
else:
|
||||
seen_init = np.zeros(n_samples, dtype=np.int32, order='C')
|
||||
|
||||
if 'num_seen' in warm_start_mem.keys():
|
||||
num_seen_init = warm_start_mem['num_seen']
|
||||
else:
|
||||
num_seen_init = 0
|
||||
|
||||
dataset, intercept_decay = make_dataset(X, y, sample_weight, random_state)
|
||||
|
||||
if max_squared_sum is None:
|
||||
max_squared_sum = row_norms(X, squared=True).max()
|
||||
step_size = get_auto_step_size(max_squared_sum, alpha_scaled, loss,
|
||||
fit_intercept, n_samples=n_samples,
|
||||
is_saga=is_saga)
|
||||
if step_size * alpha_scaled == 1:
|
||||
raise ZeroDivisionError("Current sag implementation does not handle "
|
||||
"the case step_size * alpha_scaled == 1")
|
||||
|
||||
sag = sag64 if X.dtype == np.float64 else sag32
|
||||
num_seen, n_iter_ = sag(dataset, coef_init,
|
||||
intercept_init, n_samples,
|
||||
n_features, n_classes, tol,
|
||||
max_iter,
|
||||
loss,
|
||||
step_size, alpha_scaled,
|
||||
beta_scaled,
|
||||
sum_gradient_init,
|
||||
gradient_memory_init,
|
||||
seen_init,
|
||||
num_seen_init,
|
||||
fit_intercept,
|
||||
intercept_sum_gradient,
|
||||
intercept_decay,
|
||||
is_saga,
|
||||
verbose)
|
||||
|
||||
if n_iter_ == max_iter:
|
||||
warnings.warn("The max_iter was reached which means "
|
||||
"the coef_ did not converge", ConvergenceWarning)
|
||||
|
||||
if fit_intercept:
|
||||
coef_init = np.vstack((coef_init, intercept_init))
|
||||
|
||||
warm_start_mem = {'coef': coef_init, 'sum_gradient': sum_gradient_init,
|
||||
'intercept_sum_gradient': intercept_sum_gradient,
|
||||
'gradient_memory': gradient_memory_init,
|
||||
'seen': seen_init, 'num_seen': num_seen}
|
||||
|
||||
if loss == 'multinomial':
|
||||
coef_ = coef_init.T
|
||||
else:
|
||||
coef_ = coef_init[:, 0]
|
||||
|
||||
return coef_, n_iter_, warm_start_mem
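# A minimal direct-call sketch (not part of the library); sag_solver is a
# private helper and Ridge(solver='sag') is the supported entry point.
import numpy as np
from sklearn.linear_model._sag import sag_solver

rng = np.random.RandomState(0)
X = rng.randn(20, 3)
y = X @ np.array([1.0, -2.0, 0.5]) + 0.01 * rng.randn(20)
coef, n_iter, warm = sag_solver(X, y, loss='squared', alpha=1.0, max_iter=100)
# `coef` holds the fitted weights, `n_iter` the number of passes performed,
# and `warm` can be passed back via warm_start_mem to continue optimisation.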
|
Binary file not shown.
Binary file not shown.
26
venv/Lib/site-packages/sklearn/linear_model/_sgd_fast.pxd
Normal file
|
@ -0,0 +1,26 @@
|
|||
# License: BSD 3 clause
|
||||
"""Helper to load LossFunction from sgd_fast.pyx to sag_fast.pyx"""
|
||||
|
||||
cdef class LossFunction:
|
||||
cdef double loss(self, double p, double y) nogil
|
||||
cdef double _dloss(self, double p, double y) nogil
|
||||
|
||||
|
||||
cdef class Regression(LossFunction):
|
||||
cdef double loss(self, double p, double y) nogil
|
||||
cdef double _dloss(self, double p, double y) nogil
|
||||
|
||||
|
||||
cdef class Classification(LossFunction):
|
||||
cdef double loss(self, double p, double y) nogil
|
||||
cdef double _dloss(self, double p, double y) nogil
|
||||
|
||||
|
||||
cdef class Log(Classification):
|
||||
cdef double loss(self, double p, double y) nogil
|
||||
cdef double _dloss(self, double p, double y) nogil
|
||||
|
||||
|
||||
cdef class SquaredLoss(Regression):
|
||||
cdef double loss(self, double p, double y) nogil
|
||||
cdef double _dloss(self, double p, double y) nogil
|
1578
venv/Lib/site-packages/sklearn/linear_model/_stochastic_gradient.py
Normal file
File diff suppressed because it is too large
400
venv/Lib/site-packages/sklearn/linear_model/_theil_sen.py
Normal file
|
@ -0,0 +1,400 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
A Theil-Sen Estimator for Multiple Linear Regression Model
|
||||
"""
|
||||
|
||||
# Author: Florian Wilhelm <florian.wilhelm@gmail.com>
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
|
||||
|
||||
import warnings
|
||||
from itertools import combinations
|
||||
|
||||
import numpy as np
|
||||
from scipy import linalg
|
||||
from scipy.special import binom
|
||||
from scipy.linalg.lapack import get_lapack_funcs
|
||||
from joblib import Parallel, delayed, effective_n_jobs
|
||||
|
||||
from ._base import LinearModel
|
||||
from ..base import RegressorMixin
|
||||
from ..utils import check_random_state
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ..exceptions import ConvergenceWarning
|
||||
|
||||
_EPSILON = np.finfo(np.double).eps
|
||||
|
||||
|
||||
def _modified_weiszfeld_step(X, x_old):
|
||||
"""Modified Weiszfeld step.
|
||||
|
||||
This function defines one iteration step in order to approximate the
|
||||
spatial median (L1 median). It is a form of an iteratively re-weighted
|
||||
least squares method.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Training vector, where n_samples is the number of samples and
|
||||
n_features is the number of features.
|
||||
|
||||
x_old : array, shape = [n_features]
|
||||
Current start vector.
|
||||
|
||||
Returns
|
||||
-------
|
||||
x_new : array, shape = [n_features]
|
||||
New iteration step.
|
||||
|
||||
References
|
||||
----------
|
||||
- On Computation of Spatial Median for Robust Data Mining, 2005
|
||||
T. Kärkkäinen and S. Äyrämö
|
||||
http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf
|
||||
"""
|
||||
diff = X - x_old
|
||||
diff_norm = np.sqrt(np.sum(diff ** 2, axis=1))
|
||||
mask = diff_norm >= _EPSILON
|
||||
# x_old equals one of our samples
|
||||
is_x_old_in_X = int(mask.sum() < X.shape[0])
|
||||
|
||||
diff = diff[mask]
|
||||
diff_norm = diff_norm[mask][:, np.newaxis]
|
||||
quotient_norm = linalg.norm(np.sum(diff / diff_norm, axis=0))
|
||||
|
||||
if quotient_norm > _EPSILON: # to avoid division by zero
|
||||
new_direction = (np.sum(X[mask, :] / diff_norm, axis=0)
|
||||
/ np.sum(1 / diff_norm, axis=0))
|
||||
else:
|
||||
new_direction = 1.
|
||||
quotient_norm = 1.
|
||||
|
||||
return (max(0., 1. - is_x_old_in_X / quotient_norm) * new_direction
|
||||
+ min(1., is_x_old_in_X / quotient_norm) * x_old)
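# A minimal sketch (not part of the library): a single Weiszfeld step pulls an
# estimate that is far from the data towards the inverse-distance-weighted
# mean of the samples; _modified_weiszfeld_step is a private helper.
import numpy as np
from sklearn.linear_model._theil_sen import _modified_weiszfeld_step

X = np.array([[0.0, 0.0], [2.0, 0.0], [0.0, 2.0]])
x_new = _modified_weiszfeld_step(X, np.array([10.0, 10.0]))
# x_new lies inside the convex hull of the three sample points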
|
||||
|
||||
|
||||
def _spatial_median(X, max_iter=300, tol=1.e-3):
|
||||
"""Spatial median (L1 median).
|
||||
|
||||
    The spatial median is a member of a class of so-called M-estimators which
|
||||
    are defined by an optimization problem. Given p points in an
|
||||
    n-dimensional space, the point x minimizing the sum of the distances to all
|
||||
    p points is called the spatial median.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Training vector, where n_samples is the number of samples and
|
||||
n_features is the number of features.
|
||||
|
||||
max_iter : int, optional
|
||||
Maximum number of iterations. Default is 300.
|
||||
|
||||
tol : float, optional
|
||||
Stop the algorithm if spatial_median has converged. Default is 1.e-3.
|
||||
|
||||
Returns
|
||||
-------
|
||||
spatial_median : array, shape = [n_features]
|
||||
Spatial median.
|
||||
|
||||
n_iter : int
|
||||
Number of iterations needed.
|
||||
|
||||
References
|
||||
----------
|
||||
- On Computation of Spatial Median for Robust Data Mining, 2005
|
||||
T. Kärkkäinen and S. Äyrämö
|
||||
http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf
|
||||
"""
|
||||
if X.shape[1] == 1:
|
||||
return 1, np.median(X.ravel())
|
||||
|
||||
tol **= 2 # We are computing the tol on the squared norm
|
||||
spatial_median_old = np.mean(X, axis=0)
|
||||
|
||||
for n_iter in range(max_iter):
|
||||
spatial_median = _modified_weiszfeld_step(X, spatial_median_old)
|
||||
if np.sum((spatial_median_old - spatial_median) ** 2) < tol:
|
||||
break
|
||||
else:
|
||||
spatial_median_old = spatial_median
|
||||
else:
|
||||
warnings.warn("Maximum number of iterations {max_iter} reached in "
|
||||
"spatial median for TheilSen regressor."
|
||||
"".format(max_iter=max_iter), ConvergenceWarning)
|
||||
|
||||
return n_iter, spatial_median
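# A minimal sketch (not part of the library): the spatial median of a small
# 2-D point cloud, computed with the private helper above.
import numpy as np
from sklearn.linear_model._theil_sen import _spatial_median

points = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [5.0, 5.0]])
n_iter, median = _spatial_median(points, max_iter=300, tol=1.e-3)
# the returned `median` is pulled far less towards the outlier (5, 5) than the mean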
|
||||
|
||||
|
||||
def _breakdown_point(n_samples, n_subsamples):
|
||||
"""Approximation of the breakdown point.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_samples : int
|
||||
Number of samples.
|
||||
|
||||
n_subsamples : int
|
||||
Number of subsamples to consider.
|
||||
|
||||
Returns
|
||||
-------
|
||||
breakdown_point : float
|
||||
Approximation of breakdown point.
|
||||
"""
|
||||
return 1 - (0.5 ** (1 / n_subsamples) * (n_samples - n_subsamples + 1) +
|
||||
n_subsamples - 1) / n_samples
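# A quick numeric check (a sketch, not part of the library): with 100 samples
# and 3-sample subsets the approximation gives a breakdown point of about 0.20,
# i.e. roughly 20% of the samples may be grossly corrupted.
from sklearn.linear_model._theil_sen import _breakdown_point

bp = _breakdown_point(n_samples=100, n_subsamples=3)   # ~0.20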
|
||||
|
||||
|
||||
def _lstsq(X, y, indices, fit_intercept):
|
||||
"""Least Squares Estimator for TheilSenRegressor class.
|
||||
|
||||
    This function computes a least-squares solution on a subset of rows of X
|
||||
    and y defined by the indices array. Optionally, an intercept column is
|
||||
    added if fit_intercept is set to True.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Design matrix, where n_samples is the number of samples and
|
||||
n_features is the number of features.
|
||||
|
||||
y : array, shape = [n_samples]
|
||||
Target vector, where n_samples is the number of samples.
|
||||
|
||||
indices : array, shape = [n_subpopulation, n_subsamples]
|
||||
Indices of all subsamples with respect to the chosen subpopulation.
|
||||
|
||||
fit_intercept : bool
|
||||
Fit intercept or not.
|
||||
|
||||
Returns
|
||||
-------
|
||||
weights : array, shape = [n_subpopulation, n_features + intercept]
|
||||
Solution matrix of n_subpopulation solved least square problems.
|
||||
"""
|
||||
fit_intercept = int(fit_intercept)
|
||||
n_features = X.shape[1] + fit_intercept
|
||||
n_subsamples = indices.shape[1]
|
||||
weights = np.empty((indices.shape[0], n_features))
|
||||
X_subpopulation = np.ones((n_subsamples, n_features))
|
||||
    # gelss needs y_subpopulation padded to the larger dimension of X_subpopulation
|
||||
y_subpopulation = np.zeros((max(n_subsamples, n_features)))
|
||||
lstsq, = get_lapack_funcs(('gelss',), (X_subpopulation, y_subpopulation))
|
||||
|
||||
for index, subset in enumerate(indices):
|
||||
X_subpopulation[:, fit_intercept:] = X[subset, :]
|
||||
y_subpopulation[:n_subsamples] = y[subset]
|
||||
weights[index] = lstsq(X_subpopulation,
|
||||
y_subpopulation)[1][:n_features]
|
||||
|
||||
return weights
|
||||
|
||||
|
||||
class TheilSenRegressor(RegressorMixin, LinearModel):
|
||||
"""Theil-Sen Estimator: robust multivariate regression model.
|
||||
|
||||
The algorithm calculates least square solutions on subsets with size
|
||||
n_subsamples of the samples in X. Any value of n_subsamples between the
|
||||
number of features and samples leads to an estimator with a compromise
|
||||
between robustness and efficiency. Since the number of least square
|
||||
solutions is "n_samples choose n_subsamples", it can be extremely large
|
||||
and can therefore be limited with max_subpopulation. If this limit is
|
||||
reached, the subsets are chosen randomly. In a final step, the spatial
|
||||
    median (or L1 median) of all least-squares solutions is calculated.
|
||||
|
||||
Read more in the :ref:`User Guide <theil_sen_regression>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fit_intercept : boolean, optional, default True
|
||||
Whether to calculate the intercept for this model. If set
|
||||
to false, no intercept will be used in calculations.
|
||||
|
||||
copy_X : boolean, optional, default True
|
||||
If True, X will be copied; else, it may be overwritten.
|
||||
|
||||
max_subpopulation : int, optional, default 1e4
|
||||
Instead of computing with a set of cardinality 'n choose k', where n is
|
||||
the number of samples and k is the number of subsamples (at least
|
||||
number of features), consider only a stochastic subpopulation of a
|
||||
given maximal size if 'n choose k' is larger than max_subpopulation.
|
||||
        For all but small problem sizes this parameter will determine
|
||||
        memory usage and runtime if n_subsamples is not changed.
|
||||
|
||||
n_subsamples : int, optional, default None
|
||||
        Number of samples used to calculate the parameters. This must be at
|
||||
        least the number of features (plus 1 if fit_intercept=True) and at
|
||||
        most the number of samples. A lower number leads to a higher breakdown
|
||||
        point and a lower efficiency, while a higher number leads to a lower
|
||||
        breakdown point and a higher efficiency. If None, take the
|
||||
minimum number of subsamples leading to maximal robustness.
|
||||
If n_subsamples is set to n_samples, Theil-Sen is identical to least
|
||||
squares.
|
||||
|
||||
max_iter : int, optional, default 300
|
||||
Maximum number of iterations for the calculation of spatial median.
|
||||
|
||||
tol : float, optional, default 1.e-3
|
||||
Tolerance when calculating spatial median.
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
A random number generator instance to define the state of the random
|
||||
permutations generator. Pass an int for reproducible output across
|
||||
multiple function calls.
|
||||
See :term:`Glossary <random_state>`
|
||||
|
||||
n_jobs : int or None, optional (default=None)
|
||||
        Number of CPUs to use when fitting the model.
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
verbose : boolean, optional, default False
|
||||
Verbose mode when fitting the model.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
coef_ : array, shape = (n_features)
|
||||
Coefficients of the regression model (median of distribution).
|
||||
|
||||
intercept_ : float
|
||||
Estimated intercept of regression model.
|
||||
|
||||
breakdown_ : float
|
||||
Approximated breakdown point.
|
||||
|
||||
n_iter_ : int
|
||||
Number of iterations needed for the spatial median.
|
||||
|
||||
n_subpopulation_ : int
|
||||
Number of combinations taken into account from 'n choose k', where n is
|
||||
the number of samples and k is the number of subsamples.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.linear_model import TheilSenRegressor
|
||||
>>> from sklearn.datasets import make_regression
|
||||
>>> X, y = make_regression(
|
||||
... n_samples=200, n_features=2, noise=4.0, random_state=0)
|
||||
>>> reg = TheilSenRegressor(random_state=0).fit(X, y)
|
||||
>>> reg.score(X, y)
|
||||
0.9884...
|
||||
>>> reg.predict(X[:1,])
|
||||
array([-31.5871...])
|
||||
|
||||
References
|
||||
----------
|
||||
- Theil-Sen Estimators in a Multiple Linear Regression Model, 2009
|
||||
Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang
|
||||
http://home.olemiss.edu/~xdang/papers/MTSE.pdf
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, *, fit_intercept=True, copy_X=True,
|
||||
max_subpopulation=1e4, n_subsamples=None, max_iter=300,
|
||||
tol=1.e-3, random_state=None, n_jobs=None, verbose=False):
|
||||
self.fit_intercept = fit_intercept
|
||||
self.copy_X = copy_X
|
||||
self.max_subpopulation = int(max_subpopulation)
|
||||
self.n_subsamples = n_subsamples
|
||||
self.max_iter = max_iter
|
||||
self.tol = tol
|
||||
self.random_state = random_state
|
||||
self.n_jobs = n_jobs
|
||||
self.verbose = verbose
|
||||
|
||||
def _check_subparams(self, n_samples, n_features):
|
||||
n_subsamples = self.n_subsamples
|
||||
|
||||
if self.fit_intercept:
|
||||
n_dim = n_features + 1
|
||||
else:
|
||||
n_dim = n_features
|
||||
|
||||
if n_subsamples is not None:
|
||||
if n_subsamples > n_samples:
|
||||
raise ValueError("Invalid parameter since n_subsamples > "
|
||||
"n_samples ({0} > {1}).".format(n_subsamples,
|
||||
n_samples))
|
||||
if n_samples >= n_features:
|
||||
if n_dim > n_subsamples:
|
||||
plus_1 = "+1" if self.fit_intercept else ""
|
||||
raise ValueError("Invalid parameter since n_features{0} "
|
||||
"> n_subsamples ({1} > {2})."
|
||||
"".format(plus_1, n_dim, n_samples))
|
||||
else: # if n_samples < n_features
|
||||
if n_subsamples != n_samples:
|
||||
raise ValueError("Invalid parameter since n_subsamples != "
|
||||
"n_samples ({0} != {1}) while n_samples "
|
||||
"< n_features.".format(n_subsamples,
|
||||
n_samples))
|
||||
else:
|
||||
n_subsamples = min(n_dim, n_samples)
|
||||
|
||||
if self.max_subpopulation <= 0:
|
||||
raise ValueError("Subpopulation must be strictly positive "
|
||||
"({0} <= 0).".format(self.max_subpopulation))
|
||||
|
||||
all_combinations = max(1, np.rint(binom(n_samples, n_subsamples)))
|
||||
n_subpopulation = int(min(self.max_subpopulation, all_combinations))
|
||||
|
||||
return n_subsamples, n_subpopulation
|
||||
|
||||
def fit(self, X, y):
|
||||
"""Fit linear model.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : numpy array of shape [n_samples, n_features]
|
||||
Training data
|
||||
y : numpy array of shape [n_samples]
|
||||
Target values
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : returns an instance of self.
|
||||
"""
|
||||
random_state = check_random_state(self.random_state)
|
||||
X, y = self._validate_data(X, y, y_numeric=True)
|
||||
n_samples, n_features = X.shape
|
||||
n_subsamples, self.n_subpopulation_ = self._check_subparams(n_samples,
|
||||
n_features)
|
||||
self.breakdown_ = _breakdown_point(n_samples, n_subsamples)
|
||||
|
||||
if self.verbose:
|
||||
print("Breakdown point: {0}".format(self.breakdown_))
|
||||
print("Number of samples: {0}".format(n_samples))
|
||||
tol_outliers = int(self.breakdown_ * n_samples)
|
||||
print("Tolerable outliers: {0}".format(tol_outliers))
|
||||
print("Number of subpopulations: {0}".format(
|
||||
self.n_subpopulation_))
|
||||
|
||||
# Determine indices of subpopulation
|
||||
if np.rint(binom(n_samples, n_subsamples)) <= self.max_subpopulation:
|
||||
indices = list(combinations(range(n_samples), n_subsamples))
|
||||
else:
|
||||
indices = [random_state.choice(n_samples, size=n_subsamples,
|
||||
replace=False)
|
||||
for _ in range(self.n_subpopulation_)]
|
||||
|
||||
n_jobs = effective_n_jobs(self.n_jobs)
|
||||
index_list = np.array_split(indices, n_jobs)
|
||||
weights = Parallel(n_jobs=n_jobs,
|
||||
verbose=self.verbose)(
|
||||
delayed(_lstsq)(X, y, index_list[job], self.fit_intercept)
|
||||
for job in range(n_jobs))
|
||||
weights = np.vstack(weights)
|
||||
self.n_iter_, coefs = _spatial_median(weights,
|
||||
max_iter=self.max_iter,
|
||||
tol=self.tol)
|
||||
|
||||
if self.fit_intercept:
|
||||
self.intercept_ = coefs[0]
|
||||
self.coef_ = coefs[1:]
|
||||
else:
|
||||
self.intercept_ = 0.
|
||||
self.coef_ = coefs
|
||||
|
||||
return self
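# A minimal robustness sketch (not part of the library; the exact numbers
# depend on the random data): corrupt a few targets and compare the slope with
# ordinary least squares.
import numpy as np
from sklearn.linear_model import LinearRegression, TheilSenRegressor

rng = np.random.RandomState(0)
X = rng.randn(100, 1)
y = 3.0 * X.ravel() + 0.1 * rng.randn(100)
y[X.ravel() > 1.5] += 100.0          # gross outliers on the largest x values
ols = LinearRegression().fit(X, y)
ts = TheilSenRegressor(random_state=0).fit(X, y)
# ts.coef_ is expected to stay close to 3, while ols.coef_ is pulled upwards
# by the corrupted samples.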
|
18
venv/Lib/site-packages/sklearn/linear_model/base.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _base # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.base'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_base, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
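# Effect of this generated shim (a sketch, not part of the file): the old
# public import path keeps working but emits a deprecation warning and simply
# forwards attribute lookups to the private implementation module, e.g.
#
#   from sklearn.linear_model.base import LinearRegression   # warns, forwarded
#   from sklearn.linear_model import LinearRegression        # preferred path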
|
18
venv/Lib/site-packages/sklearn/linear_model/bayes.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _bayes # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.bayes'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_bayes, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
18
venv/Lib/site-packages/sklearn/linear_model/cd_fast.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _cd_fast # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.cd_fast'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_cd_fast, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _coordinate_descent # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.coordinate_descent'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_coordinate_descent, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
18
venv/Lib/site-packages/sklearn/linear_model/huber.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _huber # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.huber'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_huber, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
18
venv/Lib/site-packages/sklearn/linear_model/least_angle.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _least_angle # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.least_angle'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_least_angle, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
18
venv/Lib/site-packages/sklearn/linear_model/logistic.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _logistic # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.logistic'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_logistic, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
18
venv/Lib/site-packages/sklearn/linear_model/omp.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _omp # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.omp'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_omp, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _passive_aggressive # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.passive_aggressive'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_passive_aggressive, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
18
venv/Lib/site-packages/sklearn/linear_model/perceptron.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _perceptron # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.perceptron'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_perceptron, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
18
venv/Lib/site-packages/sklearn/linear_model/ransac.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _ransac # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.ransac'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_ransac, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
18
venv/Lib/site-packages/sklearn/linear_model/ridge.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _ridge # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.ridge'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_ridge, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
18
venv/Lib/site-packages/sklearn/linear_model/sag.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _sag # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.sag'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_sag, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
18
venv/Lib/site-packages/sklearn/linear_model/sag_fast.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _sag_fast # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.sag_fast'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_sag_fast, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
44
venv/Lib/site-packages/sklearn/linear_model/setup.py
Normal file
|
@ -0,0 +1,44 @@
|
|||
import os
|
||||
import numpy
|
||||
|
||||
from sklearn._build_utils import gen_from_templates
|
||||
|
||||
|
||||
def configuration(parent_package='', top_path=None):
|
||||
from numpy.distutils.misc_util import Configuration
|
||||
|
||||
config = Configuration('linear_model', parent_package, top_path)
|
||||
|
||||
libraries = []
|
||||
if os.name == 'posix':
|
||||
libraries.append('m')
|
||||
|
||||
config.add_extension('_cd_fast',
|
||||
sources=['_cd_fast.pyx'],
|
||||
include_dirs=numpy.get_include(),
|
||||
libraries=libraries)
|
||||
|
||||
config.add_extension('_sgd_fast',
|
||||
sources=['_sgd_fast.pyx'],
|
||||
include_dirs=numpy.get_include(),
|
||||
libraries=libraries)
|
||||
|
||||
# generate sag_fast from template
|
||||
templates = ['sklearn/linear_model/_sag_fast.pyx.tp']
|
||||
gen_from_templates(templates, top_path)
|
||||
|
||||
config.add_extension('_sag_fast',
|
||||
sources=['_sag_fast.pyx'],
|
||||
include_dirs=numpy.get_include())
|
||||
|
||||
# add other directories
|
||||
config.add_subpackage('tests')
|
||||
config.add_subpackage('_glm')
|
||||
config.add_subpackage('_glm/tests')
|
||||
|
||||
return config
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from numpy.distutils.core import setup
|
||||
setup(**configuration(top_path='').todict())
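# Usage note (a sketch, not part of the file): this sub-package configuration
# is normally consumed by the top-level scikit-learn setup; building in place
# from the source root with numpy.distutils looks roughly like
#
#   python setup.py build_ext --inplace
#
# which compiles the _cd_fast, _sgd_fast and _sag_fast extensions (assuming
# Cython and a C compiler are available).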
|
18
venv/Lib/site-packages/sklearn/linear_model/sgd_fast.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _sgd_fast # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.linear_model.sgd_fast'
|
||||
correct_import_path = 'sklearn.linear_model'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_sgd_fast, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
18
venv/Lib/site-packages/sklearn/linear_model/stochastic_gradient.py
Normal file
@@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _stochastic_gradient  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.linear_model.stochastic_gradient'
correct_import_path = 'sklearn.linear_model'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)


def __getattr__(name):
    return getattr(_stochastic_gradient, name)


if not sys.version_info >= (3, 7):
    Pep562(__name__)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
530
venv/Lib/site-packages/sklearn/linear_model/tests/test_base.py
Normal file
@@ -0,0 +1,530 @@
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#         Fabian Pedregosa <fabian.pedregosa@inria.fr>
#
# License: BSD 3 clause

import pytest

import numpy as np
from scipy import sparse
from scipy import linalg

from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_allclose
from sklearn.utils.fixes import parse_version

from sklearn.linear_model import LinearRegression
from sklearn.linear_model._base import _preprocess_data
from sklearn.linear_model._base import _rescale_data
from sklearn.linear_model._base import make_dataset
from sklearn.utils import check_random_state
from sklearn.datasets import make_sparse_uncorrelated
from sklearn.datasets import make_regression
from sklearn.datasets import load_iris

rng = np.random.RandomState(0)
rtol = 1e-6


def test_linear_regression():
    # Test LinearRegression on a simple dataset.
    # a simple dataset
    X = [[1], [2]]
    Y = [1, 2]

    reg = LinearRegression()
    reg.fit(X, Y)

    assert_array_almost_equal(reg.coef_, [1])
    assert_array_almost_equal(reg.intercept_, [0])
    assert_array_almost_equal(reg.predict(X), [1, 2])

    # test it also for degenerate input
    X = [[1]]
    Y = [0]

    reg = LinearRegression()
    reg.fit(X, Y)
    assert_array_almost_equal(reg.coef_, [0])
    assert_array_almost_equal(reg.intercept_, [0])
    assert_array_almost_equal(reg.predict(X), [0])


def test_linear_regression_sample_weights():
    # TODO: loop over sparse data as well

    rng = np.random.RandomState(0)

    # It would not work with under-determined systems
    for n_samples, n_features in ((6, 5), ):

        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        for intercept in (True, False):

            # LinearRegression with explicit sample_weight
            reg = LinearRegression(fit_intercept=intercept)
            reg.fit(X, y, sample_weight=sample_weight)
            coefs1 = reg.coef_
            inter1 = reg.intercept_

            assert reg.coef_.shape == (X.shape[1], )  # sanity checks
            assert reg.score(X, y) > 0.5

            # Closed form of the weighted least square
            # theta = (X^T W X)^(-1) * X^T W y
            W = np.diag(sample_weight)
            if intercept is False:
                X_aug = X
            else:
                dummy_column = np.ones(shape=(n_samples, 1))
                X_aug = np.concatenate((dummy_column, X), axis=1)

            coefs2 = linalg.solve(X_aug.T.dot(W).dot(X_aug),
                                  X_aug.T.dot(W).dot(y))

            if intercept is False:
                assert_array_almost_equal(coefs1, coefs2)
            else:
                assert_array_almost_equal(coefs1, coefs2[1:])
                assert_almost_equal(inter1, coefs2[0])
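The closed form checked in the test above is worth seeing on its own. A minimal numpy sketch with made-up data (names are illustrative, not from the test file): solve the weighted normal equations theta = (X^T W X)^(-1) X^T W y directly and compare with LinearRegression fitted with sample_weight.

import numpy as np
from numpy.linalg import solve
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(42)
X = rng.randn(8, 3)
y = rng.randn(8)
w = 1.0 + rng.rand(8)

# Weighted normal equations, no intercept: theta = (X^T W X)^(-1) X^T W y
W = np.diag(w)
theta = solve(X.T @ W @ X, X.T @ W @ y)

reg = LinearRegression(fit_intercept=False).fit(X, y, sample_weight=w)
print(np.allclose(reg.coef_, theta))  # expected: True
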
def test_raises_value_error_if_sample_weights_greater_than_1d():
    # Sample weights must be either scalar or 1D

    n_sampless = [2, 3]
    n_featuress = [3, 2]

    for n_samples, n_features in zip(n_sampless, n_featuress):
        X = rng.randn(n_samples, n_features)
        y = rng.randn(n_samples)
        sample_weights_OK = rng.randn(n_samples) ** 2 + 1
        sample_weights_OK_1 = 1.
        sample_weights_OK_2 = 2.

        reg = LinearRegression()

        # make sure the "OK" sample weights actually work
        reg.fit(X, y, sample_weights_OK)
        reg.fit(X, y, sample_weights_OK_1)
        reg.fit(X, y, sample_weights_OK_2)


def test_fit_intercept():
    # Test assertions on betas shape.
    X2 = np.array([[0.38349978, 0.61650022],
                   [0.58853682, 0.41146318]])
    X3 = np.array([[0.27677969, 0.70693172, 0.01628859],
                   [0.08385139, 0.20692515, 0.70922346]])
    y = np.array([1, 1])

    lr2_without_intercept = LinearRegression(fit_intercept=False).fit(X2, y)
    lr2_with_intercept = LinearRegression().fit(X2, y)

    lr3_without_intercept = LinearRegression(fit_intercept=False).fit(X3, y)
    lr3_with_intercept = LinearRegression().fit(X3, y)

    assert (lr2_with_intercept.coef_.shape ==
            lr2_without_intercept.coef_.shape)
    assert (lr3_with_intercept.coef_.shape ==
            lr3_without_intercept.coef_.shape)
    assert (lr2_without_intercept.coef_.ndim ==
            lr3_without_intercept.coef_.ndim)


def test_linear_regression_sparse(random_state=0):
    # Test that linear regression also works with sparse data
    random_state = check_random_state(random_state)
    for i in range(10):
        n = 100
        X = sparse.eye(n, n)
        beta = random_state.rand(n)
        y = X * beta[:, np.newaxis]

        ols = LinearRegression()
        ols.fit(X, y.ravel())
        assert_array_almost_equal(beta, ols.coef_ + ols.intercept_)

        assert_array_almost_equal(ols.predict(X) - y.ravel(), 0)


@pytest.mark.parametrize('normalize', [True, False])
@pytest.mark.parametrize('fit_intercept', [True, False])
def test_linear_regression_sparse_equal_dense(normalize, fit_intercept):
    # Test that linear regression agrees between sparse and dense
    rng = check_random_state(0)
    n_samples = 200
    n_features = 2
    X = rng.randn(n_samples, n_features)
    X[X < 0.1] = 0.
    Xcsr = sparse.csr_matrix(X)
    y = rng.rand(n_samples)
    params = dict(normalize=normalize, fit_intercept=fit_intercept)
    clf_dense = LinearRegression(**params)
    clf_sparse = LinearRegression(**params)
    clf_dense.fit(X, y)
    clf_sparse.fit(Xcsr, y)
    assert clf_dense.intercept_ == pytest.approx(clf_sparse.intercept_)
    assert_allclose(clf_dense.coef_, clf_sparse.coef_)


def test_linear_regression_multiple_outcome(random_state=0):
    # Test multiple-outcome linear regressions
    X, y = make_regression(random_state=random_state)

    Y = np.vstack((y, y)).T
    n_features = X.shape[1]

    reg = LinearRegression()
    reg.fit((X), Y)
    assert reg.coef_.shape == (2, n_features)
    Y_pred = reg.predict(X)
    reg.fit(X, y)
    y_pred = reg.predict(X)
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)


def test_linear_regression_sparse_multiple_outcome(random_state=0):
    # Test multiple-outcome linear regressions with sparse data
    random_state = check_random_state(random_state)
    X, y = make_sparse_uncorrelated(random_state=random_state)
    X = sparse.coo_matrix(X)
    Y = np.vstack((y, y)).T
    n_features = X.shape[1]

    ols = LinearRegression()
    ols.fit(X, Y)
    assert ols.coef_.shape == (2, n_features)
    Y_pred = ols.predict(X)
    ols.fit(X, y.ravel())
    y_pred = ols.predict(X)
    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)


def test_linear_regression_pd_sparse_dataframe_warning():
    pd = pytest.importorskip('pandas')
    # restrict the pd versions < '0.24.0' as they have a bug in is_sparse func
    if parse_version(pd.__version__) < parse_version('0.24.0'):
        pytest.skip("pandas 0.24+ required.")

    # Warning is raised only when some of the columns is sparse
    df = pd.DataFrame({'0': np.random.randn(10)})
    for col in range(1, 4):
        arr = np.random.randn(10)
        arr[:8] = 0
        # all columns but the first column is sparse
        if col != 0:
            arr = pd.arrays.SparseArray(arr, fill_value=0)
        df[str(col)] = arr

    msg = "pandas.DataFrame with sparse columns found."
    with pytest.warns(UserWarning, match=msg):
        reg = LinearRegression()
        reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])

    # does not warn when the whole dataframe is sparse
    df['0'] = pd.arrays.SparseArray(df['0'], fill_value=0)
    assert hasattr(df, "sparse")

    with pytest.warns(None) as record:
        reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])
    assert not record


def test_preprocess_data():
    n_samples = 200
    n_features = 2
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    expected_X_mean = np.mean(X, axis=0)
    expected_X_norm = np.std(X, axis=0) * np.sqrt(X.shape[0])
    expected_y_mean = np.mean(y, axis=0)

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=False, normalize=False)
    assert_array_almost_equal(X_mean, np.zeros(n_features))
    assert_array_almost_equal(y_mean, 0)
    assert_array_almost_equal(X_norm, np.ones(n_features))
    assert_array_almost_equal(Xt, X)
    assert_array_almost_equal(yt, y)

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=True, normalize=False)
    assert_array_almost_equal(X_mean, expected_X_mean)
    assert_array_almost_equal(y_mean, expected_y_mean)
    assert_array_almost_equal(X_norm, np.ones(n_features))
    assert_array_almost_equal(Xt, X - expected_X_mean)
    assert_array_almost_equal(yt, y - expected_y_mean)

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=True, normalize=True)
    assert_array_almost_equal(X_mean, expected_X_mean)
    assert_array_almost_equal(y_mean, expected_y_mean)
    assert_array_almost_equal(X_norm, expected_X_norm)
    assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
    assert_array_almost_equal(yt, y - expected_y_mean)
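A compact restatement of what the assertions above pin down, as a standalone sketch. _preprocess_data is a private helper, so its call signature here is taken from the test itself and may differ across scikit-learn versions; the data is made up.

import numpy as np
from sklearn.linear_model._base import _preprocess_data  # private helper, per the test above

rng = np.random.RandomState(0)
X = rng.rand(50, 2)
y = rng.rand(50)

Xt, yt, X_mean, y_mean, X_norm = _preprocess_data(
    X, y, fit_intercept=True, normalize=True)

# Centering uses the column means; scaling uses std(X, axis=0) * sqrt(n_samples).
manual_scale = np.std(X, axis=0) * np.sqrt(X.shape[0])
print(np.allclose(Xt, (X - X.mean(axis=0)) / manual_scale))  # expected: True
print(np.allclose(yt, y - y.mean()))                         # expected: True
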
def test_preprocess_data_multioutput():
    n_samples = 200
    n_features = 3
    n_outputs = 2
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_outputs)
    expected_y_mean = np.mean(y, axis=0)

    args = [X, sparse.csc_matrix(X)]
    for X in args:
        _, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=False,
                                               normalize=False)
        assert_array_almost_equal(y_mean, np.zeros(n_outputs))
        assert_array_almost_equal(yt, y)

        _, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=True,
                                               normalize=False)
        assert_array_almost_equal(y_mean, expected_y_mean)
        assert_array_almost_equal(yt, y - y_mean)

        _, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=True,
                                               normalize=True)
        assert_array_almost_equal(y_mean, expected_y_mean)
        assert_array_almost_equal(yt, y - y_mean)


def test_preprocess_data_weighted():
    n_samples = 200
    n_features = 2
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    sample_weight = rng.rand(n_samples)
    expected_X_mean = np.average(X, axis=0, weights=sample_weight)
    expected_y_mean = np.average(y, axis=0, weights=sample_weight)

    # XXX: if normalize=True, should we expect a weighted standard deviation?
    # Currently not weighted, but calculated with respect to weighted mean
    expected_X_norm = (np.sqrt(X.shape[0]) *
                       np.mean((X - expected_X_mean) ** 2, axis=0) ** .5)

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=True, normalize=False,
                         sample_weight=sample_weight)
    assert_array_almost_equal(X_mean, expected_X_mean)
    assert_array_almost_equal(y_mean, expected_y_mean)
    assert_array_almost_equal(X_norm, np.ones(n_features))
    assert_array_almost_equal(Xt, X - expected_X_mean)
    assert_array_almost_equal(yt, y - expected_y_mean)

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=True, normalize=True,
                         sample_weight=sample_weight)
    assert_array_almost_equal(X_mean, expected_X_mean)
    assert_array_almost_equal(y_mean, expected_y_mean)
    assert_array_almost_equal(X_norm, expected_X_norm)
    assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
    assert_array_almost_equal(yt, y - expected_y_mean)


def test_sparse_preprocess_data_with_return_mean():
    n_samples = 200
    n_features = 2
    # random_state not supported yet in sparse.rand
    X = sparse.rand(n_samples, n_features, density=.5)  # , random_state=rng
    X = X.tolil()
    y = rng.rand(n_samples)
    XA = X.toarray()
    expected_X_norm = np.std(XA, axis=0) * np.sqrt(X.shape[0])

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=False, normalize=False,
                         return_mean=True)
    assert_array_almost_equal(X_mean, np.zeros(n_features))
    assert_array_almost_equal(y_mean, 0)
    assert_array_almost_equal(X_norm, np.ones(n_features))
    assert_array_almost_equal(Xt.A, XA)
    assert_array_almost_equal(yt, y)

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=True, normalize=False,
                         return_mean=True)
    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
    assert_array_almost_equal(X_norm, np.ones(n_features))
    assert_array_almost_equal(Xt.A, XA)
    assert_array_almost_equal(yt, y - np.mean(y, axis=0))

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=True, normalize=True,
                         return_mean=True)
    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
    assert_array_almost_equal(X_norm, expected_X_norm)
    assert_array_almost_equal(Xt.A, XA / expected_X_norm)
    assert_array_almost_equal(yt, y - np.mean(y, axis=0))


def test_csr_preprocess_data():
    # Test output format of _preprocess_data, when input is csr
    X, y = make_regression()
    X[X < 2.5] = 0.0
    csr = sparse.csr_matrix(X)
    csr_, y, _, _, _ = _preprocess_data(csr, y, True)
    assert csr_.getformat() == 'csr'


@pytest.mark.parametrize('is_sparse', (True, False))
@pytest.mark.parametrize('to_copy', (True, False))
def test_preprocess_copy_data_no_checks(is_sparse, to_copy):
    X, y = make_regression()
    X[X < 2.5] = 0.0

    if is_sparse:
        X = sparse.csr_matrix(X)

    X_, y_, _, _, _ = _preprocess_data(X, y, True,
                                       copy=to_copy, check_input=False)

    if to_copy and is_sparse:
        assert not np.may_share_memory(X_.data, X.data)
    elif to_copy:
        assert not np.may_share_memory(X_, X)
    elif is_sparse:
        assert np.may_share_memory(X_.data, X.data)
    else:
        assert np.may_share_memory(X_, X)


def test_dtype_preprocess_data():
    n_samples = 200
    n_features = 2
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)

    X_32 = np.asarray(X, dtype=np.float32)
    y_32 = np.asarray(y, dtype=np.float32)
    X_64 = np.asarray(X, dtype=np.float64)
    y_64 = np.asarray(y, dtype=np.float64)

    for fit_intercept in [True, False]:
        for normalize in [True, False]:

            Xt_32, yt_32, X_mean_32, y_mean_32, X_norm_32 = _preprocess_data(
                X_32, y_32, fit_intercept=fit_intercept, normalize=normalize,
                return_mean=True)

            Xt_64, yt_64, X_mean_64, y_mean_64, X_norm_64 = _preprocess_data(
                X_64, y_64, fit_intercept=fit_intercept, normalize=normalize,
                return_mean=True)

            Xt_3264, yt_3264, X_mean_3264, y_mean_3264, X_norm_3264 = (
                _preprocess_data(X_32, y_64, fit_intercept=fit_intercept,
                                 normalize=normalize, return_mean=True))

            Xt_6432, yt_6432, X_mean_6432, y_mean_6432, X_norm_6432 = (
                _preprocess_data(X_64, y_32, fit_intercept=fit_intercept,
                                 normalize=normalize, return_mean=True))

            assert Xt_32.dtype == np.float32
            assert yt_32.dtype == np.float32
            assert X_mean_32.dtype == np.float32
            assert y_mean_32.dtype == np.float32
            assert X_norm_32.dtype == np.float32

            assert Xt_64.dtype == np.float64
            assert yt_64.dtype == np.float64
            assert X_mean_64.dtype == np.float64
            assert y_mean_64.dtype == np.float64
            assert X_norm_64.dtype == np.float64

            assert Xt_3264.dtype == np.float32
            assert yt_3264.dtype == np.float32
            assert X_mean_3264.dtype == np.float32
            assert y_mean_3264.dtype == np.float32
            assert X_norm_3264.dtype == np.float32

            assert Xt_6432.dtype == np.float64
            assert yt_6432.dtype == np.float64
            assert X_mean_6432.dtype == np.float64
            assert y_mean_6432.dtype == np.float64
            assert X_norm_6432.dtype == np.float64

            assert X_32.dtype == np.float32
            assert y_32.dtype == np.float32
            assert X_64.dtype == np.float64
            assert y_64.dtype == np.float64

            assert_array_almost_equal(Xt_32, Xt_64)
            assert_array_almost_equal(yt_32, yt_64)
            assert_array_almost_equal(X_mean_32, X_mean_64)
            assert_array_almost_equal(y_mean_32, y_mean_64)
            assert_array_almost_equal(X_norm_32, X_norm_64)


@pytest.mark.parametrize('n_targets', [None, 2])
def test_rescale_data_dense(n_targets):
    n_samples = 200
    n_features = 2

    sample_weight = 1.0 + rng.rand(n_samples)
    X = rng.rand(n_samples, n_features)
    if n_targets is None:
        y = rng.rand(n_samples)
    else:
        y = rng.rand(n_samples, n_targets)
    rescaled_X, rescaled_y = _rescale_data(X, y, sample_weight)
    rescaled_X2 = X * np.sqrt(sample_weight)[:, np.newaxis]
    if n_targets is None:
        rescaled_y2 = y * np.sqrt(sample_weight)
    else:
        rescaled_y2 = y * np.sqrt(sample_weight)[:, np.newaxis]
    assert_array_almost_equal(rescaled_X, rescaled_X2)
    assert_array_almost_equal(rescaled_y, rescaled_y2)
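The sqrt(sample_weight) scaling checked just above is the standard reduction of weighted least squares to ordinary least squares. A small illustrative sketch (data and names made up): plain least squares on the rescaled data recovers the weighted solution.

import numpy as np

rng = np.random.RandomState(1)
X = rng.rand(20, 3)
y = rng.rand(20)
w = 1.0 + rng.rand(20)

# Scale rows of X and entries of y by sqrt(w) ...
Xs = X * np.sqrt(w)[:, np.newaxis]
ys = y * np.sqrt(w)

# ... then ordinary least squares on (Xs, ys) minimizes
# sum_i w_i * (y_i - x_i . theta)^2, i.e. the weighted problem.
theta_ols, *_ = np.linalg.lstsq(Xs, ys, rcond=None)
theta_wls = np.linalg.solve(X.T @ np.diag(w) @ X, X.T @ np.diag(w) @ y)
print(np.allclose(theta_ols, theta_wls))  # expected: True
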
def test_fused_types_make_dataset():
    iris = load_iris()

    X_32 = iris.data.astype(np.float32)
    y_32 = iris.target.astype(np.float32)
    X_csr_32 = sparse.csr_matrix(X_32)
    sample_weight_32 = np.arange(y_32.size, dtype=np.float32)

    X_64 = iris.data.astype(np.float64)
    y_64 = iris.target.astype(np.float64)
    X_csr_64 = sparse.csr_matrix(X_64)
    sample_weight_64 = np.arange(y_64.size, dtype=np.float64)

    # array
    dataset_32, _ = make_dataset(X_32, y_32, sample_weight_32)
    dataset_64, _ = make_dataset(X_64, y_64, sample_weight_64)
    xi_32, yi_32, _, _ = dataset_32._next_py()
    xi_64, yi_64, _, _ = dataset_64._next_py()
    xi_data_32, _, _ = xi_32
    xi_data_64, _, _ = xi_64

    assert xi_data_32.dtype == np.float32
    assert xi_data_64.dtype == np.float64
    assert_allclose(yi_64, yi_32, rtol=rtol)

    # csr
    datasetcsr_32, _ = make_dataset(X_csr_32, y_32, sample_weight_32)
    datasetcsr_64, _ = make_dataset(X_csr_64, y_64, sample_weight_64)
    xicsr_32, yicsr_32, _, _ = datasetcsr_32._next_py()
    xicsr_64, yicsr_64, _, _ = datasetcsr_64._next_py()
    xicsr_data_32, _, _ = xicsr_32
    xicsr_data_64, _, _ = xicsr_64

    assert xicsr_data_32.dtype == np.float32
    assert xicsr_data_64.dtype == np.float64

    assert_allclose(xicsr_data_64, xicsr_data_32, rtol=rtol)
    assert_allclose(yicsr_64, yicsr_32, rtol=rtol)

    assert_array_equal(xi_data_32, xicsr_data_32)
    assert_array_equal(xi_data_64, xicsr_data_64)
    assert_array_equal(yi_32, yicsr_32)
    assert_array_equal(yi_64, yicsr_64)
274
venv/Lib/site-packages/sklearn/linear_model/tests/test_bayes.py
Normal file
@@ -0,0 +1,274 @@
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#         Fabian Pedregosa <fabian.pedregosa@inria.fr>
#
# License: BSD 3 clause

from math import log

import numpy as np
from scipy.linalg import pinvh
import pytest


from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_less
from sklearn.utils._testing import assert_raise_message
from sklearn.utils import check_random_state
from sklearn.linear_model import BayesianRidge, ARDRegression
from sklearn.linear_model import Ridge
from sklearn import datasets
from sklearn.utils.extmath import fast_logdet

diabetes = datasets.load_diabetes()


def test_n_iter():
    """Check value of n_iter."""
    X = np.array([[1], [2], [6], [8], [10]])
    y = np.array([1, 2, 6, 8, 10])
    clf = BayesianRidge(n_iter=0)
    msg = "n_iter should be greater than or equal to 1."
    assert_raise_message(ValueError, msg, clf.fit, X, y)


def test_bayesian_ridge_scores():
    """Check scores attribute shape"""
    X, y = diabetes.data, diabetes.target

    clf = BayesianRidge(compute_score=True)
    clf.fit(X, y)

    assert clf.scores_.shape == (clf.n_iter_ + 1,)


def test_bayesian_ridge_score_values():
    """Check value of score on toy example.

    Compute log marginal likelihood with equation (36) in Sparse Bayesian
    Learning and the Relevance Vector Machine (Tipping, 2001):

    - 0.5 * (log |Id/alpha + X.X^T/lambda| +
             y^T.(Id/alpha + X.X^T/lambda).y + n * log(2 * pi))
    + lambda_1 * log(lambda) - lambda_2 * lambda
    + alpha_1 * log(alpha) - alpha_2 * alpha

    and check equality with the score computed during training.
    """

    X, y = diabetes.data, diabetes.target
    n_samples = X.shape[0]
    # check with initial values of alpha and lambda (see code for the values)
    eps = np.finfo(np.float64).eps
    alpha_ = 1. / (np.var(y) + eps)
    lambda_ = 1.

    # value of the parameters of the Gamma hyperpriors
    alpha_1 = 0.1
    alpha_2 = 0.1
    lambda_1 = 0.1
    lambda_2 = 0.1

    # compute score using formula of docstring
    score = lambda_1 * log(lambda_) - lambda_2 * lambda_
    score += alpha_1 * log(alpha_) - alpha_2 * alpha_
    M = 1. / alpha_ * np.eye(n_samples) + 1. / lambda_ * np.dot(X, X.T)
    M_inv = pinvh(M)
    score += - 0.5 * (fast_logdet(M) + np.dot(y.T, np.dot(M_inv, y)) +
                      n_samples * log(2 * np.pi))

    # compute score with BayesianRidge
    clf = BayesianRidge(alpha_1=alpha_1, alpha_2=alpha_2,
                        lambda_1=lambda_1, lambda_2=lambda_2,
                        n_iter=1, fit_intercept=False, compute_score=True)
    clf.fit(X, y)

    assert_almost_equal(clf.scores_[0], score, decimal=9)
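For readability, the quantity actually computed in the function body above (note the matrix inverse, which the ASCII docstring elides), written out in LaTeX with the same symbols ($n$ samples, Gamma hyperparameters $\alpha_1, \alpha_2, \lambda_1, \lambda_2$, and $M = I_n/\alpha + X X^\top/\lambda$):

\text{score} = -\tfrac{1}{2}\left(\log\lvert M\rvert + y^\top M^{-1} y + n \log(2\pi)\right)
               + \lambda_1 \log\lambda - \lambda_2 \lambda
               + \alpha_1 \log\alpha - \alpha_2 \alpha
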
def test_bayesian_ridge_parameter():
    # Test correctness of lambda_ and alpha_ parameters (GitHub issue #8224)
    X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
    y = np.array([1, 2, 3, 2, 0, 4, 5]).T

    # A Ridge regression model using an alpha value equal to the ratio of
    # lambda_ and alpha_ from the Bayesian Ridge model must be identical
    br_model = BayesianRidge(compute_score=True).fit(X, y)
    rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit(X, y)
    assert_array_almost_equal(rr_model.coef_, br_model.coef_)
    assert_almost_equal(rr_model.intercept_, br_model.intercept_)
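The same correspondence as a standalone usage sketch: the effective L2 penalty of a fitted BayesianRidge can be read off as the ratio lambda_ / alpha_, and a plain Ridge with that alpha should give essentially the same coefficients (toy data, illustrative only).

import numpy as np
from sklearn.linear_model import BayesianRidge, Ridge

X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]], dtype=float)
y = np.array([1, 2, 3, 2, 0, 4, 5], dtype=float)

br = BayesianRidge().fit(X, y)
rr = Ridge(alpha=br.lambda_ / br.alpha_).fit(X, y)   # same penalty, non-Bayesian solver
print(np.allclose(br.coef_, rr.coef_, atol=1e-3))    # expected: True
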
def test_bayesian_sample_weights():
    # Test correctness of the sample_weights method
    X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
    y = np.array([1, 2, 3, 2, 0, 4, 5]).T
    w = np.array([4, 3, 3, 1, 1, 2, 3]).T

    # A Ridge regression model using an alpha value equal to the ratio of
    # lambda_ and alpha_ from the Bayesian Ridge model must be identical
    br_model = BayesianRidge(compute_score=True).fit(X, y, sample_weight=w)
    rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit(
        X, y, sample_weight=w)
    assert_array_almost_equal(rr_model.coef_, br_model.coef_)
    assert_almost_equal(rr_model.intercept_, br_model.intercept_)


def test_toy_bayesian_ridge_object():
    # Test BayesianRidge on toy
    X = np.array([[1], [2], [6], [8], [10]])
    Y = np.array([1, 2, 6, 8, 10])
    clf = BayesianRidge(compute_score=True)
    clf.fit(X, Y)

    # Check that the model could approximately learn the identity function
    test = [[1], [3], [4]]
    assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2)


def test_bayesian_initial_params():
    # Test BayesianRidge with initial values (alpha_init, lambda_init)
    X = np.vander(np.linspace(0, 4, 5), 4)
    y = np.array([0., 1., 0., -1., 0.])    # y = (x^3 - 6x^2 + 8x) / 3

    # In this case, starting from the default initial values will increase
    # the bias of the fitted curve. So, lambda_init should be small.
    reg = BayesianRidge(alpha_init=1., lambda_init=1e-3)
    # Check the R2 score nearly equals to one.
    r2 = reg.fit(X, y).score(X, y)
    assert_almost_equal(r2, 1.)


def test_prediction_bayesian_ridge_ard_with_constant_input():
    # Test BayesianRidge and ARDRegression predictions for edge case of
    # constant target vectors
    n_samples = 4
    n_features = 5
    random_state = check_random_state(42)
    constant_value = random_state.rand()
    X = random_state.random_sample((n_samples, n_features))
    y = np.full(n_samples, constant_value,
                dtype=np.array(constant_value).dtype)
    expected = np.full(n_samples, constant_value,
                       dtype=np.array(constant_value).dtype)

    for clf in [BayesianRidge(), ARDRegression()]:
        y_pred = clf.fit(X, y).predict(X)
        assert_array_almost_equal(y_pred, expected)


def test_std_bayesian_ridge_ard_with_constant_input():
    # Test BayesianRidge and ARDRegression standard dev. for edge case of
    # constant target vector
    # The standard dev. should be relatively small (< 0.01 is tested here)
    n_samples = 10
    n_features = 5
    random_state = check_random_state(42)
    constant_value = random_state.rand()
    X = random_state.random_sample((n_samples, n_features))
    y = np.full(n_samples, constant_value,
                dtype=np.array(constant_value).dtype)
    expected_upper_boundary = 0.01

    for clf in [BayesianRidge(), ARDRegression()]:
        _, y_std = clf.fit(X, y).predict(X, return_std=True)
        assert_array_less(y_std, expected_upper_boundary)


def test_update_of_sigma_in_ard():
    # Checks that `sigma_` is updated correctly after the last iteration
    # of the ARDRegression algorithm. See issue #10128.
    X = np.array([[1, 0],
                  [0, 0]])
    y = np.array([0, 0])
    clf = ARDRegression(n_iter=1)
    clf.fit(X, y)
    # With the inputs above, ARDRegression prunes both of the two coefficients
    # in the first iteration. Hence, the expected shape of `sigma_` is (0, 0).
    assert clf.sigma_.shape == (0, 0)
    # Ensure that no error is thrown at prediction stage
    clf.predict(X, return_std=True)


def test_toy_ard_object():
    # Test BayesianRegression ARD classifier
    X = np.array([[1], [2], [3]])
    Y = np.array([1, 2, 3])
    clf = ARDRegression(compute_score=True)
    clf.fit(X, Y)

    # Check that the model could approximately learn the identity function
    test = [[1], [3], [4]]
    assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2)


@pytest.mark.parametrize('seed', range(100))
@pytest.mark.parametrize('n_samples, n_features', ((10, 100), (100, 10)))
def test_ard_accuracy_on_easy_problem(seed, n_samples, n_features):
    # Check that ARD converges with reasonable accuracy on an easy problem
    # (Github issue #14055)
    X = np.random.RandomState(seed=seed).normal(size=(250, 3))
    y = X[:, 1]

    regressor = ARDRegression()
    regressor.fit(X, y)

    abs_coef_error = np.abs(1 - regressor.coef_[1])
    assert abs_coef_error < 1e-10


def test_return_std():
    # Test return_std option for both Bayesian regressors
    def f(X):
        return np.dot(X, w) + b

    def f_noise(X, noise_mult):
        return f(X) + np.random.randn(X.shape[0]) * noise_mult

    d = 5
    n_train = 50
    n_test = 10

    w = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
    b = 1.0

    X = np.random.random((n_train, d))
    X_test = np.random.random((n_test, d))

    for decimal, noise_mult in enumerate([1, 0.1, 0.01]):
        y = f_noise(X, noise_mult)

        m1 = BayesianRidge()
        m1.fit(X, y)
        y_mean1, y_std1 = m1.predict(X_test, return_std=True)
        assert_array_almost_equal(y_std1, noise_mult, decimal=decimal)

        m2 = ARDRegression()
        m2.fit(X, y)
        y_mean2, y_std2 = m2.predict(X_test, return_std=True)
        assert_array_almost_equal(y_std2, noise_mult, decimal=decimal)


@pytest.mark.parametrize('seed', range(10))
def test_update_sigma(seed):
    # make sure the two update_sigma() helpers are equivalent. The woodbury
    # formula is used when n_samples < n_features, and the other one is used
    # otherwise.

    rng = np.random.RandomState(seed)

    # set n_samples == n_features to avoid instability issues when inverting
    # the matrices. Using the woodbury formula would be unstable when
    # n_samples > n_features
    n_samples = n_features = 10
    X = rng.randn(n_samples, n_features)
    alpha = 1
    lmbda = np.arange(1, n_features + 1)
    keep_lambda = np.array([True] * n_features)

    reg = ARDRegression()

    sigma = reg._update_sigma(X, alpha, lmbda, keep_lambda)
    sigma_woodbury = reg._update_sigma_woodbury(X, alpha, lmbda, keep_lambda)

    np.testing.assert_allclose(sigma, sigma_woodbury)
Some files were not shown because too many files have changed in this diff.