Uploaded Test files

2020-11-12 11:05:57 -05:00 · 2020-11-12 11:05:57 -05:00 · 2e81cb7d99
commit 2e81cb7d99
parent f584ad9d97
16627 changed files with 2065359 additions and 102444 deletions
--- a/venv/Lib/site-packages/sklearn/linear_model/init.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/init.py
@ -0,0 +1,80 @@
+"""
+The :mod:`sklearn.linear_model` module implements a variety of linear models.
+"""
+
+# See http://scikit-learn.sourceforge.net/modules/sgd.html and
+# http://scikit-learn.sourceforge.net/modules/linear_model.html for
+# complete documentation.
+
+from ._base import LinearRegression
+from ._bayes import BayesianRidge, ARDRegression
+from ._least_angle import (Lars, LassoLars, lars_path, lars_path_gram, LarsCV,
+                           LassoLarsCV, LassoLarsIC)
+from ._coordinate_descent import (Lasso, ElasticNet, LassoCV, ElasticNetCV,
+                                  lasso_path, enet_path, MultiTaskLasso,
+                                  MultiTaskElasticNet, MultiTaskElasticNetCV,
+                                  MultiTaskLassoCV)
+from ._glm import (PoissonRegressor,
+                   GammaRegressor, TweedieRegressor)
+from ._huber import HuberRegressor
+from ._sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber
+from ._stochastic_gradient import SGDClassifier, SGDRegressor
+from ._ridge import (Ridge, RidgeCV, RidgeClassifier, RidgeClassifierCV,
+                     ridge_regression)
+from ._logistic import LogisticRegression, LogisticRegressionCV
+from ._omp import (orthogonal_mp, orthogonal_mp_gram,
+                   OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV)
+from ._passive_aggressive import PassiveAggressiveClassifier
+from ._passive_aggressive import PassiveAggressiveRegressor
+from ._perceptron import Perceptron
+
+from ._ransac import RANSACRegressor
+from ._theil_sen import TheilSenRegressor
+
+# Public API of this package: the names re-exported from the private
+# submodules imported above.
+__all__ = ['ARDRegression',
+           'BayesianRidge',
+           'ElasticNet',
+           'ElasticNetCV',
+           'Hinge',
+           'Huber',
+           'HuberRegressor',
+           'Lars',
+           'LarsCV',
+           'Lasso',
+           'LassoCV',
+           'LassoLars',
+           'LassoLarsCV',
+           'LassoLarsIC',
+           'LinearRegression',
+           'Log',
+           'LogisticRegression',
+           'LogisticRegressionCV',
+           'ModifiedHuber',
+           'MultiTaskElasticNet',
+           'MultiTaskElasticNetCV',
+           'MultiTaskLasso',
+           'MultiTaskLassoCV',
+           'OrthogonalMatchingPursuit',
+           'OrthogonalMatchingPursuitCV',
+           'PassiveAggressiveClassifier',
+           'PassiveAggressiveRegressor',
+           'Perceptron',
+           'Ridge',
+           'RidgeCV',
+           'RidgeClassifier',
+           'RidgeClassifierCV',
+           'SGDClassifier',
+           'SGDRegressor',
+           'SquaredLoss',
+           'TheilSenRegressor',
+           'enet_path',
+           'lars_path',
+           'lars_path_gram',
+           'lasso_path',
+           'orthogonal_mp',
+           'orthogonal_mp_gram',
+           'ridge_regression',
+           'RANSACRegressor',
+           'PoissonRegressor',
+           'GammaRegressor',
+           'TweedieRegressor']
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/init.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/init.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/_base.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/_base.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/_bayes.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/_bayes.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/_coordinate_descent.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/_coordinate_descent.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/_huber.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/_huber.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/_least_angle.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/_least_angle.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/_logistic.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/_logistic.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/_omp.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/_omp.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/_passive_aggressive.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/_passive_aggressive.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/_perceptron.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/_perceptron.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/_ransac.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/_ransac.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/_ridge.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/_ridge.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/_sag.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/_sag.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/_stochastic_gradient.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/_stochastic_gradient.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/_theil_sen.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/_theil_sen.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/base.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/base.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/bayes.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/bayes.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/cd_fast.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/cd_fast.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/coordinate_descent.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/coordinate_descent.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/huber.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/huber.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/least_angle.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/least_angle.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/logistic.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/logistic.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/omp.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/omp.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/passive_aggressive.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/passive_aggressive.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/perceptron.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/perceptron.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/ransac.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/ransac.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/ridge.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/ridge.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/sag.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/sag.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/sag_fast.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/sag_fast.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/setup.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/setup.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/sgd_fast.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/sgd_fast.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/stochastic_gradient.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/stochastic_gradient.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/pycache/theil_sen.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/pycache/theil_sen.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/_base.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_base.py
@ -0,0 +1,620 @@
+"""
+Generalized Linear Models.
+"""
+
+# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
+# Fabian Pedregosa <fabian.pedregosa@inria.fr>
+# Olivier Grisel <olivier.grisel@ensta.org>
+#         Vincent Michel <vincent.michel@inria.fr>
+#         Peter Prettenhofer <peter.prettenhofer@gmail.com>
+#         Mathieu Blondel <mathieu@mblondel.org>
+#         Lars Buitinck
+#         Maryan Morel <maryan.morel@polytechnique.edu>
+#         Giorgio Patrini <giorgio.patrini@anu.edu.au>
+# License: BSD 3 clause
+
+from abc import ABCMeta, abstractmethod
+import numbers
+import warnings
+
+import numpy as np
+import scipy.sparse as sp
+from scipy import linalg
+from scipy import sparse
+from scipy.special import expit
+from joblib import Parallel, delayed
+
+from ..base import (BaseEstimator, ClassifierMixin, RegressorMixin,
+                    MultiOutputMixin)
+from ..utils import check_array
+from ..utils.validation import FLOAT_DTYPES
+from ..utils.validation import _deprecate_positional_args
+from ..utils import check_random_state
+from ..utils.extmath import safe_sparse_dot
+from ..utils.sparsefuncs import mean_variance_axis, inplace_column_scale
+from ..utils.fixes import sparse_lsqr
+from ..utils._seq_dataset import ArrayDataset32, CSRDataset32
+from ..utils._seq_dataset import ArrayDataset64, CSRDataset64
+from ..utils.validation import check_is_fitted, _check_sample_weight
+from ..preprocessing import normalize as f_normalize
+
+# TODO: bayesian_ridge_regression and bayesian_regression_ard
+# should be squashed into their respective objects.
+
+SPARSE_INTERCEPT_DECAY = 0.01
+# For sparse data intercept updates are scaled by this decay factor to avoid
+# intercept oscillation.
+
+
+def make_dataset(X, y, sample_weight, random_state=None):
+    """Create ``Dataset`` abstraction for sparse and dense inputs.
+
+    This also returns the ``intercept_decay`` which is different
+    for sparse datasets.
+
+    Parameters
+    ----------
+    X : array_like, shape (n_samples, n_features)
+        Training data
+
+    y : array_like, shape (n_samples, )
+        Target values.
+
+    sample_weight : numpy array of shape (n_samples,)
+        The weight of each sample
+
+    random_state : int, RandomState instance or None (default)
+        Determines random number generation for dataset shuffling and noise.
+        Pass an int for reproducible output across multiple function calls.
+        See :term:`Glossary <random_state>`.
+
+    Returns
+    -------
+    dataset
+        The ``Dataset`` abstraction
+    intercept_decay
+        The intercept decay
+    """
+
+    rng = check_random_state(random_state)
+    # seed should never be 0 in SequentialDataset64
+    seed = rng.randint(1, np.iinfo(np.int32).max)
+
+    if X.dtype == np.float32:
+        CSRData = CSRDataset32
+        ArrayData = ArrayDataset32
+    else:
+        CSRData = CSRDataset64
+        ArrayData = ArrayDataset64
+
+    if sp.issparse(X):
+        dataset = CSRData(X.data, X.indptr, X.indices, y, sample_weight,
+                          seed=seed)
+        intercept_decay = SPARSE_INTERCEPT_DECAY
+    else:
+        X = np.ascontiguousarray(X)
+        dataset = ArrayData(X, y, sample_weight, seed=seed)
+        intercept_decay = 1.0
+
+    return dataset, intercept_decay
+
+
+def _preprocess_data(X, y, fit_intercept, normalize=False, copy=True,
+                     sample_weight=None, return_mean=False, check_input=True):
+    """Center and scale data.
+
+    Centers data to have mean zero along axis 0. If X is a sparse matrix, no
+    centering of X is done, but normalization can still be applied. If
+    fit_intercept=False, X and y are returned neither centered nor
+    normalized. The function returns the statistics necessary to reconstruct
+    the input data, which are X_offset, y_offset, X_scale, such that the
+    output
+
+        X = (X - X_offset) / X_scale
+
+    X_scale is the L2 norm of X - X_offset. If sample_weight is not None,
+    then the weighted mean of X and y is zero, and not the mean itself. If
+    return_mean=True, the mean, eventually weighted, is returned, independently
+    of whether X was centered (option used for optimization with sparse data in
+    coordinate_descent).
+
+    This is here because nearly all linear models will want their data to be
+    centered. This function also systematically makes y consistent with X.dtype
+
+    Parameters
+    ----------
+    X : {ndarray, sparse matrix} of shape (n_samples, n_features)
+        Input data. Dense X is centered (and normalized) in place, so pass
+        ``copy=True`` to protect the caller's array.
+
+    y : array-like
+        Targets; converted with ``np.asarray`` to ``X.dtype``.
+
+    fit_intercept : bool
+        If False, no centering or normalization is performed and the
+        returned offsets are zeros and the scale is ones.
+
+    normalize : bool, default=False
+        If True (and ``fit_intercept`` is True), scale the columns of X.
+
+    copy : bool, default=True
+        Forwarded to ``check_array`` when ``check_input`` is True; otherwise
+        X is copied explicitly when True.
+
+    sample_weight : number, array-like or None, default=None
+        Weights for the mean of dense X and of y. A plain number is treated
+        like None. The sparse branch computes the column statistics of X
+        without these weights.
+
+    return_mean : bool, default=False
+        Only consulted when X is sparse: if False, the returned X_offset is
+        zeroed.
+
+    check_input : bool, default=True
+        If True, validate X with ``check_array``.
+
+    Returns
+    -------
+    X, y, X_offset, y_offset, X_scale
+    """
+    # A scalar weight applies equally to all samples, so it is dropped.
+    if isinstance(sample_weight, numbers.Number):
+        sample_weight = None
+    if sample_weight is not None:
+        sample_weight = np.asarray(sample_weight)
+
+    if check_input:
+        X = check_array(X, copy=copy, accept_sparse=['csr', 'csc'],
+                        dtype=FLOAT_DTYPES)
+    elif copy:
+        if sp.issparse(X):
+            X = X.copy()
+        else:
+            X = X.copy(order='K')
+
+    y = np.asarray(y, dtype=X.dtype)
+
+    if fit_intercept:
+        if sp.issparse(X):
+            # Sparse X is never centered; only its column statistics are
+            # computed.
+            X_offset, X_var = mean_variance_axis(X, axis=0)
+            if not return_mean:
+                X_offset[:] = X.dtype.type(0)
+
+            if normalize:
+
+                # TODO: f_normalize could be used here as well but the function
+                # inplace_csr_row_normalize_l2 must be changed such that it
+                # can return also the norms computed internally
+
+                # transform variance to norm in-place
+                X_var *= X.shape[0]
+                # The second argument is the output buffer, so X_scale and
+                # X_var refer to the same array; ``del`` only drops the name.
+                X_scale = np.sqrt(X_var, X_var)
+                del X_var
+                # Avoid dividing by zero for columns with zero norm.
+                X_scale[X_scale == 0] = 1
+                inplace_column_scale(X, 1. / X_scale)
+            else:
+                X_scale = np.ones(X.shape[1], dtype=X.dtype)
+
+        else:
+            X_offset = np.average(X, axis=0, weights=sample_weight)
+            # In-place centering: modifies the array bound to X.
+            X -= X_offset
+            if normalize:
+                X, X_scale = f_normalize(X, axis=0, copy=False,
+                                         return_norm=True)
+            else:
+                X_scale = np.ones(X.shape[1], dtype=X.dtype)
+        y_offset = np.average(y, axis=0, weights=sample_weight)
+        # Out-of-place subtraction: the array passed in as y is not modified
+        # here.
+        y = y - y_offset
+    else:
+        X_offset = np.zeros(X.shape[1], dtype=X.dtype)
+        X_scale = np.ones(X.shape[1], dtype=X.dtype)
+        if y.ndim == 1:
+            y_offset = X.dtype.type(0)
+        else:
+            y_offset = np.zeros(y.shape[1], dtype=X.dtype)
+
+    return X, y, X_offset, y_offset, X_scale
+
+
+# TODO: _rescale_data should be factored into _preprocess_data.
+# Currently, the fact that sag implements its own way to deal with
+# sample_weight makes the refactoring tricky.
+
+def _rescale_data(X, y, sample_weight):
+    """Rescale data sample-wise by square root of sample_weight.
+
+    For many linear models, this enables easy support for sample_weight.
+
+    Returns
+    -------
+    X_rescaled : {array-like, sparse matrix}
+
+    y_rescaled : {array-like, sparse matrix}
+    """
+    n_samples = X.shape[0]
+    sample_weight = np.asarray(sample_weight)
+    if sample_weight.ndim == 0:
+        sample_weight = np.full(n_samples, sample_weight,
+                                dtype=sample_weight.dtype)
+    sample_weight = np.sqrt(sample_weight)
+    sw_matrix = sparse.dia_matrix((sample_weight, 0),
+                                  shape=(n_samples, n_samples))
+    X = safe_sparse_dot(sw_matrix, X)
+    y = safe_sparse_dot(sw_matrix, y)
+    return X, y
+
+
+class LinearModel(BaseEstimator, metaclass=ABCMeta):
+    """Base class for Linear Models"""
+
+    @abstractmethod
+    def fit(self, X, y):
+        """Fit model."""
+
+    def _decision_function(self, X):
+        check_is_fitted(self)
+
+        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])
+        return safe_sparse_dot(X, self.coef_.T,
+                               dense_output=True) + self.intercept_
+
+    def predict(self, X):
+        """
+        Predict using the linear model.
+
+        Parameters
+        ----------
+        X : array_like or sparse matrix, shape (n_samples, n_features)
+            Samples.
+
+        Returns
+        -------
+        C : array, shape (n_samples,)
+            Returns predicted values.
+        """
+        return self._decision_function(X)
+
+    _preprocess_data = staticmethod(_preprocess_data)
+
+    def _set_intercept(self, X_offset, y_offset, X_scale):
+        """Set the intercept_
+        """
+        if self.fit_intercept:
+            self.coef_ = self.coef_ / X_scale
+            self.intercept_ = y_offset - np.dot(X_offset, self.coef_.T)
+        else:
+            self.intercept_ = 0.
+
+    def _more_tags(self):
+        return {'requires_y': True}
+
+
+# XXX Should this derive from LinearModel? It should be a mixin, not an ABC.
+# Maybe the n_features checking can be moved to LinearModel.
+class LinearClassifierMixin(ClassifierMixin):
+    """Mixin for linear classifiers.
+
+    Handles prediction for sparse and dense X.
+    """
+
+    def decision_function(self, X):
+        """
+        Predict confidence scores for samples.
+
+        The confidence score for a sample is the signed distance of that
+        sample to the hyperplane.
+
+        Parameters
+        ----------
+        X : array_like or sparse matrix, shape (n_samples, n_features)
+            Samples.
+
+        Returns
+        -------
+        array, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)
+            Confidence scores per (sample, class) combination. In the binary
+            case, confidence score for self.classes_[1] where >0 means this
+            class would be predicted.
+        """
+        check_is_fitted(self)
+
+        X = check_array(X, accept_sparse='csr')
+
+        n_features = self.coef_.shape[1]
+        if X.shape[1] != n_features:
+            raise ValueError("X has %d features per sample; expecting %d"
+                             % (X.shape[1], n_features))
+
+        scores = safe_sparse_dot(X, self.coef_.T,
+                                 dense_output=True) + self.intercept_
+        return scores.ravel() if scores.shape[1] == 1 else scores
+
+    def predict(self, X):
+        """
+        Predict class labels for samples in X.
+
+        Parameters
+        ----------
+        X : array_like or sparse matrix, shape (n_samples, n_features)
+            Samples.
+
+        Returns
+        -------
+        C : array, shape [n_samples]
+            Predicted class label per sample.
+        """
+        scores = self.decision_function(X)
+        if len(scores.shape) == 1:
+            indices = (scores > 0).astype(np.int)
+        else:
+            indices = scores.argmax(axis=1)
+        return self.classes_[indices]
+
+    def _predict_proba_lr(self, X):
+        """Probability estimation for OvR logistic regression.
+
+        Positive class probabilities are computed as
+        1. / (1. + np.exp(-self.decision_function(X)));
+        multiclass is handled by normalizing that over all classes.
+        """
+        prob = self.decision_function(X)
+        expit(prob, out=prob)
+        if prob.ndim == 1:
+            return np.vstack([1 - prob, prob]).T
+        else:
+            # OvR normalization, like LibLinear's predict_probability
+            prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
+            return prob
+
+
+class SparseCoefMixin:
+    """Mixin for converting coef_ to and from CSR format.
+
+    L1-regularizing estimators should inherit this.
+    """
+
+    def densify(self):
+        """
+        Convert coefficient matrix to dense array format.
+
+        Converts the ``coef_`` member (back) to a numpy.ndarray. This is the
+        default format of ``coef_`` and is required for fitting, so calling
+        this method is only required on models that have previously been
+        sparsified; otherwise, it is a no-op.
+
+        Returns
+        -------
+        self
+            Fitted estimator.
+        """
+        msg = "Estimator, %(name)s, must be fitted before densifying."
+        check_is_fitted(self, msg=msg)
+        if sp.issparse(self.coef_):
+            self.coef_ = self.coef_.toarray()
+        return self
+
+    def sparsify(self):
+        """
+        Convert coefficient matrix to sparse format.
+
+        Converts the ``coef_`` member to a scipy.sparse matrix, which for
+        L1-regularized models can be much more memory- and storage-efficient
+        than the usual numpy.ndarray representation.
+
+        The ``intercept_`` member is not converted.
+
+        Returns
+        -------
+        self
+            Fitted estimator.
+
+        Notes
+        -----
+        For non-sparse models, i.e. when there are not many zeros in ``coef_``,
+        this may actually *increase* memory usage, so use this method with
+        care. A rule of thumb is that the number of zero elements, which can
+        be computed with ``(coef_ == 0).sum()``, must be more than 50% for this
+        to provide significant benefits.
+
+        After calling this method, further fitting with the partial_fit
+        method (if any) will not work until you call densify.
+        """
+        msg = "Estimator, %(name)s, must be fitted before sparsifying."
+        check_is_fitted(self, msg=msg)
+        self.coef_ = sp.csr_matrix(self.coef_)
+        return self
+
+
+class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):
+    """
+    Ordinary least squares Linear Regression.
+
+    LinearRegression fits a linear model with coefficients w = (w1, ..., wp)
+    to minimize the residual sum of squares between the observed targets in
+    the dataset, and the targets predicted by the linear approximation.
+
+    Parameters
+    ----------
+    fit_intercept : bool, default=True
+        Whether to calculate the intercept for this model. If set
+        to False, no intercept will be used in calculations
+        (i.e. data is expected to be centered).
+
+    normalize : bool, default=False
+        This parameter is ignored when ``fit_intercept`` is set to False.
+        If True, the regressors X will be normalized before regression by
+        subtracting the mean and dividing by the l2-norm.
+        If you wish to standardize, please use
+        :class:`sklearn.preprocessing.StandardScaler` before calling ``fit`` on
+        an estimator with ``normalize=False``.
+
+    copy_X : bool, default=True
+        If True, X will be copied; else, it may be overwritten.
+
+    n_jobs : int, default=None
+        The number of jobs to use for the computation. This will only provide
+        speedup for n_targets > 1 and sufficient large problems.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    Attributes
+    ----------
+    coef_ : array of shape (n_features, ) or (n_targets, n_features)
+        Estimated coefficients for the linear regression problem.
+        If multiple targets are passed during the fit (y 2D), this
+        is a 2D array of shape (n_targets, n_features), while if only
+        one target is passed, this is a 1D array of length n_features.
+
+    rank_ : int
+        Rank of matrix `X`. Only available when `X` is dense.
+
+    singular_ : array of shape (min(n_samples, n_features),)
+        Singular values of `X`. Only available when `X` is dense.
+
+    intercept_ : float or array of shape (n_targets,)
+        Independent term in the linear model. Set to 0.0 if
+        `fit_intercept = False`.
+
+    See Also
+    --------
+    sklearn.linear_model.Ridge : Ridge regression addresses some of the
+        problems of Ordinary Least Squares by imposing a penalty on the
+        size of the coefficients with l2 regularization.
+    sklearn.linear_model.Lasso : The Lasso is a linear model that estimates
+        sparse coefficients with l1 regularization.
+    sklearn.linear_model.ElasticNet : Elastic-Net is a linear regression
+        model trained with both l1 and l2 -norm regularization of the
+        coefficients.
+
+    Notes
+    -----
+    From the implementation point of view, this is just plain Ordinary
+    Least Squares (scipy.linalg.lstsq) wrapped as a predictor object.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.linear_model import LinearRegression
+    >>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
+    >>> # y = 1 * x_0 + 2 * x_1 + 3
+    >>> y = np.dot(X, np.array([1, 2])) + 3
+    >>> reg = LinearRegression().fit(X, y)
+    >>> reg.score(X, y)
+    1.0
+    >>> reg.coef_
+    array([1., 2.])
+    >>> reg.intercept_
+    3.0000...
+    >>> reg.predict(np.array([[3, 5]]))
+    array([16.])
+    """
+    @_deprecate_positional_args
+    def __init__(self, *, fit_intercept=True, normalize=False, copy_X=True,
+                 n_jobs=None):
+        self.fit_intercept = fit_intercept
+        self.normalize = normalize
+        self.copy_X = copy_X
+        self.n_jobs = n_jobs
+
+    def fit(self, X, y, sample_weight=None):
+        """
+        Fit linear model.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Training data
+
+        y : array-like of shape (n_samples,) or (n_samples, n_targets)
+            Target values. Will be cast to X's dtype if necessary
+
+        sample_weight : array-like of shape (n_samples,), default=None
+            Individual weights for each sample
+
+            .. versionadded:: 0.17
+               parameter *sample_weight* support to LinearRegression.
+
+        Returns
+        -------
+        self : returns an instance of self.
+        """
+
+        n_jobs_ = self.n_jobs
+        X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc', 'coo'],
+                                   y_numeric=True, multi_output=True)
+
+        if sample_weight is not None:
+            sample_weight = _check_sample_weight(sample_weight, X,
+                                                 dtype=X.dtype)
+
+        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
+            X, y, fit_intercept=self.fit_intercept, normalize=self.normalize,
+            copy=self.copy_X, sample_weight=sample_weight,
+            return_mean=True)
+
+        if sample_weight is not None:
+            # Sample weight can be implemented via a simple rescaling.
+            X, y = _rescale_data(X, y, sample_weight)
+
+        if sp.issparse(X):
+            # Sparse X was not centered during preprocessing. The centering
+            # is applied implicitly through a LinearOperator instead, so X
+            # itself stays sparse.
+            # NOTE(review): when sample_weight is given, X has already been
+            # row-scaled above while the same offset is subtracted from every
+            # row here -- confirm this is the intended weighted centering.
+            X_offset_scale = X_offset / X_scale
+
+            def matvec(b):
+                return X.dot(b) - b.dot(X_offset_scale)
+
+            def rmatvec(b):
+                return X.T.dot(b) - X_offset_scale * np.sum(b)
+
+            X_centered = sparse.linalg.LinearOperator(shape=X.shape,
+                                                      matvec=matvec,
+                                                      rmatvec=rmatvec)
+
+            if y.ndim < 2:
+                out = sparse_lsqr(X_centered, y)
+                self.coef_ = out[0]
+                self._residues = out[3]
+            else:
+                # sparse_lsqr cannot handle y with shape (M, K), so each
+                # target column is solved separately (optionally in parallel)
+                outs = Parallel(n_jobs=n_jobs_)(
+                    delayed(sparse_lsqr)(X_centered, y[:, j].ravel())
+                    for j in range(y.shape[1]))
+                self.coef_ = np.vstack([out[0] for out in outs])
+                self._residues = np.vstack([out[3] for out in outs])
+        else:
+            self.coef_, self._residues, self.rank_, self.singular_ = \
+                linalg.lstsq(X, y)
+            # lstsq returns the solution with features along the first axis;
+            # transpose to the (n_targets, n_features) layout of coef_.
+            self.coef_ = self.coef_.T
+
+        if y.ndim == 1:
+            self.coef_ = np.ravel(self.coef_)
+        self._set_intercept(X_offset, y_offset, X_scale)
+        return self
+
+
+def _pre_fit(X, y, Xy, precompute, normalize, fit_intercept, copy,
+             check_input=True, sample_weight=None):
+    """Aux function used at beginning of fit in linear models
+
+    Parameters
+    ----------
+    order : 'F', 'C' or None, default=None
+        Whether X and y will be forced to be fortran or c-style. Only relevant
+        if sample_weight is not None.
+    """
+    n_samples, n_features = X.shape
+
+    if sparse.isspmatrix(X):
+        # copy is not needed here as X is not modified inplace when X is sparse
+        precompute = False
+        X, y, X_offset, y_offset, X_scale = _preprocess_data(
+            X, y, fit_intercept=fit_intercept, normalize=normalize,
+            copy=False, return_mean=True, check_input=check_input)
+    else:
+        # copy was done in fit if necessary
+        X, y, X_offset, y_offset, X_scale = _preprocess_data(
+            X, y, fit_intercept=fit_intercept, normalize=normalize, copy=copy,
+            check_input=check_input, sample_weight=sample_weight)
+    if sample_weight is not None:
+        X, y = _rescale_data(X, y, sample_weight=sample_weight)
+    if hasattr(precompute, '__array__') and (
+        fit_intercept and not np.allclose(X_offset, np.zeros(n_features)) or
+            normalize and not np.allclose(X_scale, np.ones(n_features))):
+        warnings.warn("Gram matrix was provided but X was centered"
+                      " to fit intercept, "
+                      "or X was normalized : recomputing Gram matrix.",
+                      UserWarning)
+        # recompute Gram
+        precompute = 'auto'
+        Xy = None
+
+    # precompute if n_samples > n_features
+    if isinstance(precompute, str) and precompute == 'auto':
+        precompute = (n_samples > n_features)
+
+    if precompute is True:
+        # make sure that the 'precompute' array is contiguous.
+        precompute = np.empty(shape=(n_features, n_features), dtype=X.dtype,
+                              order='C')
+        np.dot(X.T, X, out=precompute)
+
+    if not hasattr(precompute, '__array__'):
+        Xy = None  # cannot use Xy if precompute is not Gram
+
+    if hasattr(precompute, '__array__') and Xy is None:
+        common_dtype = np.find_common_type([X.dtype, y.dtype], [])
+        if y.ndim == 1:
+            # Xy is 1d, make sure it is contiguous.
+            Xy = np.empty(shape=n_features, dtype=common_dtype, order='C')
+            np.dot(X.T, y, out=Xy)
+        else:
+            # Make sure that Xy is always F contiguous even if X or y are not
+            # contiguous: the goal is to make it fast to extract the data for a
+            # specific target.
+            n_targets = y.shape[1]
+            Xy = np.empty(shape=(n_features, n_targets), dtype=common_dtype,
+                          order='F')
+            np.dot(y.T, X, out=Xy.T)
+
+    return X, y, X_offset, y_offset, X_scale, precompute, Xy
--- a/venv/Lib/site-packages/sklearn/linear_model/_bayes.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_bayes.py
@ -0,0 +1,674 @@
+"""
+Various bayesian regression
+"""
+
+# Authors: V. Michel, F. Pedregosa, A. Gramfort
+# License: BSD 3 clause
+
+from math import log
+import numpy as np
+from scipy import linalg
+
+from ._base import LinearModel, _rescale_data
+from ..base import RegressorMixin
+from ..utils.extmath import fast_logdet
+from scipy.linalg import pinvh
+from ..utils.validation import _check_sample_weight
+from ..utils.validation import _deprecate_positional_args
+
+
+###############################################################################
+# BayesianRidge regression
+
+class BayesianRidge(RegressorMixin, LinearModel):
+    """Bayesian ridge regression.
+
+    Fit a Bayesian ridge model. See the Notes section for details on this
+    implementation and the optimization of the regularization parameters
+    lambda (precision of the weights) and alpha (precision of the noise).
+
+    Read more in the :ref:`User Guide <bayesian_regression>`.
+
+    Parameters
+    ----------
+    n_iter : int, default=300
+        Maximum number of iterations. Should be greater than or equal to 1.
+
+    tol : float, default=1e-3
+        Stop the algorithm if w has converged.
+
+    alpha_1 : float, default=1e-6
+        Hyper-parameter : shape parameter for the Gamma distribution prior
+        over the alpha parameter.
+
+    alpha_2 : float, default=1e-6
+        Hyper-parameter : inverse scale parameter (rate parameter) for the
+        Gamma distribution prior over the alpha parameter.
+
+    lambda_1 : float, default=1e-6
+        Hyper-parameter : shape parameter for the Gamma distribution prior
+        over the lambda parameter.
+
+    lambda_2 : float, default=1e-6
+        Hyper-parameter : inverse scale parameter (rate parameter) for the
+        Gamma distribution prior over the lambda parameter.
+
+    alpha_init : float, default=None
+        Initial value for alpha (precision of the noise).
+        If not set, alpha_init is 1/Var(y).
+
+            .. versionadded:: 0.22
+
+    lambda_init : float, default=None
+        Initial value for lambda (precision of the weights).
+        If not set, lambda_init is 1.
+
+            .. versionadded:: 0.22
+
+    compute_score : bool, default=False
+        If True, compute the log marginal likelihood at each iteration of the
+        optimization.
+
+    fit_intercept : bool, default=True
+        Whether to calculate the intercept for this model.
+        The intercept is not treated as a probabilistic parameter
+        and thus has no associated variance. If set
+        to False, no intercept will be used in calculations
+        (i.e. data is expected to be centered).
+
+    normalize : bool, default=False
+        This parameter is ignored when ``fit_intercept`` is set to False.
+        If True, the regressors X will be normalized before regression by
+        subtracting the mean and dividing by the l2-norm.
+        If you wish to standardize, please use
+        :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
+        on an estimator with ``normalize=False``.
+
+    copy_X : bool, default=True
+        If True, X will be copied; else, it may be overwritten.
+
+    verbose : bool, default=False
+        Verbose mode when fitting the model.
+
+
+    Attributes
+    ----------
+    coef_ : array-like of shape (n_features,)
+        Coefficients of the regression model (mean of distribution)
+
+    intercept_ : float
+        Independent term in decision function. Set to 0.0 if
+        ``fit_intercept = False``.
+
+    alpha_ : float
+       Estimated precision of the noise.
+
+    lambda_ : float
+       Estimated precision of the weights.
+
+    sigma_ : array-like of shape (n_features, n_features)
+        Estimated variance-covariance matrix of the weights
+
+    scores_ : array-like of shape (n_iter_+1,)
+        If compute_score is True, value of the log marginal likelihood (to be
+        maximized) at each iteration of the optimization. The array starts
+        with the value of the log marginal likelihood obtained for the initial
+        values of alpha and lambda and ends with the value obtained for the
+        estimated alpha and lambda.
+
+    n_iter_ : int
+        The actual number of iterations to reach the stopping criterion.
+
+    Examples
+    --------
+    >>> from sklearn import linear_model
+    >>> clf = linear_model.BayesianRidge()
+    >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])
+    BayesianRidge()
+    >>> clf.predict([[1, 1]])
+    array([1.])
+
+    Notes
+    -----
+    There exist several strategies to perform Bayesian ridge regression. This
+    implementation is based on the algorithm described in Appendix A of
+    (Tipping, 2001) where updates of the regularization parameters are done as
+    suggested in (MacKay, 1992). Note that according to A New
+    View of Automatic Relevance Determination (Wipf and Nagarajan, 2008) these
+    update rules do not guarantee that the marginal likelihood is increasing
+    between two consecutive iterations of the optimization.
+
+    References
+    ----------
+    D. J. C. MacKay, Bayesian Interpolation, Computation and Neural Systems,
+    Vol. 4, No. 3, 1992.
+
+    M. E. Tipping, Sparse Bayesian Learning and the Relevance Vector Machine,
+    Journal of Machine Learning Research, Vol. 1, 2001.
+    """
+    @_deprecate_positional_args
+    def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,
+                 lambda_1=1.e-6, lambda_2=1.e-6, alpha_init=None,
+                 lambda_init=None, compute_score=False, fit_intercept=True,
+                 normalize=False, copy_X=True, verbose=False):
+        """Record the hyper-parameters as given; no validation happens here."""
+        # Stopping criteria of the iterative solver.
+        self.n_iter, self.tol = n_iter, tol
+        # Gamma hyper-priors over the noise precision (alpha) ...
+        self.alpha_1, self.alpha_2 = alpha_1, alpha_2
+        # ... and over the weight precision (lambda).
+        self.lambda_1, self.lambda_2 = lambda_1, lambda_2
+        # Optional starting points; None means "derive it in fit".
+        self.alpha_init, self.lambda_init = alpha_init, lambda_init
+        self.compute_score = compute_score
+        # Preprocessing switches forwarded to _preprocess_data by fit.
+        self.fit_intercept, self.normalize = fit_intercept, normalize
+        self.copy_X, self.verbose = copy_X, verbose
+
+    def fit(self, X, y, sample_weight=None):
+        """Fit the model
+
+        Parameters
+        ----------
+        X : ndarray of shape (n_samples, n_features)
+            Training data
+        y : ndarray of shape (n_samples,)
+            Target values. Will be cast to X's dtype if necessary
+
+        sample_weight : ndarray of shape (n_samples,), default=None
+            Individual weights for each sample
+
+            .. versionadded:: 0.20
+               parameter *sample_weight* support to BayesianRidge.
+
+        Returns
+        -------
+        self : returns an instance of self.
+
+        Raises
+        ------
+        ValueError
+            If ``n_iter`` is smaller than 1.
+        """
+
+        if self.n_iter < 1:
+            raise ValueError('n_iter should be greater than or equal to 1.'
+                             ' Got {!r}.'.format(self.n_iter))
+
+        X, y = self._validate_data(X, y, dtype=np.float64, y_numeric=True)
+
+        if sample_weight is not None:
+            sample_weight = _check_sample_weight(sample_weight, X,
+                                                 dtype=X.dtype)
+
+        X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data(
+            X, y, self.fit_intercept, self.normalize, self.copy_X,
+            sample_weight=sample_weight)
+
+        if sample_weight is not None:
+            # Sample weight can be implemented via a simple rescaling.
+            X, y = _rescale_data(X, y, sample_weight)
+
+        # Kept on the estimator because predict(return_std=True) re-applies
+        # the same centering/scaling to new samples when normalize is set.
+        self.X_offset_ = X_offset_
+        self.X_scale_ = X_scale_
+        n_samples, n_features = X.shape
+
+        # Initialization of the values of the parameters
+        eps = np.finfo(np.float64).eps
+        alpha_ = self.alpha_init
+        lambda_ = self.lambda_init
+        if alpha_ is None:
+            # Add `eps` in the denominator to omit division by zero if
+            # `np.var(y)` is zero
+            alpha_ = 1. / (np.var(y) + eps)
+        if lambda_ is None:
+            lambda_ = 1.
+
+        verbose = self.verbose
+        lambda_1 = self.lambda_1
+        lambda_2 = self.lambda_2
+        alpha_1 = self.alpha_1
+        alpha_2 = self.alpha_2
+
+        self.scores_ = list()
+        coef_old_ = None
+
+        # X.T @ y and the SVD of X do not depend on alpha_/lambda_, so they
+        # are computed once and reused by every coefficient update.
+        XT_y = np.dot(X.T, y)
+        U, S, Vh = linalg.svd(X, full_matrices=False)
+        eigen_vals_ = S ** 2
+
+        # Convergence loop of the bayesian ridge regression
+        for iter_ in range(self.n_iter):
+
+            # update posterior mean coef_ based on alpha_ and lambda_ and
+            # compute corresponding rmse (a sum of squared residuals)
+            coef_, rmse_ = self._update_coef_(X, y, n_samples, n_features,
+                                              XT_y, U, Vh, eigen_vals_,
+                                              alpha_, lambda_)
+            if self.compute_score:
+                # compute the log marginal likelihood
+                s = self._log_marginal_likelihood(n_samples, n_features,
+                                                  eigen_vals_,
+                                                  alpha_, lambda_,
+                                                  coef_, rmse_)
+                self.scores_.append(s)
+
+            # Update alpha and lambda according to (MacKay, 1992)
+            gamma_ = np.sum((alpha_ * eigen_vals_) /
+                            (lambda_ + alpha_ * eigen_vals_))
+            lambda_ = ((gamma_ + 2 * lambda_1) /
+                       (np.sum(coef_ ** 2) + 2 * lambda_2))
+            alpha_ = ((n_samples - gamma_ + 2 * alpha_1) /
+                      (rmse_ + 2 * alpha_2))
+
+            # Check for convergence
+            if iter_ != 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:
+                if verbose:
+                    print("Convergence after ", str(iter_), " iterations")
+                break
+            coef_old_ = np.copy(coef_)
+
+        self.n_iter_ = iter_ + 1
+
+        # return regularization parameters and corresponding posterior mean,
+        # log marginal likelihood and posterior covariance
+        self.alpha_ = alpha_
+        self.lambda_ = lambda_
+        self.coef_, rmse_ = self._update_coef_(X, y, n_samples, n_features,
+                                               XT_y, U, Vh, eigen_vals_,
+                                               alpha_, lambda_)
+        if self.compute_score:
+            # compute the log marginal likelihood for the final parameters.
+            # Use the coefficients just recomputed for the final alpha_ and
+            # lambda_ (self.coef_): the loop variable coef_ still holds the
+            # posterior mean of the previous parameter values and would not
+            # match the freshly computed rmse_.
+            s = self._log_marginal_likelihood(n_samples, n_features,
+                                              eigen_vals_,
+                                              alpha_, lambda_,
+                                              self.coef_, rmse_)
+            self.scores_.append(s)
+            self.scores_ = np.array(self.scores_)
+
+        # posterior covariance is given by 1/alpha_ * scaled_sigma_
+        scaled_sigma_ = np.dot(Vh.T,
+                               Vh / (eigen_vals_ +
+                                     lambda_ / alpha_)[:, np.newaxis])
+        self.sigma_ = (1. / alpha_) * scaled_sigma_
+
+        self._set_intercept(X_offset_, y_offset_, X_scale_)
+
+        return self
+
+    def predict(self, X, return_std=False):
+        """Predict using the linear model.
+
+        The mean of the predictive distribution is always computed; its
+        standard deviation is returned as well on request.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Samples.
+
+        return_std : bool, default=False
+            Whether to return the standard deviation of posterior prediction.
+
+        Returns
+        -------
+        y_mean : array-like of shape (n_samples,)
+            Mean of predictive distribution of query points.
+
+        y_std : array-like of shape (n_samples,)
+            Standard deviation of predictive distribution of query points.
+            Only returned when ``return_std`` is not ``False``.
+        """
+        y_mean = self._decision_function(X)
+        # Identity test on purpose: only the literal False skips the std.
+        if return_std is False:
+            return y_mean
+
+        if self.normalize:
+            # Put X in the same centered/scaled space the weights live in.
+            X = (X - self.X_offset_) / self.X_scale_
+        # Per-sample x^T sigma_ x: uncertainty coming from the weights.
+        weight_variance = (np.dot(X, self.sigma_) * X).sum(axis=1)
+        # 1 / alpha_: variance of the noise.
+        noise_variance = 1. / self.alpha_
+        y_std = np.sqrt(weight_variance + noise_variance)
+        return y_mean, y_std
+
+    def _update_coef_(self, X, y, n_samples, n_features, XT_y, U, Vh,
+                      eigen_vals_, alpha_, lambda_):
+        """Return the posterior mean and its sum of squared residuals.
+
+        The posterior mean is coef_ = scaled_sigma_ * X.T * y with
+        scaled_sigma_ = (lambda_/alpha_ * np.eye(n_features)
+                         + np.dot(X.T, X))^-1
+        The inverse is never formed explicitly: it is rebuilt from the SVD
+        factors of X (``U``, ``Vh``) and the squared singular values
+        ``eigen_vals_``.
+        """
+        # Shifted spectrum shared by both branches.
+        shifted_eigen_vals = eigen_vals_ + lambda_ / alpha_
+
+        if n_samples > n_features:
+            # Work in feature space: (n_features, n_features) product.
+            scaled_sigma = np.dot(Vh.T,
+                                  Vh / shifted_eigen_vals[:, np.newaxis])
+            coef_ = np.dot(scaled_sigma, XT_y)
+        else:
+            # Work in sample space: (n_samples, n_samples) product.
+            sample_space = np.dot(U / shifted_eigen_vals[None, :], U.T)
+            coef_ = np.dot(np.dot(X.T, sample_space), y)
+
+        residuals = y - np.dot(X, coef_)
+        rmse_ = np.sum(residuals ** 2)
+
+        return coef_, rmse_
+
+    def _log_marginal_likelihood(self, n_samples, n_features, eigen_vals,
+                                 alpha_, lambda_, coef, rmse):
+        """Log marginal likelihood for the given alpha_, lambda_ and coef."""
+        # compute the log of the determinant of the posterior covariance.
+        # posterior covariance is given by
+        # sigma = (lambda_ * np.eye(n_features) + alpha_ * np.dot(X.T, X))^-1
+        # so its log-determinant is minus the sum of the log eigenvalues of
+        # the matrix being inverted.
+        if n_samples > n_features:
+            precision_eigen_vals = lambda_ + alpha_ * eigen_vals
+        else:
+            # Only the first n_samples directions carry data; the others
+            # keep the prior precision lambda_.
+            precision_eigen_vals = np.full(n_features, lambda_,
+                                           dtype=np.array(lambda_).dtype)
+            precision_eigen_vals[:n_samples] += alpha_ * eigen_vals
+        logdet_sigma = - np.sum(np.log(precision_eigen_vals))
+
+        # Gamma hyper-prior terms on lambda_ and alpha_.
+        lambda_prior = self.lambda_1 * log(lambda_) - self.lambda_2 * lambda_
+        alpha_prior = self.alpha_1 * log(alpha_) - self.alpha_2 * alpha_
+        # Data fit, weight penalty and normalization terms.
+        evidence = 0.5 * (n_features * log(lambda_) +
+                          n_samples * log(alpha_) -
+                          alpha_ * rmse -
+                          lambda_ * np.sum(coef ** 2) +
+                          logdet_sigma -
+                          n_samples * log(2 * np.pi))
+
+        return lambda_prior + alpha_prior + evidence
+
+
+###############################################################################
+# ARD (Automatic Relevance Determination) regression
+
+
+class ARDRegression(RegressorMixin, LinearModel):
+    """Bayesian ARD regression.
+
+    Fit the weights of a regression model, using an ARD prior. The weights of
+    the regression model are assumed to be in Gaussian distributions.
+    Also estimate the parameters lambda (precisions of the distributions of the
+    weights) and alpha (precision of the distribution of the noise).
+    The estimation is done by an iterative procedure (Evidence Maximization).
+
+    Read more in the :ref:`User Guide <bayesian_regression>`.
+
+    Parameters
+    ----------
+    n_iter : int, default=300
+        Maximum number of iterations.
+
+    tol : float, default=1e-3
+        Stop the algorithm if w has converged.
+
+    alpha_1 : float, default=1e-6
+        Hyper-parameter : shape parameter for the Gamma distribution prior
+        over the alpha parameter.
+
+    alpha_2 : float, default=1e-6
+        Hyper-parameter : inverse scale parameter (rate parameter) for the
+        Gamma distribution prior over the alpha parameter.
+
+    lambda_1 : float, default=1e-6
+        Hyper-parameter : shape parameter for the Gamma distribution prior
+        over the lambda parameter.
+
+    lambda_2 : float, default=1e-6
+        Hyper-parameter : inverse scale parameter (rate parameter) for the
+        Gamma distribution prior over the lambda parameter.
+
+    compute_score : bool, default=False
+        If True, compute the objective function at each step of the model.
+
+    threshold_lambda : float, default=10 000
+        threshold for removing (pruning) weights with high precision from
+        the computation.
+
+    fit_intercept : bool, default=True
+        whether to calculate the intercept for this model. If set
+        to false, no intercept will be used in calculations
+        (i.e. data is expected to be centered).
+
+    normalize : bool, default=False
+        This parameter is ignored when ``fit_intercept`` is set to False.
+        If True, the regressors X will be normalized before regression by
+        subtracting the mean and dividing by the l2-norm.
+        If you wish to standardize, please use
+        :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
+        on an estimator with ``normalize=False``.
+
+    copy_X : bool, default=True
+        If True, X will be copied; else, it may be overwritten.
+
+    verbose : bool, default=False
+        Verbose mode when fitting the model.
+
+    Attributes
+    ----------
+    coef_ : array-like of shape (n_features,)
+        Coefficients of the regression model (mean of distribution)
+
+    alpha_ : float
+       estimated precision of the noise.
+
+    lambda_ : array-like of shape (n_features,)
+       estimated precisions of the weights.
+
+    sigma_ : array-like of shape (n_features, n_features)
+        estimated variance-covariance matrix of the weights
+
+    scores_ : list of float
+        if computed, value of the objective function (to be maximized) at
+        each iteration
+
+    intercept_ : float
+        Independent term in decision function. Set to 0.0 if
+        ``fit_intercept = False``.
+
+    Examples
+    --------
+    >>> from sklearn import linear_model
+    >>> clf = linear_model.ARDRegression()
+    >>> clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])
+    ARDRegression()
+    >>> clf.predict([[1, 1]])
+    array([1.])
+
+    Notes
+    -----
+    For an example, see :ref:`examples/linear_model/plot_ard.py
+    <sphx_glr_auto_examples_linear_model_plot_ard.py>`.
+
+    References
+    ----------
+    D. J. C. MacKay, Bayesian nonlinear modeling for the prediction
+    competition, ASHRAE Transactions, 1994.
+
+    R. Salakhutdinov, Lecture notes on Statistical Machine Learning,
+    http://www.utstat.toronto.edu/~rsalakhu/sta4273/notes/Lecture2.pdf#page=15
+    Their beta is our ``self.alpha_``
+    Their alpha is our ``self.lambda_``
+    ARD is a little different than the slide: only dimensions/features for
+    which ``self.lambda_ < self.threshold_lambda`` are kept and the rest are
+    discarded.
+    """
+    @_deprecate_positional_args
+    def __init__(self, *, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6,
+                 lambda_1=1.e-6, lambda_2=1.e-6, compute_score=False,
+                 threshold_lambda=1.e+4, fit_intercept=True, normalize=False,
+                 copy_X=True, verbose=False):
+        """Record the hyper-parameters as given; no validation happens here."""
+        # Stopping criteria of the iterative solver.
+        self.n_iter, self.tol = n_iter, tol
+        # Preprocessing switches forwarded to _preprocess_data by fit.
+        self.fit_intercept, self.normalize = fit_intercept, normalize
+        # Gamma hyper-priors over the noise precision (alpha) ...
+        self.alpha_1, self.alpha_2 = alpha_1, alpha_2
+        # ... and over the per-weight precisions (lambda).
+        self.lambda_1, self.lambda_2 = lambda_1, lambda_2
+        self.compute_score = compute_score
+        # Weights whose precision reaches this value are pruned during fit.
+        self.threshold_lambda = threshold_lambda
+        self.copy_X, self.verbose = copy_X, verbose
+
+    def fit(self, X, y):
+        """Fit the ARDRegression model according to the given training data
+        and parameters.
+
+        Iterative procedure to maximize the evidence
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training vector, where n_samples is the number of samples and
+            n_features is the number of features.
+        y : array-like of shape (n_samples,)
+            Target values. Will be cast to X's dtype if necessary
+
+        Returns
+        -------
+        self : returns an instance of self.
+        """
+        X, y = self._validate_data(X, y, dtype=np.float64, y_numeric=True,
+                                   ensure_min_samples=2)
+
+        n_samples, n_features = X.shape
+        coef_ = np.zeros(n_features)
+
+        X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data(
+            X, y, self.fit_intercept, self.normalize, self.copy_X)
+
+        # predict(return_std=True) reads these attributes when normalize is
+        # set; without storing them that call fails with AttributeError.
+        self.X_offset_ = X_offset_
+        self.X_scale_ = X_scale_
+
+        # Launch the convergence loop
+        # Boolean mask of the features that are still part of the model.
+        keep_lambda = np.ones(n_features, dtype=bool)
+
+        lambda_1 = self.lambda_1
+        lambda_2 = self.lambda_2
+        alpha_1 = self.alpha_1
+        alpha_2 = self.alpha_2
+        verbose = self.verbose
+
+        # Initialization of the values of the parameters
+        eps = np.finfo(np.float64).eps
+        # Add `eps` in the denominator to omit division by zero if `np.var(y)`
+        # is zero
+        alpha_ = 1. / (np.var(y) + eps)
+        lambda_ = np.ones(n_features)
+
+        self.scores_ = list()
+        coef_old_ = None
+
+        def update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_):
+            # Posterior mean of the kept weights; pruned entries of coef_
+            # are left untouched (they are zeroed in the main loop).
+            coef_[keep_lambda] = alpha_ * np.dot(
+                sigma_, np.dot(X[:, keep_lambda].T, y))
+            return coef_
+
+        # Pick the formulation that inverts the smaller matrix.
+        update_sigma = (self._update_sigma if n_samples >= n_features
+                        else self._update_sigma_woodbury)
+        # Iterative procedure of ARDRegression
+        for iter_ in range(self.n_iter):
+            sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)
+            coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)
+
+            # Update alpha and lambda
+            # rmse_ is the sum of squared residuals of the current fit.
+            rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)
+            gamma_ = 1. - lambda_[keep_lambda] * np.diag(sigma_)
+            lambda_[keep_lambda] = ((gamma_ + 2. * lambda_1) /
+                                    ((coef_[keep_lambda]) ** 2 +
+                                     2. * lambda_2))
+            alpha_ = ((n_samples - gamma_.sum() + 2. * alpha_1) /
+                      (rmse_ + 2. * alpha_2))
+
+            # Prune the weights with a precision over a threshold
+            keep_lambda = lambda_ < self.threshold_lambda
+            coef_[~keep_lambda] = 0
+
+            # Compute the objective function
+            if self.compute_score:
+                s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum()
+                s += alpha_1 * log(alpha_) - alpha_2 * alpha_
+                s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_) +
+                            np.sum(np.log(lambda_)))
+                s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_ ** 2).sum())
+                self.scores_.append(s)
+
+            # Check for convergence
+            if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:
+                if verbose:
+                    print("Converged after %s iterations" % iter_)
+                break
+            coef_old_ = np.copy(coef_)
+
+            # Every weight has been pruned: nothing left to iterate on.
+            if not keep_lambda.any():
+                break
+
+        if keep_lambda.any():
+            # update sigma and mu using updated params from the last iteration
+            sigma_ = update_sigma(X, alpha_, lambda_, keep_lambda)
+            coef_ = update_coeff(X, y, coef_, alpha_, keep_lambda, sigma_)
+        else:
+            # No feature survived: the covariance of the kept weights is
+            # an empty (0, 0) matrix.
+            sigma_ = np.array([]).reshape(0, 0)
+
+        self.coef_ = coef_
+        self.alpha_ = alpha_
+        self.sigma_ = sigma_
+        self.lambda_ = lambda_
+        self._set_intercept(X_offset_, y_offset_, X_scale_)
+        return self
+
+    def _update_sigma_woodbury(self, X, alpha_, lambda_, keep_lambda):
+        """Covariance of the kept weights through the Woodbury identity.
+
+        Selected by ``fit`` when n_samples < n_features, so that the matrix
+        handed to ``pinvh`` has shape (n_samples, n_samples).
+        See slides as referenced in the class docstring note and
+        https://en.wikipedia.org/wiki/Woodbury_matrix_identity
+        """
+        n_samples = X.shape[0]
+        active_X = X[:, keep_lambda]
+        active_lambda = lambda_[keep_lambda]
+        # Row vector of prior variances, broadcast over the samples.
+        inv_lambda = 1 / active_lambda.reshape(1, -1)
+        scaled_X = active_X * inv_lambda
+
+        # Sample-space inverse: (I / alpha + X diag(1/lambda) X^T)^-1
+        sample_space_inv = pinvh(
+            np.eye(n_samples) / alpha_ + np.dot(scaled_X, active_X.T)
+        )
+        right_factor = np.dot(sample_space_inv, scaled_X)
+        sigma_ = - np.dot(inv_lambda.reshape(-1, 1) * active_X.T,
+                          right_factor)
+        # Add diag(1/lambda) to finish the identity.
+        sigma_[np.diag_indices(sigma_.shape[1])] += 1. / active_lambda
+        return sigma_
+
+    def _update_sigma(self, X, alpha_, lambda_, keep_lambda):
+        """Covariance of the kept weights by direct inversion.
+
+        Selected by ``fit`` when n_samples >= n_features; the matrix handed
+        to ``pinvh`` has one row and one column per kept feature.
+        See slides as referenced in the class docstring note.
+        """
+        active_X = X[:, keep_lambda]
+        gram = np.dot(active_X.T, active_X)
+        identity = np.eye(gram.shape[0])
+        # Posterior precision: diag(lambda) + alpha * X^T X
+        precision = lambda_[keep_lambda] * identity + alpha_ * gram
+        return pinvh(precision)
+
+    def predict(self, X, return_std=False):
+        """Predict using the linear model.
+
+        The mean of the predictive distribution is always computed; its
+        standard deviation is returned as well on request.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Samples.
+
+        return_std : bool, default=False
+            Whether to return the standard deviation of posterior prediction.
+
+        Returns
+        -------
+        y_mean : array-like of shape (n_samples,)
+            Mean of predictive distribution of query points.
+
+        y_std : array-like of shape (n_samples,)
+            Standard deviation of predictive distribution of query points.
+            Only returned when ``return_std`` is not ``False``.
+        """
+        y_mean = self._decision_function(X)
+        # Identity test on purpose: only the literal False skips the std.
+        if return_std is False:
+            return y_mean
+
+        if self.normalize:
+            # Put X in the same centered/scaled space the weights live in.
+            X = (X - self.X_offset_) / self.X_scale_
+        # sigma_ only covers the features that were not pruned, so restrict
+        # X to the same columns before forming x^T sigma_ x.
+        kept_features = self.lambda_ < self.threshold_lambda
+        X = X[:, kept_features]
+        weight_variance = (np.dot(X, self.sigma_) * X).sum(axis=1)
+        # 1 / alpha_: variance of the noise.
+        noise_variance = 1. / self.alpha_
+        y_std = np.sqrt(weight_variance + noise_variance)
+        return y_mean, y_std
--- a/venv/Lib/site-packages/sklearn/linear_model/_cd_fast.cp36-win32.pyd
+++ b/venv/Lib/site-packages/sklearn/linear_model/_cd_fast.cp36-win32.pyd
--- a/venv/Lib/site-packages/sklearn/linear_model/_coordinate_descent.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_coordinate_descent.py
--- a/venv/Lib/site-packages/sklearn/linear_model/_glm/init.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_glm/init.py
@ -0,0 +1,15 @@
+# License: BSD 3 clause
+
+from .glm import (
+    GeneralizedLinearRegressor,
+    PoissonRegressor,
+    GammaRegressor,
+    TweedieRegressor
+)
+
+__all__ = [
+    "GeneralizedLinearRegressor",
+    "PoissonRegressor",
+    "GammaRegressor",
+    "TweedieRegressor"
+]
--- a/venv/Lib/site-packages/sklearn/linear_model/_glm/__pycache__/__init__.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/_glm/__pycache__/__init__.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/_glm/__pycache__/glm.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/_glm/__pycache__/glm.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/_glm/__pycache__/link.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/_glm/__pycache__/link.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/_glm/glm.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_glm/glm.py
@@ -0,0 +1,615 @@
+"""
+Generalized Linear Models with Exponential Dispersion Family
+"""
+
+# Author: Christian Lorentzen <lorentzen.ch@googlemail.com>
+# some parts and tricks stolen from other sklearn files.
+# License: BSD 3 clause
+
+import numbers
+
+import numpy as np
+import scipy.optimize
+
+from ...base import BaseEstimator, RegressorMixin
+from ...utils import check_array, check_X_y
+from ...utils.optimize import _check_optimize_result
+from ...utils.validation import check_is_fitted, _check_sample_weight
+from ..._loss.glm_distribution import (
+        ExponentialDispersionModel,
+        TweedieDistribution,
+        EDM_DISTRIBUTIONS
+)
+from .link import (
+        BaseLink,
+        IdentityLink,
+        LogLink,
+)
+
+
+def _safe_lin_pred(X, coef):
+    """Compute the linear predictor taking care if intercept is present."""
+    if coef.size == X.shape[1] + 1:
+        return X @ coef[1:] + coef[0]
+    else:
+        return X @ coef
+
+
+def _y_pred_deviance_derivative(coef, X, y, weights, family, link):
+    """Compute y_pred and the derivative of the deviance w.r.t coef."""
+    lin_pred = _safe_lin_pred(X, coef)
+    y_pred = link.inverse(lin_pred)
+    d1 = link.inverse_derivative(lin_pred)
+    temp = d1 * family.deviance_derivative(y, y_pred, weights)
+    if coef.size == X.shape[1] + 1:
+        devp = np.concatenate(([temp.sum()], temp @ X))
+    else:
+        devp = temp @ X  # same as X.T @ temp
+    return y_pred, devp
+
+
+class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin):
+    """Regression via a penalized Generalized Linear Model (GLM).
+
+    GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at
+    fitting and predicting the mean of the target y as y_pred=h(X*w).
+    Therefore, the fit minimizes the following objective function with L2
+    priors as regularizer::
+
+            1/(2*sum(s)) * deviance(y, h(X*w); s)
+            + 1/2 * alpha * |w|_2
+
+    with inverse link function h and s=sample_weight.
+    The parameter ``alpha`` corresponds to the lambda parameter in glmnet.
+
+    Read more in the :ref:`User Guide <Generalized_linear_regression>`.
+
+    Parameters
+    ----------
+    alpha : float, default=1
+        Constant that multiplies the penalty term and thus determines the
+        regularization strength. ``alpha = 0`` is equivalent to unpenalized
+        GLMs. In this case, the design matrix `X` must have full column rank
+        (no collinearities).
+
+    fit_intercept : bool, default=True
+        Specifies if a constant (a.k.a. bias or intercept) should be
+        added to the linear predictor (X @ coef + intercept).
+
+    family : {'normal', 'poisson', 'gamma', 'inverse-gaussian'} \
+            or an ExponentialDispersionModel instance, default='normal'
+        The distributional assumption of the GLM, i.e. which distribution from
+        the EDM, specifies the loss function to be minimized.
+
+    link : {'auto', 'identity', 'log'} or an instance of class BaseLink, \
+            default='auto'
+        The link function of the GLM, i.e. mapping from linear predictor
+        `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets
+        the link depending on the chosen family as follows:
+
+        - 'identity' for Normal distribution
+        - 'log' for Poisson,  Gamma and Inverse Gaussian distributions
+
+    solver : 'lbfgs', default='lbfgs'
+        Algorithm to use in the optimization problem:
+
+        'lbfgs'
+            Calls scipy's L-BFGS-B optimizer.
+
+    max_iter : int, default=100
+        The maximal number of iterations for the solver.
+
+    tol : float, default=1e-4
+        Stopping criterion. For the lbfgs solver,
+        the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``
+        where ``g_j`` is the j-th component of the gradient (derivative) of
+        the objective function.
+
+    warm_start : bool, default=False
+        If set to ``True``, reuse the solution of the previous call to ``fit``
+        as initialization for ``coef_`` and ``intercept_``.
+
+    verbose : int, default=0
+        For the lbfgs solver set verbose to any positive number for verbosity.
+
+    Attributes
+    ----------
+    coef_ : array of shape (n_features,)
+        Estimated coefficients for the linear predictor (`X @ coef_ +
+        intercept_`) in the GLM.
+
+    intercept_ : float
+        Intercept (a.k.a. bias) added to linear predictor.
+
+    n_iter_ : int
+        Actual number of iterations used in the solver.
+    """
+    def __init__(self, *, alpha=1.0,
+                 fit_intercept=True, family='normal', link='auto',
+                 solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False,
+                 verbose=0):
+        # All hyper-parameters are keyword-only and are stored unchanged;
+        # nothing is validated or converted here.
+        self.alpha = alpha
+        self.fit_intercept = fit_intercept
+        self.family = family
+        self.link = link
+        self.solver = solver
+        self.max_iter = max_iter
+        self.tol = tol
+        self.warm_start = warm_start
+        self.verbose = verbose
+
+    def fit(self, X, y, sample_weight=None):
+        """Fit a Generalized Linear Model.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Training data.
+
+        y : array-like of shape (n_samples,)
+            Target values.
+
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights.
+
+        Returns
+        -------
+        self : returns an instance of self.
+        """
+        if isinstance(self.family, ExponentialDispersionModel):
+            self._family_instance = self.family
+        elif self.family in EDM_DISTRIBUTIONS:
+            self._family_instance = EDM_DISTRIBUTIONS[self.family]()
+        else:
+            raise ValueError(
+                "The family must be an instance of class"
+                " ExponentialDispersionModel or an element of"
+                " ['normal', 'poisson', 'gamma', 'inverse-gaussian']"
+                "; got (family={0})".format(self.family))
+
+        # Guarantee that self._link_instance is set to an instance of
+        # class BaseLink
+        if isinstance(self.link, BaseLink):
+            self._link_instance = self.link
+        else:
+            if self.link == 'auto':
+                if isinstance(self._family_instance, TweedieDistribution):
+                    if self._family_instance.power <= 0:
+                        self._link_instance = IdentityLink()
+                    if self._family_instance.power >= 1:
+                        self._link_instance = LogLink()
+                else:
+                    raise ValueError("No default link known for the "
+                                     "specified distribution family. Please "
+                                     "set link manually, i.e. not to 'auto'; "
+                                     "got (link='auto', family={})"
+                                     .format(self.family))
+            elif self.link == 'identity':
+                self._link_instance = IdentityLink()
+            elif self.link == 'log':
+                self._link_instance = LogLink()
+            else:
+                raise ValueError(
+                    "The link must be an instance of class Link or "
+                    "an element of ['auto', 'identity', 'log']; "
+                    "got (link={0})".format(self.link))
+
+        if not isinstance(self.alpha, numbers.Number) or self.alpha < 0:
+            raise ValueError("Penalty term must be a non-negative number;"
+                             " got (alpha={0})".format(self.alpha))
+        if not isinstance(self.fit_intercept, bool):
+            raise ValueError("The argument fit_intercept must be bool;"
+                             " got {0}".format(self.fit_intercept))
+        if self.solver not in ['lbfgs']:
+            raise ValueError("GeneralizedLinearRegressor supports only solvers"
+                             "'lbfgs'; got {0}".format(self.solver))
+        solver = self.solver
+        if (not isinstance(self.max_iter, numbers.Integral)
+                or self.max_iter <= 0):
+            raise ValueError("Maximum number of iteration must be a positive "
+                             "integer;"
+                             " got (max_iter={0!r})".format(self.max_iter))
+        if not isinstance(self.tol, numbers.Number) or self.tol <= 0:
+            raise ValueError("Tolerance for stopping criteria must be "
+                             "positive; got (tol={0!r})".format(self.tol))
+        if not isinstance(self.warm_start, bool):
+            raise ValueError("The argument warm_start must be bool;"
+                             " got {0}".format(self.warm_start))
+
+        family = self._family_instance
+        link = self._link_instance
+
+        X, y = check_X_y(X, y, accept_sparse=['csc', 'csr'],
+                         dtype=[np.float64, np.float32],
+                         y_numeric=True, multi_output=False)
+
+        weights = _check_sample_weight(sample_weight, X)
+
+        _, n_features = X.shape
+
+        if not np.all(family.in_y_range(y)):
+            raise ValueError("Some value(s) of y are out of the valid "
+                             "range for family {0}"
+                             .format(family.__class__.__name__))
+        # TODO: if alpha=0 check that X is not rank deficient
+
+        # rescaling of sample_weight
+        #
+        # IMPORTANT NOTE: Since we want to minimize
+        # 1/(2*sum(sample_weight)) * deviance + L2,
+        # deviance = sum(sample_weight * unit_deviance),
+        # we rescale weights such that sum(weights) = 1 and this becomes
+        # 1/2*deviance + L2 with deviance=sum(weights * unit_deviance)
+        weights = weights / weights.sum()
+
+        if self.warm_start and hasattr(self, 'coef_'):
+            if self.fit_intercept:
+                coef = np.concatenate((np.array([self.intercept_]),
+                                       self.coef_))
+            else:
+                coef = self.coef_
+        else:
+            if self.fit_intercept:
+                coef = np.zeros(n_features+1)
+                coef[0] = link(np.average(y, weights=weights))
+            else:
+                coef = np.zeros(n_features)
+
+        # algorithms for optimization
+
+        if solver == 'lbfgs':
+            def func(coef, X, y, weights, alpha, family, link):
+                y_pred, devp = _y_pred_deviance_derivative(
+                    coef, X, y, weights, family, link
+                )
+                dev = family.deviance(y, y_pred, weights)
+                # offset if coef[0] is intercept
+                offset = 1 if self.fit_intercept else 0
+                coef_scaled = alpha * coef[offset:]
+                obj = 0.5 * dev + 0.5 * (coef[offset:] @ coef_scaled)
+                objp = 0.5 * devp
+                objp[offset:] += coef_scaled
+                return obj, objp
+
+            args = (X, y, weights, self.alpha, family, link)
+
+            opt_res = scipy.optimize.minimize(
+                func, coef, method="L-BFGS-B", jac=True,
+                options={
+                    "maxiter": self.max_iter,
+                    "iprint": (self.verbose > 0) - 1,
+                    "gtol": self.tol,
+                    "ftol": 1e3*np.finfo(float).eps,
+                },
+                args=args)
+            self.n_iter_ = _check_optimize_result("lbfgs", opt_res)
+            coef = opt_res.x
+
+        if self.fit_intercept:
+            self.intercept_ = coef[0]
+            self.coef_ = coef[1:]
+        else:
+            # set intercept to zero as the other linear models do
+            self.intercept_ = 0.
+            self.coef_ = coef
+
+        return self
+
+    def _linear_predictor(self, X):
+        """Compute the linear_predictor = `X @ coef_ + intercept_`.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Samples.
+
+        Returns
+        -------
+        y_pred : array of shape (n_samples,)
+            Returns predicted values of linear predictor.
+        """
+        check_is_fitted(self)
+        X = check_array(X, accept_sparse=['csr', 'csc', 'coo'],
+                        dtype=[np.float64, np.float32], ensure_2d=True,
+                        allow_nd=False)
+        return X @ self.coef_ + self.intercept_
+
+    def predict(self, X):
+        """Predict using GLM with feature matrix X.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Samples.
+
+        Returns
+        -------
+        y_pred : array of shape (n_samples,)
+            Returns predicted values.
+        """
+        # check_array is done in _linear_predictor
+        eta = self._linear_predictor(X)
+        y_pred = self._link_instance.inverse(eta)
+        return y_pred
+
+    def score(self, X, y, sample_weight=None):
+        """Compute D^2, the percentage of deviance explained.
+
+        D^2 is a generalization of the coefficient of determination R^2.
+        R^2 uses squared error and D^2 deviance. Note that those two are equal
+        for ``family='normal'``.
+
+        D^2 is defined as
+        :math:`D^2 = 1-\\frac{D(y_{true},y_{pred})}{D_{null}}`,
+        :math:`D_{null}` is the null deviance, i.e. the deviance of a model
+        with intercept alone, which corresponds to :math:`y_{pred} = \\bar{y}`.
+        The mean :math:`\\bar{y}` is averaged by sample_weight.
+        Best possible score is 1.0 and it can be negative (because the model
+        can be arbitrarily worse).
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Test samples.
+
+        y : array-like of shape (n_samples,)
+            True values of target.
+
+        sample_weight : array-like of shape (n_samples,), default=None
+            Sample weights.
+
+        Returns
+        -------
+        score : float
+            D^2 of self.predict(X) w.r.t. y.
+        """
+        # Note, default score defined in RegressorMixin is R^2 score.
+        # TODO: make D^2 a score function in module metrics (and thereby get
+        #       input validation and so on)
+        weights = _check_sample_weight(sample_weight, X)
+        y_pred = self.predict(X)
+        dev = self._family_instance.deviance(y, y_pred, weights=weights)
+        y_mean = np.average(y, weights=weights)
+        dev_null = self._family_instance.deviance(y, y_mean, weights=weights)
+        return 1 - dev / dev_null
+
+    def _more_tags(self):
+        # create the _family_instance if fit wasn't called yet.
+        if hasattr(self, '_family_instance'):
+            _family_instance = self._family_instance
+        elif isinstance(self.family, ExponentialDispersionModel):
+            _family_instance = self.family
+        elif self.family in EDM_DISTRIBUTIONS:
+            _family_instance = EDM_DISTRIBUTIONS[self.family]()
+        else:
+            raise ValueError
+        return {"requires_positive_y": not _family_instance.in_y_range(-1.0)}
+
+
+class PoissonRegressor(GeneralizedLinearRegressor):
+    """Generalized Linear Model with a Poisson distribution.
+
+    Read more in the :ref:`User Guide <Generalized_linear_regression>`.
+
+    Parameters
+    ----------
+    alpha : float, default=1
+        Constant that multiplies the penalty term and thus determines the
+        regularization strength. ``alpha = 0`` is equivalent to unpenalized
+        GLMs. In this case, the design matrix `X` must have full column rank
+        (no collinearities).
+
+    fit_intercept : bool, default=True
+        Specifies if a constant (a.k.a. bias or intercept) should be
+        added to the linear predictor (X @ coef + intercept).
+
+    max_iter : int, default=100
+        The maximal number of iterations for the solver.
+
+    tol : float, default=1e-4
+        Stopping criterion. For the lbfgs solver,
+        the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``
+        where ``g_j`` is the j-th component of the gradient (derivative) of
+        the objective function.
+
+    warm_start : bool, default=False
+        If set to ``True``, reuse the solution of the previous call to ``fit``
+        as initialization for ``coef_`` and ``intercept_`` .
+
+    verbose : int, default=0
+        For the lbfgs solver set verbose to any positive number for verbosity.
+
+    Attributes
+    ----------
+    coef_ : array of shape (n_features,)
+        Estimated coefficients for the linear predictor (`X @ coef_ +
+        intercept_`) in the GLM.
+
+    intercept_ : float
+        Intercept (a.k.a. bias) added to linear predictor.
+
+    n_iter_ : int
+        Actual number of iterations used in the solver.
+    """
+    def __init__(self, *, alpha=1.0, fit_intercept=True, max_iter=100,
+                 tol=1e-4, warm_start=False, verbose=0):
+
+        super().__init__(alpha=alpha, fit_intercept=fit_intercept,
+                         family="poisson", link='log', max_iter=max_iter,
+                         tol=tol, warm_start=warm_start, verbose=verbose)
+
+    @property
+    def family(self):
+        # Make this attribute read-only to avoid mis-uses e.g. in GridSearch.
+        return "poisson"
+
+    @family.setter
+    def family(self, value):
+        if value != "poisson":
+            raise ValueError("PoissonRegressor.family must be 'poisson'!")
+
+
+class GammaRegressor(GeneralizedLinearRegressor):
+    """Generalized Linear Model with a Gamma distribution.
+
+    Read more in the :ref:`User Guide <Generalized_linear_regression>`.
+
+    Parameters
+    ----------
+    alpha : float, default=1
+        Constant that multiplies the penalty term and thus determines the
+        regularization strength. ``alpha = 0`` is equivalent to unpenalized
+        GLMs. In this case, the design matrix `X` must have full column rank
+        (no collinearities).
+
+    fit_intercept : bool, default=True
+        Specifies if a constant (a.k.a. bias or intercept) should be
+        added to the linear predictor (X @ coef + intercept).
+
+    max_iter : int, default=100
+        The maximal number of iterations for the solver.
+
+    tol : float, default=1e-4
+        Stopping criterion. For the lbfgs solver,
+        the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``
+        where ``g_j`` is the j-th component of the gradient (derivative) of
+        the objective function.
+
+    warm_start : bool, default=False
+        If set to ``True``, reuse the solution of the previous call to ``fit``
+        as initialization for ``coef_`` and ``intercept_`` .
+
+    verbose : int, default=0
+        For the lbfgs solver set verbose to any positive number for verbosity.
+
+    Attributes
+    ----------
+    coef_ : array of shape (n_features,)
+        Estimated coefficients for the linear predictor (`X * coef_ +
+        intercept_`) in the GLM.
+
+    intercept_ : float
+        Intercept (a.k.a. bias) added to linear predictor.
+
+    n_iter_ : int
+        Actual number of iterations used in the solver.
+    """
+    def __init__(self, *, alpha=1.0, fit_intercept=True, max_iter=100,
+                 tol=1e-4, warm_start=False, verbose=0):
+
+        super().__init__(alpha=alpha, fit_intercept=fit_intercept,
+                         family="gamma", link='log', max_iter=max_iter,
+                         tol=tol, warm_start=warm_start, verbose=verbose)
+
+    @property
+    def family(self):
+        # Make this attribute read-only to avoid mis-uses e.g. in GridSearch.
+        return "gamma"
+
+    @family.setter
+    def family(self, value):
+        if value != "gamma":
+            raise ValueError("GammaRegressor.family must be 'gamma'!")
+
+
+class TweedieRegressor(GeneralizedLinearRegressor):
+    """Generalized Linear Model with a Tweedie distribution.
+
+    This estimator can be used to model different GLMs depending on the
+    ``power`` parameter, which determines the underlying distribution.
+
+    Read more in the :ref:`User Guide <Generalized_linear_regression>`.
+
+    Parameters
+    ----------
+    power : float, default=0
+            The power determines the underlying target distribution according
+            to the following table:
+
+            +-------+------------------------+
+            | Power | Distribution           |
+            +=======+========================+
+            | 0     | Normal                 |
+            +-------+------------------------+
+            | 1     | Poisson                |
+            +-------+------------------------+
+            | (1,2) | Compound Poisson Gamma |
+            +-------+------------------------+
+            | 2     | Gamma                  |
+            +-------+------------------------+
+            | 3     | Inverse Gaussian       |
+            +-------+------------------------+
+
+            For ``0 < power < 1``, no distribution exists.
+
+    alpha : float, default=1
+        Constant that multiplies the penalty term and thus determines the
+        regularization strength. ``alpha = 0`` is equivalent to unpenalized
+        GLMs. In this case, the design matrix `X` must have full column rank
+        (no collinearities).
+
+    link : {'auto', 'identity', 'log'}, default='auto'
+        The link function of the GLM, i.e. mapping from linear predictor
+        `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets
+        the link depending on the chosen family as follows:
+
+        - 'identity' for Normal distribution
+        - 'log' for Poisson,  Gamma and Inverse Gaussian distributions
+
+    fit_intercept : bool, default=True
+        Specifies if a constant (a.k.a. bias or intercept) should be
+        added to the linear predictor (X @ coef + intercept).
+
+    max_iter : int, default=100
+        The maximal number of iterations for the solver.
+
+    tol : float, default=1e-4
+        Stopping criterion. For the lbfgs solver,
+        the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol``
+        where ``g_j`` is the j-th component of the gradient (derivative) of
+        the objective function.
+
+    warm_start : bool, default=False
+        If set to ``True``, reuse the solution of the previous call to ``fit``
+        as initialization for ``coef_`` and ``intercept_`` .
+
+    verbose : int, default=0
+        For the lbfgs solver set verbose to any positive number for verbosity.
+
+    Attributes
+    ----------
+    coef_ : array of shape (n_features,)
+        Estimated coefficients for the linear predictor (`X @ coef_ +
+        intercept_`) in the GLM.
+
+    intercept_ : float
+        Intercept (a.k.a. bias) added to linear predictor.
+
+    n_iter_ : int
+        Actual number of iterations used in the solver.
+    """
+    def __init__(self, *, power=0.0, alpha=1.0, fit_intercept=True,
+                 link='auto', max_iter=100, tol=1e-4,
+                 warm_start=False, verbose=0):
+
+        super().__init__(alpha=alpha, fit_intercept=fit_intercept,
+                         family=TweedieDistribution(power=power), link=link,
+                         max_iter=max_iter, tol=tol,
+                         warm_start=warm_start, verbose=verbose)
+
+    @property
+    def family(self):
+        # We use a property with a setter to make sure that the family is
+        # always a Tweedie distribution, and that self.power and
+        # self.family.power are identical by construction.
+        dist = TweedieDistribution(power=self.power)
+        # TODO: make the returned object immutable
+        return dist
+
+    @family.setter
+    def family(self, value):
+        if isinstance(value, TweedieDistribution):
+            self.power = value.power
+        else:
+            raise TypeError("TweedieRegressor.family must be of type "
+                            "TweedieDistribution!")
--- a/venv/Lib/site-packages/sklearn/linear_model/_glm/link.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_glm/link.py
@@ -0,0 +1,110 @@
+"""
+Link functions used in GLM
+"""
+
+# Author: Christian Lorentzen <lorentzen.ch@googlemail.com>
+# License: BSD 3 clause
+
+from abc import ABCMeta, abstractmethod
+
+import numpy as np
+from scipy.special import expit, logit
+
+
+class BaseLink(metaclass=ABCMeta):
+    """Abstract base class for Link functions."""
+
+    @abstractmethod
+    def __call__(self, y_pred):
+        """Compute the link function g(y_pred).
+
+        The link function links the mean y_pred=E[Y] to the so called linear
+        predictor (X*w), i.e. g(y_pred) = linear predictor.
+
+        Parameters
+        ----------
+        y_pred : array of shape (n_samples,)
+            Usually the (predicted) mean.
+        """
+
+    @abstractmethod
+    def derivative(self, y_pred):
+        """Compute the derivative of the link g'(y_pred).
+
+        Parameters
+        ----------
+        y_pred : array of shape (n_samples,)
+            Usually the (predicted) mean.
+        """
+
+    @abstractmethod
+    def inverse(self, lin_pred):
+        """Compute the inverse link function h(lin_pred).
+
+        Gives the inverse relationship between linear predictor and the mean
+        y_pred=E[Y], i.e. h(linear predictor) = y_pred.
+
+        Parameters
+        ----------
+        lin_pred : array of shape (n_samples,)
+            Usually the (fitted) linear predictor.
+        """
+
+    @abstractmethod
+    def inverse_derivative(self, lin_pred):
+        """Compute the derivative of the inverse link function h'(lin_pred).
+
+        Parameters
+        ----------
+        lin_pred : array of shape (n_samples,)
+            Usually the (fitted) linear predictor.
+        """
+
+
+class IdentityLink(BaseLink):
+    """The identity link function g(x)=x."""
+
+    def __call__(self, y_pred):
+        return y_pred
+
+    def derivative(self, y_pred):
+        return np.ones_like(y_pred)
+
+    def inverse(self, lin_pred):
+        return lin_pred
+
+    def inverse_derivative(self, lin_pred):
+        return np.ones_like(lin_pred)
+
+
+class LogLink(BaseLink):
+    """The log link function g(x)=log(x)."""
+
+    def __call__(self, y_pred):
+        return np.log(y_pred)
+
+    def derivative(self, y_pred):
+        return 1 / y_pred
+
+    def inverse(self, lin_pred):
+        return np.exp(lin_pred)
+
+    def inverse_derivative(self, lin_pred):
+        return np.exp(lin_pred)
+
+
+class LogitLink(BaseLink):
+    """The logit link function g(x)=logit(x)."""
+
+    def __call__(self, y_pred):
+        return logit(y_pred)
+
+    def derivative(self, y_pred):
+        return 1 / (y_pred * (1 - y_pred))
+
+    def inverse(self, lin_pred):
+        return expit(lin_pred)
+
+    def inverse_derivative(self, lin_pred):
+        ep = expit(lin_pred)
+        return ep * (1 - ep)
--- a/venv/Lib/site-packages/sklearn/linear_model/_glm/tests/__init__.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_glm/tests/__init__.py
@@ -0,0 +1 @@
+# License: BSD 3 clause
--- a/venv/Lib/site-packages/sklearn/linear_model/_glm/tests/__pycache__/__init__.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/_glm/tests/__pycache__/__init__.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/_glm/tests/__pycache__/test_glm.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/_glm/tests/__pycache__/test_glm.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/_glm/tests/__pycache__/test_link.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/_glm/tests/__pycache__/test_link.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/_glm/tests/test_glm.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_glm/tests/test_glm.py
@@ -0,0 +1,431 @@
+# Authors: Christian Lorentzen <lorentzen.ch@gmail.com>
+#
+# License: BSD 3 clause
+
+import numpy as np
+from numpy.testing import assert_allclose
+import pytest
+import warnings
+
+from sklearn.datasets import make_regression
+from sklearn.linear_model._glm import GeneralizedLinearRegressor
+from sklearn.linear_model import (
+    TweedieRegressor,
+    PoissonRegressor,
+    GammaRegressor
+)
+from sklearn.linear_model._glm.link import (
+    IdentityLink,
+    LogLink,
+)
+from sklearn._loss.glm_distribution import (
+    TweedieDistribution,
+    NormalDistribution, PoissonDistribution,
+    GammaDistribution, InverseGaussianDistribution,
+)
+from sklearn.linear_model import Ridge
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.model_selection import train_test_split
+
+
+@pytest.fixture(scope="module")
+def regression_data():
+    """Module-scoped regression dataset shared by several tests.
+
+    Returns
+    -------
+    X : ndarray of shape (107, 10)
+        Feature matrix.
+
+    y : ndarray of shape (107,)
+        Regression targets.
+    """
+    # make_regression caps n_informative at n_features, so ask for exactly
+    # n_features instead of a larger number that only looks meaningful.
+    n_features = 10
+    X, y = make_regression(n_samples=107,
+                           n_features=n_features,
+                           n_informative=n_features, noise=0.5,
+                           random_state=2)
+    return X, y
+
+
+def test_sample_weights_validation():
+    """Test the raised errors in the validation of sample_weight."""
+    X = [[1]]
+    y = [1]
+    glm = GeneralizedLinearRegressor()
+
+    # A positive scalar weight is accepted.
+    glm.fit(X, y, sample_weight=1)
+
+    # A 2d array is rejected.
+    weights = [[0]]
+    with pytest.raises(ValueError, match="must be 1D array or scalar"):
+        glm.fit(X, y, sample_weight=weights)
+
+    # A 1d array whose length differs from the number of samples is rejected.
+    weights = [1, 0]
+    msg = r"sample_weight.shape == \(2,\), expected \(1,\)!"
+    with pytest.raises(ValueError, match=msg):
+        glm.fit(X, y, sample_weight=weights)
+
+
+@pytest.mark.parametrize('name, instance',
+                         [('normal', NormalDistribution()),
+                          ('poisson', PoissonDistribution()),
+                          ('gamma', GammaDistribution()),
+                          ('inverse-gaussian', InverseGaussianDistribution())])
+def test_glm_family_argument(name, instance):
+    """Check that a family given by name resolves to the right distribution.
+
+    An unknown family name must make ``fit`` raise a ``ValueError``.
+    """
+    features = np.array([[1], [2]])
+    # These targets are in the range of all distributions under test.
+    targets = np.array([0.1, 0.5])
+
+    estimator = GeneralizedLinearRegressor(family=name, alpha=0)
+    fitted = estimator.fit(features, targets)
+    assert isinstance(fitted._family_instance, type(instance))
+
+    invalid = GeneralizedLinearRegressor(family='not a family')
+    with pytest.raises(ValueError, match="family must be"):
+        invalid.fit(features, targets)
+
+
+@pytest.mark.parametrize('name, instance',
+                         [('identity', IdentityLink()),
+                          ('log', LogLink())])
+def test_glm_link_argument(name, instance):
+    """Check that a link given by name resolves to the right link class.
+
+    An unknown link name must make ``fit`` raise a ``ValueError``.
+    """
+    features = np.array([[1], [2]])
+    # These targets are in the range of all distributions under test.
+    targets = np.array([0.1, 0.5])
+
+    estimator = GeneralizedLinearRegressor(family='normal', link=name)
+    fitted = estimator.fit(features, targets)
+    assert isinstance(fitted._link_instance, type(instance))
+
+    invalid = GeneralizedLinearRegressor(family='normal', link='not a link')
+    with pytest.raises(ValueError, match="link must be"):
+        invalid.fit(features, targets)
+
+
+@pytest.mark.parametrize('family, expected_link_class', [
+    ('normal', IdentityLink),
+    ('poisson', LogLink),
+    ('gamma', LogLink),
+    ('inverse-gaussian', LogLink),
+])
+def test_glm_link_auto(family, expected_link_class):
+    """Check that link='auto' delivers the expected link for each family."""
+    features = np.array([[1], [2]])
+    # These targets are in the range of all distributions under test.
+    targets = np.array([0.1, 0.5])
+    estimator = GeneralizedLinearRegressor(family=family, link='auto')
+    fitted = estimator.fit(features, targets)
+    assert isinstance(fitted._link_instance, expected_link_class)
+
+
+@pytest.mark.parametrize('alpha', ['not a number', -4.2])
+def test_glm_alpha_argument(alpha):
+    """Check that ``fit`` rejects an invalid alpha argument."""
+    features = np.array([[1], [2]])
+    targets = np.array([1, 2])
+    estimator = GeneralizedLinearRegressor(family='normal', alpha=alpha)
+    expected_msg = "Penalty term must be a non-negative"
+    with pytest.raises(ValueError, match=expected_msg):
+        estimator.fit(features, targets)
+
+
+@pytest.mark.parametrize('fit_intercept', ['not bool', 1, 0, [True]])
+def test_glm_fit_intercept_argument(fit_intercept):
+    """Check that ``fit`` rejects a fit_intercept that is not a bool."""
+    features = np.array([[1], [1]])
+    targets = np.array([1, 2])
+    estimator = GeneralizedLinearRegressor(fit_intercept=fit_intercept)
+    expected_msg = "fit_intercept must be bool"
+    with pytest.raises(ValueError, match=expected_msg):
+        estimator.fit(features, targets)
+
+
+@pytest.mark.parametrize('solver',
+                         ['not a solver', 1, [1]])
+def test_glm_solver_argument(solver):
+    """Check that ``fit`` rejects an invalid solver argument."""
+    features = np.array([[1], [2]])
+    targets = np.array([1, 2])
+    estimator = GeneralizedLinearRegressor(solver=solver)
+    # Only the exception type is checked here, not its message.
+    with pytest.raises(ValueError):
+        estimator.fit(features, targets)
+
+
+@pytest.mark.parametrize('max_iter', ['not a number', 0, -1, 5.5, [1]])
+def test_glm_max_iter_argument(max_iter):
+    """Check that ``fit`` rejects a max_iter that is not a positive integer."""
+    features = np.array([[1], [2]])
+    targets = np.array([1, 2])
+    estimator = GeneralizedLinearRegressor(max_iter=max_iter)
+    expected_msg = "must be a positive integer"
+    with pytest.raises(ValueError, match=expected_msg):
+        estimator.fit(features, targets)
+
+
+@pytest.mark.parametrize('tol', ['not a number', 0, -1.0, [1e-3]])
+def test_glm_tol_argument(tol):
+    """Check that ``fit`` rejects a tol that is not a positive number."""
+    features = np.array([[1], [2]])
+    targets = np.array([1, 2])
+    estimator = GeneralizedLinearRegressor(tol=tol)
+    expected_msg = "stopping criteria must be positive"
+    with pytest.raises(ValueError, match=expected_msg):
+        estimator.fit(features, targets)
+
+
+@pytest.mark.parametrize('warm_start', ['not bool', 1, 0, [True]])
+def test_glm_warm_start_argument(warm_start):
+    """Check that ``fit`` rejects a warm_start that is not a bool."""
+    features = np.array([[1], [1]])
+    targets = np.array([1, 2])
+    estimator = GeneralizedLinearRegressor(warm_start=warm_start)
+    expected_msg = "warm_start must be bool"
+    with pytest.raises(ValueError, match=expected_msg):
+        estimator.fit(features, targets)
+
+
+@pytest.mark.parametrize('fit_intercept', [False, True])
+def test_glm_identity_regression(fit_intercept):
+    """Fit a noiseless linear target with the identity link.
+
+    The first column of the design matrix is constant, so it plays the role
+    of the intercept: it is dropped from X when the model fits its own.
+    """
+    true_coef = [1., 2.]
+    design = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T
+    target = np.dot(design, true_coef)
+    model = GeneralizedLinearRegressor(alpha=0, family='normal',
+                                       link='identity',
+                                       fit_intercept=fit_intercept,
+                                       tol=1e-12)
+    if not fit_intercept:
+        model.fit(design, target)
+        assert_allclose(model.coef_, true_coef, rtol=1e-12)
+        return
+
+    model.fit(design[:, 1:], target)
+    assert_allclose(model.coef_, true_coef[1:], rtol=1e-10)
+    assert_allclose(model.intercept_, true_coef[0], rtol=1e-10)
+
+
+@pytest.mark.parametrize('fit_intercept', [False, True])
+@pytest.mark.parametrize('alpha', [0.0, 1.0])
+@pytest.mark.parametrize('family', ['normal', 'poisson', 'gamma'])
+def test_glm_sample_weight_consistentcy(fit_intercept, alpha, family):
+    """Check that sample_weight influences the fit in a consistent way."""
+    n_samples, n_features = 10, 5
+    rng = np.random.RandomState(0)
+    X = rng.rand(n_samples, n_features)
+    y = rng.rand(n_samples)
+    params = {'alpha': alpha, 'family': family, 'link': 'auto',
+              'fit_intercept': fit_intercept}
+
+    model = GeneralizedLinearRegressor(**params).fit(X, y)
+    reference_coef = model.coef_.copy()
+
+    # Unit weights must give the same fit as sample_weight=None.
+    model.fit(X, y, sample_weight=np.ones(y.shape))
+    assert_allclose(model.coef_, reference_coef, rtol=1e-12)
+
+    # Weights are normalized to 1, so a uniform rescaling has no effect.
+    model.fit(X, y, sample_weight=2*np.ones(y.shape))
+    assert_allclose(model.coef_, reference_coef, rtol=1e-12)
+
+    # A zero weight is equivalent to removing the corresponding sample.
+    drop_last = np.ones(y.shape)
+    drop_last[-1] = 0
+    model.fit(X, y, sample_weight=drop_last)
+    coef_zero_weight = model.coef_.copy()
+    model.fit(X[:-1], y[:-1])
+    assert_allclose(model.coef_, coef_zero_weight, rtol=1e-12)
+
+    # A weight of 2 is equivalent to repeating the corresponding sample.
+    half = n_samples // 2
+    X_repeated = np.concatenate([X, X[:half]], axis=0)
+    y_repeated = np.concatenate([y, y[:half]])
+    doubled = np.ones(len(y))
+    doubled[:half] = 2
+
+    weighted = GeneralizedLinearRegressor(**params).fit(
+        X, y, sample_weight=doubled)
+    repeated = GeneralizedLinearRegressor(**params).fit(
+        X_repeated, y_repeated, sample_weight=None)
+    assert_allclose(weighted.coef_, repeated.coef_)
+
+
+@pytest.mark.parametrize('fit_intercept', [True, False])
+@pytest.mark.parametrize(
+    'family',
+    [NormalDistribution(), PoissonDistribution(),
+     GammaDistribution(), InverseGaussianDistribution(),
+     TweedieDistribution(power=1.5), TweedieDistribution(power=4.5)])
+def test_glm_log_regression(fit_intercept, family):
+    """Fit a noiseless exponential target with the log link.
+
+    The first column of the design matrix is constant, so it plays the role
+    of the intercept: it is dropped from X when the model fits its own.
+    """
+    true_coef = [0.2, -0.1]
+    design = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T
+    target = np.exp(np.dot(design, true_coef))
+    model = GeneralizedLinearRegressor(
+        alpha=0, family=family, link='log',
+        fit_intercept=fit_intercept, tol=1e-7)
+    if not fit_intercept:
+        fitted = model.fit(design, target)
+        assert_allclose(fitted.coef_, true_coef, rtol=2e-6)
+        return
+
+    fitted = model.fit(design[:, 1:], target)
+    assert_allclose(fitted.coef_, true_coef[1:], rtol=1e-6)
+    assert_allclose(fitted.intercept_, true_coef[0], rtol=1e-6)
+
+
+@pytest.mark.parametrize('fit_intercept', [True, False])
+def test_warm_start(fit_intercept):
+    """Check that a warm-started fit reaches the cold-start solution."""
+    n_samples, n_features = 110, 10
+    X, y = make_regression(n_samples=n_samples, n_features=n_features,
+                           n_informative=n_features-2, noise=0.5,
+                           random_state=42)
+
+    cold = GeneralizedLinearRegressor(warm_start=False,
+                                      fit_intercept=fit_intercept,
+                                      max_iter=1000)
+    cold.fit(X, y)
+
+    warm = GeneralizedLinearRegressor(warm_start=True,
+                                      fit_intercept=fit_intercept,
+                                      max_iter=1)
+    # max_iter=1 is intentional: L-BFGS-B then issues a ConvergenceWarning,
+    # which is expected and simply silenced here.
+    with warnings.catch_warnings():
+        warnings.filterwarnings('ignore', category=ConvergenceWarning)
+        warm.fit(X, y)
+    assert cold.score(X, y) > warm.score(X, y)
+
+    warm.set_params(max_iter=1000)
+    warm.fit(X, y)
+    # The two models are not exactly identical: the lbfgs solver builds its
+    # approximate hessian from previous iterations, which are not strictly
+    # the same after a warm start.
+    assert_allclose(cold.coef_, warm.coef_, rtol=1e-5)
+    assert_allclose(cold.score(X, y), warm.score(X, y), rtol=1e-4)
+
+
+@pytest.mark.parametrize('n_samples, n_features', [(100, 10), (10, 100)])
+@pytest.mark.parametrize('fit_intercept', [True, False])
+@pytest.mark.parametrize('sample_weight', [None, True])
+def test_normal_ridge_comparison(n_samples, n_features, fit_intercept,
+                                 sample_weight, request):
+    """Check that a Normal/identity GLM agrees with Ridge regression."""
+    alpha = 1.0
+    test_size = 10
+    X, y = make_regression(n_samples=n_samples + test_size,
+                           n_features=n_features,
+                           n_informative=n_features-2, noise=0.5,
+                           random_state=42)
+
+    # More samples than features uses the svd solver; otherwise saga with a
+    # tight tolerance and a large iteration budget.
+    ridge_params = (
+        {"solver": "svd"} if n_samples > n_features
+        else {"solver": "saga", "max_iter": 1000000, "tol": 1e-7}
+    )
+
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=test_size, random_state=0)
+
+    # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2, so the Ridge
+    # penalty is scaled by the (weighted) number of training samples.
+    if sample_weight is not None:
+        sw_train = np.random.RandomState(0).rand(len(y_train))
+        alpha_ridge = alpha * sw_train.sum()
+    else:
+        sw_train = None
+        alpha_ridge = alpha * n_samples
+
+    ridge = Ridge(alpha=alpha_ridge, normalize=False,
+                  random_state=42, fit_intercept=fit_intercept,
+                  **ridge_params)
+    ridge.fit(X_train, y_train, sample_weight=sw_train)
+
+    glm = GeneralizedLinearRegressor(alpha=alpha, family='normal',
+                                     link='identity',
+                                     fit_intercept=fit_intercept,
+                                     max_iter=300,
+                                     tol=1e-5)
+    glm.fit(X_train, y_train, sample_weight=sw_train)
+
+    assert glm.coef_.shape == (X.shape[1], )
+    assert_allclose(glm.coef_, ridge.coef_, atol=5e-5)
+    assert_allclose(glm.intercept_, ridge.intercept_, rtol=1e-5)
+    for data in (X_train, X_test):
+        assert_allclose(glm.predict(data), ridge.predict(data), rtol=2e-4)
+
+
+def test_poisson_glmnet():
+    """Check L2-penalized Poisson regression with log link against glmnet.
+
+    The reference coefficients come from the R session quoted below.
+    """
+    # library("glmnet")
+    # options(digits=10)
+    # df <- data.frame(a=c(-2,-1,1,2), b=c(0,0,1,1), y=c(0,1,1,2))
+    # x <- data.matrix(df[,c("a", "b")])
+    # y <- df$y
+    # fit <- glmnet(x=x, y=y, alpha=0, intercept=T, family="poisson",
+    #               standardize=F, thresh=1e-10, nlambda=10000)
+    # coef(fit, s=1)
+    # (Intercept) -0.12889386979
+    # a            0.29019207995
+    # b            0.03741173122
+    expected_intercept = -0.12889386979
+    expected_coef = [0.29019207995, 0.03741173122]
+
+    features = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T
+    counts = np.array([0, 1, 1, 2])
+    model = GeneralizedLinearRegressor(alpha=1,
+                                       fit_intercept=True, family='poisson',
+                                       link='log', tol=1e-7,
+                                       max_iter=300)
+    model.fit(features, counts)
+    assert_allclose(model.intercept_, expected_intercept, rtol=1e-5)
+    assert_allclose(model.coef_, expected_coef, rtol=1e-5)
+
+
+def test_convergence_warning(regression_data):
+    """Check that an unconverged fit emits a ConvergenceWarning."""
+    features, targets = regression_data
+
+    # A single iteration with a tiny tolerance is meant not to converge.
+    estimator = GeneralizedLinearRegressor(max_iter=1, tol=1e-20)
+    with pytest.warns(ConvergenceWarning):
+        estimator.fit(features, targets)
+
+
+def test_poisson_regression_family(regression_data):
+    """Check that PoissonRegressor.family is fixed to 'poisson'."""
+    # Make sure the family attribute is read-only to prevent searching over it
+    # e.g. in a grid search
+    est = PoissonRegressor()
+    # The comparison must be asserted; as a bare expression it checks nothing.
+    assert est.family == "poisson"
+
+    msg = "PoissonRegressor.family must be 'poisson'!"
+    with pytest.raises(ValueError, match=msg):
+        est.family = 0
+
+
+def test_gamma_regression_family(regression_data):
+    """Check that GammaRegressor.family is fixed to 'gamma'."""
+    # Make sure the family attribute is read-only to prevent searching over it
+    # e.g. in a grid search
+    est = GammaRegressor()
+    # The comparison must be asserted; as a bare expression it checks nothing.
+    assert est.family == "gamma"
+
+    msg = "GammaRegressor.family must be 'gamma'!"
+    with pytest.raises(ValueError, match=msg):
+        est.family = 0
+
+
+def test_tweedie_regression_family(regression_data):
+    """Check that TweedieRegressor keeps ``family`` and ``power`` in sync.
+
+    ``family`` must always be a TweedieDistribution, assigning a new family
+    must update ``power``, and any other type must be rejected.
+    """
+    initial_power = 2.0
+    est = TweedieRegressor(power=initial_power)
+    assert isinstance(est.family, TweedieDistribution)
+    assert est.family.power == initial_power
+    assert est.power == initial_power
+
+    replaced_power = 0
+    est.family = TweedieDistribution(power=replaced_power)
+    assert isinstance(est.family, TweedieDistribution)
+    assert est.family.power == replaced_power
+    assert est.power == replaced_power
+
+    expected_msg = (
+        "TweedieRegressor.family must be of type TweedieDistribution!")
+    with pytest.raises(TypeError, match=expected_msg):
+        est.family = None
+
+
+@pytest.mark.parametrize(
+        'estimator, value',
+        [
+            (PoissonRegressor(), True),
+            (GammaRegressor(), True),
+            (TweedieRegressor(power=1.5), True),
+            (TweedieRegressor(power=0), False)
+        ],
+)
+def test_tags(estimator, value):
+    """Check the ``requires_positive_y`` estimator tag."""
+    tags = estimator._get_tags()
+    assert tags['requires_positive_y'] is value
--- a/venv/Lib/site-packages/sklearn/linear_model/_glm/tests/test_link.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_glm/tests/test_link.py
@ -0,0 +1,45 @@
+# Authors: Christian Lorentzen <lorentzen.ch@gmail.com>
+#
+# License: BSD 3 clause
+import numpy as np
+from numpy.testing import assert_allclose
+import pytest
+from scipy.optimize import check_grad
+
+from sklearn.linear_model._glm.link import (
+    IdentityLink,
+    LogLink,
+    LogitLink,
+)
+
+
+# Link classes (not instances) that the tests below are parametrized over.
+LINK_FUNCTIONS = [IdentityLink, LogLink, LogitLink]
+
+
+@pytest.mark.parametrize('Link', LINK_FUNCTIONS)
+def test_link_properties(Link):
+    """Check that a link and its inverse (and their derivatives) agree."""
+    link = Link()
+    eta = np.random.RandomState(42).rand(100) * 100
+    if isinstance(link, LogitLink):
+        # Careful with large values, note expit(36) = 1:
+        # limit the maximum eta to 15.
+        eta = eta / 100 * 15
+
+    mu = link.inverse(eta)
+    assert_allclose(link(mu), eta)
+    # If g(h(x)) = x, then g'(h(x)) = 1/h'(x),
+    # with g = link and h = link.inverse.
+    assert_allclose(link.derivative(mu), 1 / link.inverse_derivative(eta))
+
+
+@pytest.mark.parametrize('Link', LINK_FUNCTIONS)
+def test_link_derivative(Link):
+    """Check the analytic link derivatives against finite differences.
+
+    ``check_grad`` returns the absolute discrepancy between the analytic
+    derivative and a finite-difference estimate; it is turned into a
+    relative error by dividing by the derivative being checked.
+    """
+    link = Link()
+    x = np.random.RandomState(0).rand(1)
+    err = check_grad(link, link.derivative, x) / link.derivative(x)
+    assert abs(err) < 1e-6
+
+    # Normalize by the inverse derivative, i.e. the quantity under test
+    # here, rather than by the derivative of the link itself.
+    err = (check_grad(link.inverse, link.inverse_derivative, x)
+           / link.inverse_derivative(x))
+    assert abs(err) < 1e-6
--- a/venv/Lib/site-packages/sklearn/linear_model/_huber.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_huber.py
@ -0,0 +1,307 @@
+# Authors: Manoj Kumar mks542@nyu.edu
+# License: BSD 3 clause
+
+import numpy as np
+
+from scipy import optimize
+
+from ..base import BaseEstimator, RegressorMixin
+from ._base import LinearModel
+from ..utils import axis0_safe_slice
+from ..utils.validation import _check_sample_weight
+from ..utils.validation import _deprecate_positional_args
+from ..utils.extmath import safe_sparse_dot
+from ..utils.optimize import _check_optimize_result
+
+
+def _huber_loss_and_gradient(w, X, y, epsilon, alpha, sample_weight=None):
+    """Returns the Huber loss and the gradient.
+
+    Parameters
+    ----------
+    w : ndarray, shape (n_features + 1,) or (n_features + 2,)
+        Feature vector.
+        w[:n_features] gives the coefficients
+        w[-1] gives the scale factor and if the intercept is fit w[-2]
+        gives the intercept factor.
+
+    X : ndarray, shape (n_samples, n_features)
+        Input data.
+
+    y : ndarray, shape (n_samples,)
+        Target vector.
+
+    epsilon : float
+        Robustness of the Huber estimator.
+
+    alpha : float
+        Regularization parameter.
+
+    sample_weight : ndarray, shape (n_samples,), optional
+        Weight assigned to each sample. If None, every sample gets a
+        weight of one.
+
+    Returns
+    -------
+    loss : float
+        Huber loss.
+
+    gradient : ndarray, shape (len(w))
+        Returns the derivative of the Huber loss with respect to each
+        coefficient, intercept and the scale as a vector.
+    """
+    _, n_features = X.shape
+    if sample_weight is None:
+        # Honour the documented default: without this, indexing
+        # sample_weight below would fail on None.
+        sample_weight = np.ones(X.shape[0])
+    # The length of w tells whether an intercept slot is present.
+    fit_intercept = (n_features + 2 == w.shape[0])
+    if fit_intercept:
+        intercept = w[-2]
+    sigma = w[-1]
+    w = w[:n_features]
+    # Despite its name this is the sum of the weights, not a row count.
+    n_samples = np.sum(sample_weight)
+
+    # Calculate the values where |y - X'w -c / sigma| > epsilon
+    # The values above this threshold are outliers.
+    linear_loss = y - safe_sparse_dot(X, w)
+    if fit_intercept:
+        linear_loss -= intercept
+    abs_linear_loss = np.abs(linear_loss)
+    outliers_mask = abs_linear_loss > epsilon * sigma
+
+    # Calculate the linear loss due to the outliers.
+    # This is equal to (2 * M * |y - X'w -c / sigma| - M**2) * sigma
+    outliers = abs_linear_loss[outliers_mask]
+    num_outliers = np.count_nonzero(outliers_mask)
+    n_non_outliers = X.shape[0] - num_outliers
+
+    # n_sw_outliers includes the weight given to the outliers while
+    # num_outliers is just the number of outliers.
+    outliers_sw = sample_weight[outliers_mask]
+    n_sw_outliers = np.sum(outliers_sw)
+    outlier_loss = (2. * epsilon * np.sum(outliers_sw * outliers) -
+                    sigma * n_sw_outliers * epsilon ** 2)
+
+    # Calculate the quadratic loss due to the non-outliers.
+    # This is equal to |(y - X'w - c)**2 / sigma**2| * sigma
+    non_outliers = linear_loss[~outliers_mask]
+    weighted_non_outliers = sample_weight[~outliers_mask] * non_outliers
+    weighted_loss = np.dot(weighted_non_outliers.T, non_outliers)
+    squared_loss = weighted_loss / sigma
+
+    if fit_intercept:
+        grad = np.zeros(n_features + 2)
+    else:
+        grad = np.zeros(n_features + 1)
+
+    # Gradient due to the squared loss.
+    X_non_outliers = -axis0_safe_slice(X, ~outliers_mask, n_non_outliers)
+    grad[:n_features] = (
+        2. / sigma * safe_sparse_dot(weighted_non_outliers, X_non_outliers))
+
+    # Gradient due to the linear loss: each outlier contributes with the
+    # sign of its residual.
+    signed_outliers = np.ones_like(outliers)
+    signed_outliers_mask = linear_loss[outliers_mask] < 0
+    signed_outliers[signed_outliers_mask] = -1.0
+    X_outliers = axis0_safe_slice(X, outliers_mask, num_outliers)
+    sw_outliers = sample_weight[outliers_mask] * signed_outliers
+    grad[:n_features] -= 2. * epsilon * (
+        safe_sparse_dot(sw_outliers, X_outliers))
+
+    # Gradient due to the penalty.
+    grad[:n_features] += alpha * 2. * w
+
+    # Gradient due to sigma.
+    grad[-1] = n_samples
+    grad[-1] -= n_sw_outliers * epsilon ** 2
+    grad[-1] -= squared_loss / sigma
+
+    # Gradient due to the intercept.
+    if fit_intercept:
+        grad[-2] = -2. * np.sum(weighted_non_outliers) / sigma
+        grad[-2] -= 2. * epsilon * np.sum(sw_outliers)
+
+    loss = n_samples * sigma + squared_loss + outlier_loss
+    loss += alpha * np.dot(w, w)
+    return loss, grad
+
+
+class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator):
+    """Linear regression model that is robust to outliers.
+
+    The Huber Regressor optimizes the squared loss for the samples where
+    ``|(y - X'w) / sigma| < epsilon`` and the absolute loss for the samples
+    where ``|(y - X'w) / sigma| > epsilon``, where w and sigma are parameters
+    to be optimized. The parameter sigma makes sure that if y is scaled up
+    or down by a certain factor, one does not need to rescale epsilon to
+    achieve the same robustness. Note that this does not take into account
+    the fact that the different features of X may be of different scales.
+
+    This makes sure that the loss function is not heavily influenced by the
+    outliers while not completely ignoring their effect.
+
+    Read more in the :ref:`User Guide <huber_regression>`
+
+    .. versionadded:: 0.18
+
+    Parameters
+    ----------
+    epsilon : float, greater than or equal to 1.0, default 1.35
+        The parameter epsilon controls the number of samples that should be
+        classified as outliers. The smaller the epsilon, the more robust it is
+        to outliers.
+
+    max_iter : int, default 100
+        Maximum number of iterations that
+        ``scipy.optimize.minimize(method="L-BFGS-B")`` should run for.
+
+    alpha : float, default 0.0001
+        Regularization parameter.
+
+    warm_start : bool, default False
+        This is useful if the stored attributes of a previously used model
+        has to be reused. If set to False, then the coefficients will
+        be rewritten for every call to fit.
+        See :term:`the Glossary <warm_start>`.
+
+    fit_intercept : bool, default True
+        Whether or not to fit the intercept. This can be set to False
+        if the data is already centered around the origin.
+
+    tol : float, default 1e-5
+        The iteration will stop when
+        ``max{|proj g_i | i = 1, ..., n}`` <= ``tol``
+        where pg_i is the i-th component of the projected gradient.
+
+    Attributes
+    ----------
+    coef_ : array, shape (n_features,)
+        Features got by optimizing the Huber loss.
+
+    intercept_ : float
+        Bias.
+
+    scale_ : float
+        The value by which ``|y - X'w - c|`` is scaled down.
+
+    n_iter_ : int
+        Number of iterations that
+        ``scipy.optimize.minimize(method="L-BFGS-B")`` has run for.
+
+        .. versionchanged:: 0.20
+
+            In SciPy <= 1.0.0 the number of lbfgs iterations may exceed
+            ``max_iter``. ``n_iter_`` will now report at most ``max_iter``.
+
+    outliers_ : array, shape (n_samples,)
+        A boolean mask which is set to True where the samples are identified
+        as outliers.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn.linear_model import HuberRegressor, LinearRegression
+    >>> from sklearn.datasets import make_regression
+    >>> rng = np.random.RandomState(0)
+    >>> X, y, coef = make_regression(
+    ...     n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)
+    >>> X[:4] = rng.uniform(10, 20, (4, 2))
+    >>> y[:4] = rng.uniform(10, 20, 4)
+    >>> huber = HuberRegressor().fit(X, y)
+    >>> huber.score(X, y)
+    -7.284...
+    >>> huber.predict(X[:1,])
+    array([806.7200...])
+    >>> linear = LinearRegression().fit(X, y)
+    >>> print("True coefficients:", coef)
+    True coefficients: [20.4923...  34.1698...]
+    >>> print("Huber coefficients:", huber.coef_)
+    Huber coefficients: [17.7906... 31.0106...]
+    >>> print("Linear Regression coefficients:", linear.coef_)
+    Linear Regression coefficients: [-1.9221...  7.0226...]
+
+    References
+    ----------
+    .. [1] Peter J. Huber, Elvezio M. Ronchetti, Robust Statistics
+           Concomitant scale estimates, pg 172
+    .. [2] Art B. Owen (2006), A robust hybrid of lasso and ridge regression.
+           https://statweb.stanford.edu/~owen/reports/hhu.pdf
+    """
+    @_deprecate_positional_args
+    def __init__(self, *, epsilon=1.35, max_iter=100, alpha=0.0001,
+                 warm_start=False, fit_intercept=True, tol=1e-05):
+        self.epsilon = epsilon
+        self.max_iter = max_iter
+        self.alpha = alpha
+        self.warm_start = warm_start
+        self.fit_intercept = fit_intercept
+        self.tol = tol
+
+    def fit(self, X, y, sample_weight=None):
+        """Fit the model according to the given training data.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            Training vector, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        y : array-like, shape (n_samples,)
+            Target vector relative to X.
+
+        sample_weight : array-like, shape (n_samples,)
+            Weight given to each sample.
+
+        Returns
+        -------
+        self : object
+
+        Raises
+        ------
+        ValueError
+            If ``epsilon`` is smaller than 1.0, or if the L-BFGS-B solver
+            terminates with status 2.
+        """
+        X, y = self._validate_data(
+            X, y, copy=False, accept_sparse=['csr'], y_numeric=True,
+            dtype=[np.float64, np.float32])
+
+        sample_weight = _check_sample_weight(sample_weight, X)
+
+        if self.epsilon < 1.0:
+            raise ValueError(
+                "epsilon should be greater than or equal to 1.0, got %f"
+                % self.epsilon)
+
+        if self.warm_start and hasattr(self, 'coef_'):
+            # The loss function infers from the length of the parameter
+            # vector whether an intercept is fitted, so the intercept slot
+            # must only be present when fit_intercept is True. Otherwise an
+            # intercept would be optimized and then thrown away below.
+            if self.fit_intercept:
+                parameters = np.concatenate(
+                    (self.coef_, [self.intercept_, self.scale_]))
+            else:
+                parameters = np.concatenate((self.coef_, [self.scale_]))
+        else:
+            if self.fit_intercept:
+                parameters = np.zeros(X.shape[1] + 2)
+            else:
+                parameters = np.zeros(X.shape[1] + 1)
+            # Make sure to initialize the scale parameter to a strictly
+            # positive value:
+            parameters[-1] = 1
+
+        # Sigma or the scale factor should be non-negative.
+        # Setting it to be zero might cause undefined bounds hence we set it
+        # to a value close to zero.
+        bounds = np.tile([-np.inf, np.inf], (parameters.shape[0], 1))
+        bounds[-1][0] = np.finfo(np.float64).eps * 10
+
+        opt_res = optimize.minimize(
+            _huber_loss_and_gradient, parameters, method="L-BFGS-B", jac=True,
+            args=(X, y, self.epsilon, self.alpha, sample_weight),
+            options={"maxiter": self.max_iter, "gtol": self.tol, "iprint": -1},
+            bounds=bounds)
+
+        parameters = opt_res.x
+
+        if opt_res.status == 2:
+            raise ValueError("HuberRegressor convergence failed:"
+                             " l-BFGS-b solver terminated with %s"
+                             % opt_res.message)
+        self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
+        # Parameter layout: [coef..., (intercept), scale].
+        self.scale_ = parameters[-1]
+        if self.fit_intercept:
+            self.intercept_ = parameters[-2]
+        else:
+            self.intercept_ = 0.0
+        self.coef_ = parameters[:X.shape[1]]
+
+        # Samples whose absolute residual exceeds scale_ * epsilon are
+        # flagged as outliers.
+        residual = np.abs(
+            y - safe_sparse_dot(X, self.coef_) - self.intercept_)
+        self.outliers_ = residual > self.scale_ * self.epsilon
+        return self
--- a/venv/Lib/site-packages/sklearn/linear_model/_least_angle.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_least_angle.py
--- a/venv/Lib/site-packages/sklearn/linear_model/_logistic.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_logistic.py
--- a/venv/Lib/site-packages/sklearn/linear_model/_omp.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_omp.py
@ -0,0 +1,913 @@
+"""Orthogonal matching pursuit algorithms
+"""
+
+# Author: Vlad Niculae
+#
+# License: BSD 3 clause
+
+import warnings
+from math import sqrt
+
+import numpy as np
+from scipy import linalg
+from scipy.linalg.lapack import get_lapack_funcs
+from joblib import Parallel, delayed
+
+from ._base import LinearModel, _pre_fit
+from ..base import RegressorMixin, MultiOutputMixin
+from ..utils import as_float_array, check_array
+from ..utils.validation import _deprecate_positional_args
+from ..model_selection import check_cv
+
+# Message of the RuntimeWarning raised by the OMP solvers in this module when
+# they leave their main loop early (atom already selected, correlation too
+# small, or selected atoms found to be linearly dependent).
+premature = """ Orthogonal matching pursuit ended prematurely due to linear
+dependence in the dictionary. The requested precision might not have been met.
+"""
+
+
+def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True,
+                  return_path=False):
+    """Orthogonal Matching Pursuit step using the Cholesky decomposition.
+
+    Greedily selects columns (atoms) of ``X`` one at a time. Selected columns
+    are swapped to the front of ``X`` and a Cholesky factor ``L`` of the Gram
+    matrix of the selected columns is grown row by row, so that the
+    least-squares coefficients on the active set can be obtained with a
+    Cholesky solve at every step.
+
+    Parameters
+    ----------
+    X : array, shape (n_samples, n_features)
+        Input dictionary. Columns are assumed to have unit norm.
+
+    y : array, shape (n_samples,)
+        Input targets
+
+    n_nonzero_coefs : int
+        Targeted number of non-zero elements
+
+    tol : float
+        Targeted squared error, if not None overrides n_nonzero_coefs.
+        It is compared against the squared 2-norm of the residual.
+
+    copy_X : bool, optional
+        Whether the design matrix X must be copied by the algorithm. A false
+        value is only helpful if X is already Fortran-ordered, otherwise a
+        copy is made anyway.
+
+    return_path : bool, optional. Default: False
+        Whether to return every value of the nonzero coefficients along the
+        forward path. Useful for cross-validation.
+
+    Returns
+    -------
+    gamma : array, shape (n_active,)
+        Non-zero elements of the solution. Empty if the loop stops before
+        any atom is selected.
+
+    idx : array, shape (n_active,)
+        Indices of the positions of the elements in gamma within the solution
+        vector
+
+    coef : array, shape (max_features, n_active)
+        The first k values of column k correspond to the coefficient value
+        for the active features at that step. The lower left triangle contains
+        garbage. Only returned if ``return_path=True``.
+
+    n_active : int
+        Number of active features at convergence.
+
+    Warns
+    -----
+    RuntimeWarning
+        With the module-level ``premature`` message, when the loop stops
+        because the best atom is already active, its correlation is too small
+        or the selected atoms are linearly dependent.
+    """
+    if copy_X:
+        X = X.copy('F')
+    else:  # even if we are allowed to overwrite, still copy it if bad order
+        X = np.asfortranarray(X)
+
+    # Machine epsilon of X's dtype, used as the "numerically zero" threshold.
+    min_float = np.finfo(X.dtype).eps
+    nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (X,))
+    potrs, = get_lapack_funcs(('potrs',), (X,))
+
+    # Correlation of every atom with the target; permuted alongside X below.
+    alpha = np.dot(X.T, y)
+    residual = y
+    gamma = np.empty(0)
+    n_active = 0
+    indices = np.arange(X.shape[1])  # keeping track of swapping
+
+    # With a tolerance the number of atoms is only bounded by n_features.
+    max_features = X.shape[1] if tol is not None else n_nonzero_coefs
+
+    # Uninitialized storage for the Cholesky factor; only the leading
+    # n_active x n_active lower triangle is ever read.
+    L = np.empty((max_features, max_features), dtype=X.dtype)
+
+    if return_path:
+        coefs = np.empty_like(L)
+
+    while True:
+        # Atom most correlated (in absolute value) with the current residual.
+        lam = np.argmax(np.abs(np.dot(X.T, residual)))
+        # Active atoms occupy columns [0, n_active), so an index below
+        # n_active means the atom has been picked before.
+        if lam < n_active or alpha[lam] ** 2 < min_float:
+            # atom already selected or inner product too small
+            warnings.warn(premature, RuntimeWarning, stacklevel=2)
+            break
+
+        if n_active > 0:
+            # Updates the Cholesky decomposition of X' X
+            L[n_active, :n_active] = np.dot(X[:, :n_active].T, X[:, lam])
+            # The return value is discarded: the code relies on
+            # overwrite_b=True updating the new row of L in place.
+            linalg.solve_triangular(L[:n_active, :n_active],
+                                    L[n_active, :n_active],
+                                    trans=0, lower=1,
+                                    overwrite_b=True,
+                                    check_finite=False)
+            v = nrm2(L[n_active, :n_active]) ** 2
+            # Squared diagonal entry of the new Cholesky row.
+            Lkk = linalg.norm(X[:, lam]) ** 2 - v
+            if Lkk <= min_float:  # selected atoms are dependent
+                warnings.warn(premature, RuntimeWarning, stacklevel=2)
+                break
+            L[n_active, n_active] = sqrt(Lkk)
+        else:
+            L[0, 0] = linalg.norm(X[:, lam])
+
+        # Move the chosen atom to position n_active: swap the two columns of
+        # X (rows of X.T) and keep alpha and indices in the same order.
+        X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam])
+        alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active]
+        indices[n_active], indices[lam] = indices[lam], indices[n_active]
+        n_active += 1
+
+        # solves LL'x = X'y as a composition of two triangular systems
+        gamma, _ = potrs(L[:n_active, :n_active], alpha[:n_active], lower=True,
+                         overwrite_b=False)
+
+        if return_path:
+            # Column k holds the k + 1 coefficients found at step k.
+            coefs[:n_active, n_active - 1] = gamma
+        residual = y - np.dot(X[:, :n_active], gamma)
+        # Stop once the squared residual norm reaches tol, or when the
+        # maximum number of atoms has been selected.
+        if tol is not None and nrm2(residual) ** 2 <= tol:
+            break
+        elif n_active == max_features:
+            break
+
+    if return_path:
+        return gamma, indices[:n_active], coefs[:, :n_active], n_active
+    else:
+        return gamma, indices[:n_active], n_active
+
+
+def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None,
+              copy_Gram=True, copy_Xy=True, return_path=False):
+    """Orthogonal Matching Pursuit step on a precomputed Gram matrix.
+
+    This function uses the Cholesky decomposition method. It never touches
+    the data matrix itself: atom correlations are maintained from ``Gram`` and
+    ``Xy``, and selected atoms are moved to the front by swapping the matching
+    rows and columns of ``Gram`` and entries of ``Xy``.
+
+    Parameters
+    ----------
+    Gram : array, shape (n_features, n_features)
+        Gram matrix of the input data matrix
+
+    Xy : array, shape (n_features,)
+        Input targets
+
+    n_nonzero_coefs : int
+        Targeted number of non-zero elements
+
+    tol_0 : float
+        Squared norm of y, required if tol is not None.
+
+    tol : float
+        Targeted squared error, if not None overrides n_nonzero_coefs.
+
+    copy_Gram : bool, optional
+        Whether the gram matrix must be copied by the algorithm. A false
+        value is only helpful if it is already Fortran-ordered, otherwise a
+        copy is made anyway.
+
+    copy_Xy : bool, optional
+        Whether the covariance vector Xy must be copied by the algorithm.
+        If False, it may be overwritten. A copy is made anyway when Xy is not
+        writeable.
+
+    return_path : bool, optional. Default: False
+        Whether to return every value of the nonzero coefficients along the
+        forward path. Useful for cross-validation.
+
+    Returns
+    -------
+    gamma : array, shape (n_active,)
+        Non-zero elements of the solution. Empty if the loop stops before
+        any atom is selected.
+
+    idx : array, shape (n_active,)
+        Indices of the positions of the elements in gamma within the solution
+        vector
+
+    coefs : array, shape (max_features, n_active)
+        The first k values of column k correspond to the coefficient value
+        for the active features at that step. The lower left triangle contains
+        garbage. Only returned if ``return_path=True``.
+
+    n_active : int
+        Number of active features at convergence.
+
+    Warns
+    -----
+    RuntimeWarning
+        With the module-level ``premature`` message, when the loop stops
+        because the best atom is already active, its correlation is too small
+        or the selected atoms are linearly dependent.
+    """
+    Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram)
+
+    if copy_Xy or not Xy.flags.writeable:
+        Xy = Xy.copy()
+
+    # Machine epsilon of Gram's dtype, used as the "numerically zero"
+    # threshold.
+    min_float = np.finfo(Gram.dtype).eps
+    nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram,))
+    potrs, = get_lapack_funcs(('potrs',), (Gram,))
+
+    indices = np.arange(len(Gram))  # keeping track of swapping
+    # Starts as an alias of Xy; rebound to a fresh array after each step.
+    alpha = Xy
+    # Running squared error, started at the squared norm of y.
+    tol_curr = tol_0
+    delta = 0
+    gamma = np.empty(0)
+    n_active = 0
+
+    # With a tolerance the number of atoms is only bounded by n_features.
+    max_features = len(Gram) if tol is not None else n_nonzero_coefs
+
+    # Uninitialized storage for the Cholesky factor; only the leading
+    # n_active x n_active lower triangle is ever read.
+    L = np.empty((max_features, max_features), dtype=Gram.dtype)
+
+    # Placeholder value: the first successful iteration (n_active == 0)
+    # overwrites it in the else branch below.
+    L[0, 0] = 1.
+    if return_path:
+        coefs = np.empty_like(L)
+
+    while True:
+        # Atom with the largest absolute correlation.
+        lam = np.argmax(np.abs(alpha))
+        # Active atoms occupy positions [0, n_active), so an index below
+        # n_active means the atom has been picked before.
+        if lam < n_active or alpha[lam] ** 2 < min_float:
+            # selected same atom twice, or inner product too small
+            warnings.warn(premature, RuntimeWarning, stacklevel=3)
+            break
+        if n_active > 0:
+            # Grow the Cholesky factor by one row for the new atom.
+            L[n_active, :n_active] = Gram[lam, :n_active]
+            # The return value is discarded: the code relies on
+            # overwrite_b=True updating the new row of L in place.
+            linalg.solve_triangular(L[:n_active, :n_active],
+                                    L[n_active, :n_active],
+                                    trans=0, lower=1,
+                                    overwrite_b=True,
+                                    check_finite=False)
+            v = nrm2(L[n_active, :n_active]) ** 2
+            # Squared diagonal entry of the new Cholesky row.
+            Lkk = Gram[lam, lam] - v
+            if Lkk <= min_float:  # selected atoms are dependent
+                warnings.warn(premature, RuntimeWarning, stacklevel=3)
+                break
+            L[n_active, n_active] = sqrt(Lkk)
+        else:
+            L[0, 0] = sqrt(Gram[lam, lam])
+
+        # Move the chosen atom to position n_active: swap both the rows and
+        # the columns of Gram, and keep indices and Xy in the same order.
+        Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam])
+        Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam])
+        indices[n_active], indices[lam] = indices[lam], indices[n_active]
+        Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active]
+        n_active += 1
+        # solves LL'x = X'y as a composition of two triangular systems
+        gamma, _ = potrs(L[:n_active, :n_active], Xy[:n_active], lower=True,
+                         overwrite_b=False)
+        if return_path:
+            # Column k holds the k + 1 coefficients found at step k.
+            coefs[:n_active, n_active - 1] = gamma
+        # Updated correlations of all atoms: Xy - Gram[:, active] @ gamma.
+        beta = np.dot(Gram[:, :n_active], gamma)
+        alpha = Xy - beta
+        if tol is not None:
+            # Undo the previous step's correction, then subtract the new one,
+            # so that tol_curr == tol_0 - gamma . beta[:n_active].
+            tol_curr += delta
+            delta = np.inner(gamma, beta[:n_active])
+            tol_curr -= delta
+            if abs(tol_curr) <= tol:
+                break
+        # NOTE: this elif pairs with ``if tol is not None`` above, so the
+        # cap on the number of atoms is only checked when tol is None.
+        elif n_active == max_features:
+            break
+
+    if return_path:
+        return gamma, indices[:n_active], coefs[:, :n_active], n_active
+    else:
+        return gamma, indices[:n_active], n_active
+
+
+@_deprecate_positional_args
+def orthogonal_mp(X, y, *, n_nonzero_coefs=None, tol=None, precompute=False,
+                  copy_X=True, return_path=False,
+                  return_n_iter=False):
+    r"""Orthogonal Matching Pursuit (OMP)
+
+    Solves n_targets Orthogonal Matching Pursuit problems.
+    An instance of the problem has the form:
+
+    When parametrized by the number of non-zero coefficients using
+    `n_nonzero_coefs`:
+    argmin ||y - X\gamma||^2 subject to ||\gamma||_0 <= n_{nonzero coefs}
+
+    When parametrized by error using the parameter `tol`:
+    argmin ||\gamma||_0 subject to ||y - X\gamma||^2 <= tol
+
+    Read more in the :ref:`User Guide <omp>`.
+
+    Parameters
+    ----------
+    X : array, shape (n_samples, n_features)
+        Input data. Columns are assumed to have unit norm.
+
+    y : array, shape (n_samples,) or (n_samples, n_targets)
+        Input targets
+
+    n_nonzero_coefs : int
+        Desired number of non-zero entries in the solution. If None (by
+        default) this value is set to 10% of n_features.
+
+    tol : float
+        Maximum norm of the residual. If not None, overrides n_nonzero_coefs.
+
+    precompute : {True, False, 'auto'},
+        Whether to perform precomputations. Improves performance when n_targets
+        or n_samples is very large.
+
+    copy_X : bool, optional
+        Whether the design matrix X must be copied by the algorithm. A false
+        value is only helpful if X is already Fortran-ordered, otherwise a
+        copy is made anyway.
+
+    return_path : bool, optional. Default: False
+        Whether to return every value of the nonzero coefficients along the
+        forward path. Useful for cross-validation.
+
+    return_n_iter : bool, optional default False
+        Whether or not to return the number of iterations.
+
+    Returns
+    -------
+    coef : array, shape (n_features,) or (n_features, n_targets)
+        Coefficients of the OMP solution. If `return_path=True`, this contains
+        the whole coefficient path. In this case its shape is
+        (n_features, n_features) or (n_features, n_targets, n_features) and
+        iterating over the last axis yields coefficients in increasing order
+        of active features.
+
+    n_iters : array-like or int
+        Number of active features across every target. Returned only if
+        `return_n_iter` is set to True.
+
+    See also
+    --------
+    OrthogonalMatchingPursuit
+    orthogonal_mp_gram
+    lars_path
+    decomposition.sparse_encode
+
+    Notes
+    -----
+    Orthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,
+    Matching pursuits with time-frequency dictionaries, IEEE Transactions on
+    Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
+    (http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)
+
+    This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
+    M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
+    Matching Pursuit Technical Report - CS Technion, April 2008.
+    https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf
+
+    """
+    X = check_array(X, order='F', copy=copy_X)
+    copy_X = False
+    if y.ndim == 1:
+        y = y.reshape(-1, 1)
+    y = check_array(y)
+    if y.shape[1] > 1:  # subsequent targets will be affected
+        copy_X = True
+    if n_nonzero_coefs is None and tol is None:
+        # default for n_nonzero_coefs is 0.1 * n_features
+        # but at least one.
+        n_nonzero_coefs = max(int(0.1 * X.shape[1]), 1)
+    if tol is not None and tol < 0:
+        raise ValueError("Epsilon cannot be negative")
+    if tol is None and n_nonzero_coefs <= 0:
+        raise ValueError("The number of atoms must be positive")
+    if tol is None and n_nonzero_coefs > X.shape[1]:
+        raise ValueError("The number of atoms cannot be more than the number "
+                         "of features")
+    if precompute == 'auto':
+        precompute = X.shape[0] > X.shape[1]
+    if precompute:
+        G = np.dot(X.T, X)
+        G = np.asfortranarray(G)
+        Xy = np.dot(X.T, y)
+        if tol is not None:
+            norms_squared = np.sum((y ** 2), axis=0)
+        else:
+            norms_squared = None
+        return orthogonal_mp_gram(G, Xy, n_nonzero_coefs=n_nonzero_coefs,
+                                  tol=tol, norms_squared=norms_squared,
+                                  copy_Gram=copy_X, copy_Xy=False,
+                                  return_path=return_path)
+
+    if return_path:
+        coef = np.zeros((X.shape[1], y.shape[1], X.shape[1]))
+    else:
+        coef = np.zeros((X.shape[1], y.shape[1]))
+    n_iters = []
+
+    for k in range(y.shape[1]):
+        out = _cholesky_omp(
+            X, y[:, k], n_nonzero_coefs, tol,
+            copy_X=copy_X, return_path=return_path)
+        if return_path:
+            _, idx, coefs, n_iter = out
+            coef = coef[:, :, :len(idx)]
+            for n_active, x in enumerate(coefs.T):
+                coef[idx[:n_active + 1], k, n_active] = x[:n_active + 1]
+        else:
+            x, idx, n_iter = out
+            coef[idx, k] = x
+        n_iters.append(n_iter)
+
+    if y.shape[1] == 1:
+        n_iters = n_iters[0]
+
+    if return_n_iter:
+        return np.squeeze(coef), n_iters
+    else:
+        return np.squeeze(coef)
+
+
+@_deprecate_positional_args
+def orthogonal_mp_gram(Gram, Xy, *, n_nonzero_coefs=None, tol=None,
+                       norms_squared=None, copy_Gram=True,
+                       copy_Xy=True, return_path=False,
+                       return_n_iter=False):
+    """Gram Orthogonal Matching Pursuit (OMP)
+
+    Solves n_targets Orthogonal Matching Pursuit problems using only
+    the Gram matrix X.T * X and the product X.T * y.
+
+    Read more in the :ref:`User Guide <omp>`.
+
+    Parameters
+    ----------
+    Gram : array, shape (n_features, n_features)
+        Gram matrix of the input data: X.T * X
+
+    Xy : array, shape (n_features,) or (n_features, n_targets)
+        Input targets multiplied by X: X.T * y
+
+    n_nonzero_coefs : int
+        Desired number of non-zero entries in the solution. If None (by
+        default) this value is set to 10% of n_features.
+
+    tol : float
+        Maximum norm of the residual. If not None, overrides n_nonzero_coefs.
+
+    norms_squared : array-like, shape (n_targets,)
+        Squared L2 norms of the lines of y. Required if tol is not None.
+
+    copy_Gram : bool, optional
+        Whether the gram matrix must be copied by the algorithm. A false
+        value is only helpful if it is already Fortran-ordered, otherwise a
+        copy is made anyway.
+
+    copy_Xy : bool, optional
+        Whether the covariance vector Xy must be copied by the algorithm.
+        If False, it may be overwritten.
+
+    return_path : bool, optional. Default: False
+        Whether to return every value of the nonzero coefficients along the
+        forward path. Useful for cross-validation.
+
+    return_n_iter : bool, optional default False
+        Whether or not to return the number of iterations.
+
+    Returns
+    -------
+    coef : array, shape (n_features,) or (n_features, n_targets)
+        Coefficients of the OMP solution. If `return_path=True`, this contains
+        the whole coefficient path. In this case its shape is
+        (n_features, n_features) or (n_features, n_targets, n_features) and
+        iterating over the last axis yields coefficients in increasing order
+        of active features.
+
+    n_iters : array-like or int
+        Number of active features across every target. Returned only if
+        `return_n_iter` is set to True.
+
+    See also
+    --------
+    OrthogonalMatchingPursuit
+    orthogonal_mp
+    lars_path
+    decomposition.sparse_encode
+
+    Notes
+    -----
+    Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,
+    Matching pursuits with time-frequency dictionaries, IEEE Transactions on
+    Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
+    (http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)
+
+    This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
+    M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
+    Matching Pursuit Technical Report - CS Technion, April 2008.
+    https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf
+
+    """
+    Gram = check_array(Gram, order='F', copy=copy_Gram)
+    Xy = np.asarray(Xy)
+    if Xy.ndim > 1 and Xy.shape[1] > 1:
+        # or subsequent target will be affected
+        copy_Gram = True
+    if Xy.ndim == 1:
+        Xy = Xy[:, np.newaxis]
+        if tol is not None:
+            norms_squared = [norms_squared]
+    if copy_Xy or not Xy.flags.writeable:
+        # Make the copy once instead of many times in _gram_omp itself.
+        Xy = Xy.copy()
+
+    if n_nonzero_coefs is None and tol is None:
+        n_nonzero_coefs = int(0.1 * len(Gram))
+    if tol is not None and norms_squared is None:
+        raise ValueError('Gram OMP needs the precomputed norms in order '
+                         'to evaluate the error sum of squares.')
+    if tol is not None and tol < 0:
+        raise ValueError("Epsilon cannot be negative")
+    if tol is None and n_nonzero_coefs <= 0:
+        raise ValueError("The number of atoms must be positive")
+    if tol is None and n_nonzero_coefs > len(Gram):
+        raise ValueError("The number of atoms cannot be more than the number "
+                         "of features")
+
+    if return_path:
+        coef = np.zeros((len(Gram), Xy.shape[1], len(Gram)))
+    else:
+        coef = np.zeros((len(Gram), Xy.shape[1]))
+
+    n_iters = []
+    for k in range(Xy.shape[1]):
+        out = _gram_omp(
+            Gram, Xy[:, k], n_nonzero_coefs,
+            norms_squared[k] if tol is not None else None, tol,
+            copy_Gram=copy_Gram, copy_Xy=False,
+            return_path=return_path)
+        if return_path:
+            _, idx, coefs, n_iter = out
+            coef = coef[:, :, :len(idx)]
+            for n_active, x in enumerate(coefs.T):
+                coef[idx[:n_active + 1], k, n_active] = x[:n_active + 1]
+        else:
+            x, idx, n_iter = out
+            coef[idx, k] = x
+        n_iters.append(n_iter)
+
+    if Xy.shape[1] == 1:
+        n_iters = n_iters[0]
+
+    if return_n_iter:
+        return np.squeeze(coef), n_iters
+    else:
+        return np.squeeze(coef)
+
+
+class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel):
+    """Orthogonal Matching Pursuit model (OMP)
+
+    Read more in the :ref:`User Guide <omp>`.
+
+    Parameters
+    ----------
+    n_nonzero_coefs : int, optional
+        Desired number of non-zero entries in the solution. If None (by
+        default) this value is set to 10% of n_features.
+
+    tol : float, optional
+        Maximum norm of the residual. If not None, overrides n_nonzero_coefs.
+
+    fit_intercept : boolean, optional
+        whether to calculate the intercept for this model. If set
+        to false, no intercept will be used in calculations
+        (i.e. data is expected to be centered).
+
+    normalize : boolean, optional, default True
+        This parameter is ignored when ``fit_intercept`` is set to False.
+        If True, the regressors X will be normalized before regression by
+        subtracting the mean and dividing by the l2-norm.
+        If you wish to standardize, please use
+        :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
+        on an estimator with ``normalize=False``.
+
+    precompute : {True, False, 'auto'}, default 'auto'
+        Whether to use a precomputed Gram and Xy matrix to speed up
+        calculations. Improves performance when :term:`n_targets` or
+        :term:`n_samples` is very large. Note that if you already have such
+        matrices, you can pass them directly to the fit method.
+
+    Attributes
+    ----------
+    coef_ : array, shape (n_features,) or (n_targets, n_features)
+        parameter vector (w in the formula)
+
+    intercept_ : float or array, shape (n_targets,)
+        independent term in decision function.
+
+    n_iter_ : int or array-like
+        Number of active features across every target.
+
+    Examples
+    --------
+    >>> from sklearn.linear_model import OrthogonalMatchingPursuit
+    >>> from sklearn.datasets import make_regression
+    >>> X, y = make_regression(noise=4, random_state=0)
+    >>> reg = OrthogonalMatchingPursuit().fit(X, y)
+    >>> reg.score(X, y)
+    0.9991...
+    >>> reg.predict(X[:1,])
+    array([-78.3854...])
+
+    Notes
+    -----
+    Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,
+    Matching pursuits with time-frequency dictionaries, IEEE Transactions on
+    Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
+    (http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)
+
+    This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
+    M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
+    Matching Pursuit Technical Report - CS Technion, April 2008.
+    https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf
+
+    See also
+    --------
+    orthogonal_mp
+    orthogonal_mp_gram
+    lars_path
+    Lars
+    LassoLars
+    decomposition.sparse_encode
+    OrthogonalMatchingPursuitCV
+    """
+    @_deprecate_positional_args
+    def __init__(self, *, n_nonzero_coefs=None, tol=None, fit_intercept=True,
+                 normalize=True, precompute='auto'):
+        self.n_nonzero_coefs = n_nonzero_coefs
+        self.tol = tol
+        self.fit_intercept = fit_intercept
+        self.normalize = normalize
+        self.precompute = precompute
+
+    def fit(self, X, y):
+        """Fit the model using X, y as training data.
+
+        Parameters
+        ----------
+        X : array-like, shape (n_samples, n_features)
+            Training data.
+
+        y : array-like, shape (n_samples,) or (n_samples, n_targets)
+            Target values. Will be cast to X's dtype if necessary
+
+
+        Returns
+        -------
+        self : object
+            returns an instance of self.
+        """
+        X, y = self._validate_data(X, y, multi_output=True, y_numeric=True)
+        n_features = X.shape[1]
+
+        X, y, X_offset, y_offset, X_scale, Gram, Xy = \
+            _pre_fit(X, y, None, self.precompute, self.normalize,
+                     self.fit_intercept, copy=True)
+
+        if y.ndim == 1:
+            y = y[:, np.newaxis]
+
+        if self.n_nonzero_coefs is None and self.tol is None:
+            # default for n_nonzero_coefs is 0.1 * n_features
+            # but at least one.
+            self.n_nonzero_coefs_ = max(int(0.1 * n_features), 1)
+        else:
+            self.n_nonzero_coefs_ = self.n_nonzero_coefs
+
+        if Gram is False:
+            coef_, self.n_iter_ = orthogonal_mp(
+                X, y, n_nonzero_coefs=self.n_nonzero_coefs_, tol=self.tol,
+                precompute=False, copy_X=True,
+                return_n_iter=True)
+        else:
+            norms_sq = np.sum(y ** 2, axis=0) if self.tol is not None else None
+
+            coef_, self.n_iter_ = orthogonal_mp_gram(
+                Gram, Xy=Xy, n_nonzero_coefs=self.n_nonzero_coefs_,
+                tol=self.tol, norms_squared=norms_sq,
+                copy_Gram=True, copy_Xy=True,
+                return_n_iter=True)
+        self.coef_ = coef_.T
+        self._set_intercept(X_offset, y_offset, X_scale)
+        return self
+
+
+def _omp_path_residues(X_train, y_train, X_test, y_test, copy=True,
+                       fit_intercept=True, normalize=True, max_iter=100):
+    """Compute the residues on left-out data for a full LARS path
+
+    Parameters
+    ----------
+    X_train : array, shape (n_samples, n_features)
+        The data to fit the LARS on
+
+    y_train : array, shape (n_samples)
+        The target variable to fit LARS on
+
+    X_test : array, shape (n_samples, n_features)
+        The data to compute the residues on
+
+    y_test : array, shape (n_samples)
+        The target variable to compute the residues on
+
+    copy : boolean, optional
+        Whether X_train, X_test, y_train and y_test should be copied.  If
+        False, they may be overwritten.
+
+    fit_intercept : boolean
+        whether to calculate the intercept for this model. If set
+        to false, no intercept will be used in calculations
+        (i.e. data is expected to be centered).
+
+    normalize : boolean, optional, default True
+        This parameter is ignored when ``fit_intercept`` is set to False.
+        If True, the regressors X will be normalized before regression by
+        subtracting the mean and dividing by the l2-norm.
+        If you wish to standardize, please use
+        :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
+        on an estimator with ``normalize=False``.
+
+    max_iter : integer, optional
+        Maximum numbers of iterations to perform, therefore maximum features
+        to include. 100 by default.
+
+    Returns
+    -------
+    residues : array, shape (n_samples, max_features)
+        Residues of the prediction on the test data
+    """
+
+    if copy:
+        X_train = X_train.copy()
+        y_train = y_train.copy()
+        X_test = X_test.copy()
+        y_test = y_test.copy()
+
+    if fit_intercept:
+        X_mean = X_train.mean(axis=0)
+        X_train -= X_mean
+        X_test -= X_mean
+        y_mean = y_train.mean(axis=0)
+        y_train = as_float_array(y_train, copy=False)
+        y_train -= y_mean
+        y_test = as_float_array(y_test, copy=False)
+        y_test -= y_mean
+
+    if normalize:
+        norms = np.sqrt(np.sum(X_train ** 2, axis=0))
+        nonzeros = np.flatnonzero(norms)
+        X_train[:, nonzeros] /= norms[nonzeros]
+
+    coefs = orthogonal_mp(X_train, y_train, n_nonzero_coefs=max_iter, tol=None,
+                          precompute=False, copy_X=False,
+                          return_path=True)
+    if coefs.ndim == 1:
+        coefs = coefs[:, np.newaxis]
+    if normalize:
+        coefs[nonzeros] /= norms[nonzeros][:, np.newaxis]
+
+    return np.dot(coefs.T, X_test.T) - y_test
+
+
+class OrthogonalMatchingPursuitCV(RegressorMixin, LinearModel):
+    """Cross-validated Orthogonal Matching Pursuit model (OMP).
+
+    See glossary entry for :term:`cross-validation estimator`.
+
+    Read more in the :ref:`User Guide <omp>`.
+
+    Parameters
+    ----------
+    copy : bool, optional
+        Whether the design matrix X must be copied by the algorithm. A false
+        value is only helpful if X is already Fortran-ordered, otherwise a
+        copy is made anyway.
+
+    fit_intercept : boolean, optional
+        whether to calculate the intercept for this model. If set
+        to false, no intercept will be used in calculations
+        (i.e. data is expected to be centered).
+
+    normalize : boolean, optional, default True
+        This parameter is ignored when ``fit_intercept`` is set to False.
+        If True, the regressors X will be normalized before regression by
+        subtracting the mean and dividing by the l2-norm.
+        If you wish to standardize, please use
+        :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
+        on an estimator with ``normalize=False``.
+
+    max_iter : integer, optional
+        Maximum numbers of iterations to perform, therefore maximum features
+        to include. 10% of ``n_features`` but at least 5 if available.
+
+    cv : int, cross-validation generator or an iterable, optional
+        Determines the cross-validation splitting strategy.
+        Possible inputs for cv are:
+
+        - None, to use the default 5-fold cross-validation,
+        - integer, to specify the number of folds.
+        - :term:`CV splitter`,
+        - An iterable yielding (train, test) splits as arrays of indices.
+
+        For integer/None inputs, :class:`KFold` is used.
+
+        Refer :ref:`User Guide <cross_validation>` for the various
+        cross-validation strategies that can be used here.
+
+        .. versionchanged:: 0.22
+            ``cv`` default value if None changed from 3-fold to 5-fold.
+
+    n_jobs : int or None, optional (default=None)
+        Number of CPUs to use during the cross validation.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    verbose : boolean or integer, optional
+        Sets the verbosity amount
+
+    Attributes
+    ----------
+    intercept_ : float or array, shape (n_targets,)
+        Independent term in decision function.
+
+    coef_ : array, shape (n_features,) or (n_targets, n_features)
+        Parameter vector (w in the problem formulation).
+
+    n_nonzero_coefs_ : int
+        Estimated number of non-zero coefficients giving the best mean squared
+        error over the cross-validation folds.
+
+    n_iter_ : int or array-like
+        Number of active features across every target for the model refit with
+        the best hyperparameters got by cross-validating across all folds.
+
+    Examples
+    --------
+    >>> from sklearn.linear_model import OrthogonalMatchingPursuitCV
+    >>> from sklearn.datasets import make_regression
+    >>> X, y = make_regression(n_features=100, n_informative=10,
+    ...                        noise=4, random_state=0)
+    >>> reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)
+    >>> reg.score(X, y)
+    0.9991...
+    >>> reg.n_nonzero_coefs_
+    10
+    >>> reg.predict(X[:1,])
+    array([-78.3854...])
+
+    See also
+    --------
+    orthogonal_mp
+    orthogonal_mp_gram
+    lars_path
+    Lars
+    LassoLars
+    OrthogonalMatchingPursuit
+    LarsCV
+    LassoLarsCV
+    decomposition.sparse_encode
+
+    """
+    @_deprecate_positional_args
+    def __init__(self, *, copy=True, fit_intercept=True, normalize=True,
+                 max_iter=None, cv=None, n_jobs=None, verbose=False):
+        self.copy = copy
+        self.fit_intercept = fit_intercept
+        self.normalize = normalize
+        self.max_iter = max_iter
+        self.cv = cv
+        self.n_jobs = n_jobs
+        self.verbose = verbose
+
+    def fit(self, X, y):
+        """Fit the model using X, y as training data.
+
+        Parameters
+        ----------
+        X : array-like, shape [n_samples, n_features]
+            Training data.
+
+        y : array-like, shape [n_samples]
+            Target values. Will be cast to X's dtype if necessary
+
+        Returns
+        -------
+        self : object
+            returns an instance of self.
+        """
+        X, y = self._validate_data(X, y, y_numeric=True, ensure_min_features=2,
+                                   estimator=self)
+        X = as_float_array(X, copy=False, force_all_finite=False)
+        cv = check_cv(self.cv, classifier=False)
+        max_iter = (min(max(int(0.1 * X.shape[1]), 5), X.shape[1])
+                    if not self.max_iter
+                    else self.max_iter)
+        cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
+            delayed(_omp_path_residues)(
+                X[train], y[train], X[test], y[test], self.copy,
+                self.fit_intercept, self.normalize, max_iter)
+            for train, test in cv.split(X))
+
+        min_early_stop = min(fold.shape[0] for fold in cv_paths)
+        mse_folds = np.array([(fold[:min_early_stop] ** 2).mean(axis=1)
+                              for fold in cv_paths])
+        best_n_nonzero_coefs = np.argmin(mse_folds.mean(axis=0)) + 1
+        self.n_nonzero_coefs_ = best_n_nonzero_coefs
+        omp = OrthogonalMatchingPursuit(n_nonzero_coefs=best_n_nonzero_coefs,
+                                        fit_intercept=self.fit_intercept,
+                                        normalize=self.normalize)
+        omp.fit(X, y)
+        self.coef_ = omp.coef_
+        self.intercept_ = omp.intercept_
+        self.n_iter_ = omp.n_iter_
+        return self
--- a/venv/Lib/site-packages/sklearn/linear_model/_passive_aggressive.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_passive_aggressive.py
@ -0,0 +1,471 @@
+# Authors: Rob Zinkov, Mathieu Blondel
+# License: BSD 3 clause
+
+from ..utils.validation import _deprecate_positional_args
+from ._stochastic_gradient import BaseSGDClassifier
+from ._stochastic_gradient import BaseSGDRegressor
+from ._stochastic_gradient import DEFAULT_EPSILON
+
+
+class PassiveAggressiveClassifier(BaseSGDClassifier):
+    """Passive Aggressive Classifier
+
+    Read more in the :ref:`User Guide <passive_aggressive>`.
+
+    Parameters
+    ----------
+
+    C : float
+        Maximum step size (regularization). Defaults to 1.0.
+
+    fit_intercept : bool, default=True
+        Whether the intercept should be estimated or not. If False, the
+        data is assumed to be already centered.
+
+    max_iter : int, optional (default=1000)
+        The maximum number of passes over the training data (aka epochs).
+        It only impacts the behavior in the ``fit`` method, and not the
+        :meth:`partial_fit` method.
+
+        .. versionadded:: 0.19
+
+    tol : float or None, optional (default=1e-3)
+        The stopping criterion. If it is not None, the iterations will stop
+        when (loss > previous_loss - tol).
+
+        .. versionadded:: 0.19
+
+    early_stopping : bool, default=False
+        Whether to use early stopping to terminate training when validation
+        score is not improving. If set to True, it will automatically set aside
+        a stratified fraction of training data as validation and terminate
+        training when validation score is not improving by at least tol for
+        n_iter_no_change consecutive epochs.
+
+        .. versionadded:: 0.20
+
+    validation_fraction : float, default=0.1
+        The proportion of training data to set aside as validation set for
+        early stopping. Must be between 0 and 1.
+        Only used if early_stopping is True.
+
+        .. versionadded:: 0.20
+
+    n_iter_no_change : int, default=5
+        Number of iterations with no improvement to wait before early stopping.
+
+        .. versionadded:: 0.20
+
+    shuffle : bool, default=True
+        Whether or not the training data should be shuffled after each epoch.
+
+    verbose : integer, optional
+        The verbosity level
+
+    loss : string, optional
+        The loss function to be used:
+        hinge: equivalent to PA-I in the reference paper.
+        squared_hinge: equivalent to PA-II in the reference paper.
+
+    n_jobs : int or None, optional (default=None)
+        The number of CPUs to use to do the OVA (One Versus All, for
+        multi-class problems) computation.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    random_state : int, RandomState instance, default=None
+        Used to shuffle the training data, when ``shuffle`` is set to
+        ``True``. Pass an int for reproducible output across multiple
+        function calls.
+        See :term:`Glossary <random_state>`.
+
+    warm_start : bool, optional
+        When set to True, reuse the solution of the previous call to fit as
+        initialization, otherwise, just erase the previous solution.
+        See :term:`the Glossary <warm_start>`.
+
+        Repeatedly calling fit or partial_fit when warm_start is True can
+        result in a different solution than when calling fit a single time
+        because of the way the data is shuffled.
+
+    class_weight : dict, {class_label: weight} or "balanced" or None, optional
+        Preset for the class_weight fit parameter.
+
+        Weights associated with classes. If not given, all classes
+        are supposed to have weight one.
+
+        The "balanced" mode uses the values of y to automatically adjust
+        weights inversely proportional to class frequencies in the input data
+        as ``n_samples / (n_classes * np.bincount(y))``
+
+        .. versionadded:: 0.17
+           parameter *class_weight* to automatically weight samples.
+
+    average : bool or int, optional
+        When set to True, computes the averaged SGD weights and stores the
+        result in the ``coef_`` attribute. If set to an int greater than 1,
+        averaging will begin once the total number of samples seen reaches
+        average. So average=10 will begin averaging after seeing 10 samples.
+
+        .. versionadded:: 0.19
+           parameter *average* to use weights averaging in SGD
+
+    Attributes
+    ----------
+    coef_ : array, shape = [1, n_features] if n_classes == 2 else [n_classes,\
+            n_features]
+        Weights assigned to the features.
+
+    intercept_ : array, shape = [1] if n_classes == 2 else [n_classes]
+        Constants in decision function.
+
+    n_iter_ : int
+        The actual number of iterations to reach the stopping criterion.
+        For multiclass fits, it is the maximum over every binary fit.
+
+    classes_ : array of shape (n_classes,)
+        The unique classes labels.
+
+    t_ : int
+        Number of weight updates performed during training.
+        Same as ``(n_iter_ * n_samples)``.
+
+    loss_function_ : callable
+        Loss function used by the algorithm.
+
+    Examples
+    --------
+    >>> from sklearn.linear_model import PassiveAggressiveClassifier
+    >>> from sklearn.datasets import make_classification
+
+    >>> X, y = make_classification(n_features=4, random_state=0)
+    >>> clf = PassiveAggressiveClassifier(max_iter=1000, random_state=0,
+    ... tol=1e-3)
+    >>> clf.fit(X, y)
+    PassiveAggressiveClassifier(random_state=0)
+    >>> print(clf.coef_)
+    [[0.26642044 0.45070924 0.67251877 0.64185414]]
+    >>> print(clf.intercept_)
+    [1.84127814]
+    >>> print(clf.predict([[0, 0, 0, 0]]))
+    [1]
+
+    See also
+    --------
+
+    SGDClassifier
+    Perceptron
+
+    References
+    ----------
+    Online Passive-Aggressive Algorithms
+    <http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>
+    K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006)
+
+    """
+    @_deprecate_positional_args
+    def __init__(self, *, C=1.0, fit_intercept=True, max_iter=1000, tol=1e-3,
+                 early_stopping=False, validation_fraction=0.1,
+                 n_iter_no_change=5, shuffle=True, verbose=0, loss="hinge",
+                 n_jobs=None, random_state=None, warm_start=False,
+                 class_weight=None, average=False):
+        # The base SGD estimator is set up without a penalty and with eta0
+        # fixed to 1.0; C and loss are stored on this class only.
+        super().__init__(
+            penalty=None,
+            fit_intercept=fit_intercept,
+            max_iter=max_iter,
+            tol=tol,
+            early_stopping=early_stopping,
+            validation_fraction=validation_fraction,
+            n_iter_no_change=n_iter_no_change,
+            shuffle=shuffle,
+            verbose=verbose,
+            random_state=random_state,
+            eta0=1.0,
+            warm_start=warm_start,
+            class_weight=class_weight,
+            average=average,
+            n_jobs=n_jobs)
+
+        self.C = C
+        self.loss = loss
+
+    def partial_fit(self, X, y, classes=None):
+        """Fit linear model with Passive Aggressive algorithm.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Subset of the training data
+
+        y : numpy array of shape [n_samples]
+            Subset of the target values
+
+        classes : array, shape = [n_classes]
+            Classes across all calls to partial_fit.
+            Can be obtained via `np.unique(y_all)`, where y_all is the
+            target vector of the entire dataset.
+            This argument is required for the first call to partial_fit
+            and can be omitted in the subsequent calls.
+            Note that y doesn't need to contain all labels in `classes`.
+
+        Returns
+        -------
+        self : returns an instance of self.
+
+        Raises
+        ------
+        ValueError
+            If ``class_weight`` is ``'balanced'``.
+        """
+        self._validate_params(for_partial_fit=True)
+        if self.class_weight == 'balanced':
+            raise ValueError("class_weight 'balanced' is not supported for "
+                             "partial_fit. For 'balanced' weights, use "
+                             "`sklearn.utils.compute_class_weight` with "
+                             "`class_weight='balanced'`. In place of y you "
+                             "can use a large enough subset of the full "
+                             "training set target to properly estimate the "
+                             "class frequency distributions. Pass the "
+                             "resulting weights as the class_weight "
+                             "parameter.")
+        # "hinge" maps to the PA-I update rule ("pa1"); every other value of
+        # ``loss`` falls through to PA-II ("pa2").
+        lr = "pa1" if self.loss == "hinge" else "pa2"
+        # ``loss="hinge"`` is passed to the base routine regardless of
+        # ``self.loss``; the PA variant is carried by ``learning_rate``.
+        return self._partial_fit(X, y, alpha=1.0, C=self.C,
+                                 loss="hinge", learning_rate=lr, max_iter=1,
+                                 classes=classes, sample_weight=None,
+                                 coef_init=None, intercept_init=None)
+
+    def fit(self, X, y, coef_init=None, intercept_init=None):
+        """Fit linear model with Passive Aggressive algorithm.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Training data
+
+        y : numpy array of shape [n_samples]
+            Target values
+
+        coef_init : array, shape = [n_classes,n_features]
+            The initial coefficients to warm-start the optimization.
+
+        intercept_init : array, shape = [n_classes]
+            The initial intercept to warm-start the optimization.
+
+        Returns
+        -------
+        self : returns an instance of self.
+        """
+        self._validate_params()
+        # Same mapping as in partial_fit: "hinge" -> PA-I, otherwise PA-II.
+        lr = "pa1" if self.loss == "hinge" else "pa2"
+        return self._fit(X, y, alpha=1.0, C=self.C,
+                         loss="hinge", learning_rate=lr,
+                         coef_init=coef_init, intercept_init=intercept_init)
+
+
+class PassiveAggressiveRegressor(BaseSGDRegressor):
+    """Passive Aggressive Regressor
+
+    Read more in the :ref:`User Guide <passive_aggressive>`.
+
+    Parameters
+    ----------
+
+    C : float
+        Maximum step size (regularization). Defaults to 1.0.
+
+    fit_intercept : bool
+        Whether the intercept should be estimated or not. If False, the
+        data is assumed to be already centered. Defaults to True.
+
+    max_iter : int, optional (default=1000)
+        The maximum number of passes over the training data (aka epochs).
+        It only impacts the behavior in the ``fit`` method, and not the
+        :meth:`partial_fit` method.
+
+        .. versionadded:: 0.19
+
+    tol : float or None, optional (default=1e-3)
+        The stopping criterion. If it is not None, the iterations will stop
+        when (loss > previous_loss - tol).
+
+        .. versionadded:: 0.19
+
+    early_stopping : bool, default=False
+        Whether to use early stopping to terminate training when validation
+        score is not improving. If set to True, it will automatically set aside
+        a fraction of training data as validation and terminate
+        training when validation score is not improving by at least tol for
+        n_iter_no_change consecutive epochs.
+
+        .. versionadded:: 0.20
+
+    validation_fraction : float, default=0.1
+        The proportion of training data to set aside as validation set for
+        early stopping. Must be between 0 and 1.
+        Only used if early_stopping is True.
+
+        .. versionadded:: 0.20
+
+    n_iter_no_change : int, default=5
+        Number of iterations with no improvement to wait before early stopping.
+
+        .. versionadded:: 0.20
+
+    shuffle : bool, default=True
+        Whether or not the training data should be shuffled after each epoch.
+
+    verbose : integer, optional
+        The verbosity level
+
+    loss : string, optional
+        The loss function to be used:
+        epsilon_insensitive: equivalent to PA-I in the reference paper.
+        squared_epsilon_insensitive: equivalent to PA-II in the reference
+        paper.
+
+    epsilon : float
+        If the difference between the current prediction and the correct label
+        is below this threshold, the model is not updated.
+
+    random_state : int, RandomState instance, default=None
+        Used to shuffle the training data, when ``shuffle`` is set to
+        ``True``. Pass an int for reproducible output across multiple
+        function calls.
+        See :term:`Glossary <random_state>`.
+
+    warm_start : bool, optional
+        When set to True, reuse the solution of the previous call to fit as
+        initialization, otherwise, just erase the previous solution.
+        See :term:`the Glossary <warm_start>`.
+
+        Repeatedly calling fit or partial_fit when warm_start is True can
+        result in a different solution than when calling fit a single time
+        because of the way the data is shuffled.
+
+    average : bool or int, optional
+        When set to True, computes the averaged SGD weights and stores the
+        result in the ``coef_`` attribute. If set to an int greater than 1,
+        averaging will begin once the total number of samples seen reaches
+        average. So average=10 will begin averaging after seeing 10 samples.
+
+        .. versionadded:: 0.19
+           parameter *average* to use weights averaging in SGD
+
+    Attributes
+    ----------
+    coef_ : array, shape = [n_features]
+        Weights assigned to the features.
+
+    intercept_ : array, shape = [1]
+        Constants in decision function.
+
+    n_iter_ : int
+        The actual number of iterations to reach the stopping criterion.
+
+    t_ : int
+        Number of weight updates performed during training.
+        Same as ``(n_iter_ * n_samples)``.
+
+    Examples
+    --------
+    >>> from sklearn.linear_model import PassiveAggressiveRegressor
+    >>> from sklearn.datasets import make_regression
+
+    >>> X, y = make_regression(n_features=4, random_state=0)
+    >>> regr = PassiveAggressiveRegressor(max_iter=100, random_state=0,
+    ... tol=1e-3)
+    >>> regr.fit(X, y)
+    PassiveAggressiveRegressor(max_iter=100, random_state=0)
+    >>> print(regr.coef_)
+    [20.48736655 34.18818427 67.59122734 87.94731329]
+    >>> print(regr.intercept_)
+    [-0.02306214]
+    >>> print(regr.predict([[0, 0, 0, 0]]))
+    [-0.02306214]
+
+    See also
+    --------
+
+    SGDRegressor
+
+    References
+    ----------
+    Online Passive-Aggressive Algorithms
+    <http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf>
+    K. Crammer, O. Dekel, J. Keshat, S. Shalev-Shwartz, Y. Singer - JMLR (2006)
+
+    """
+    @_deprecate_positional_args
+    def __init__(self, *, C=1.0, fit_intercept=True, max_iter=1000, tol=1e-3,
+                 early_stopping=False, validation_fraction=0.1,
+                 n_iter_no_change=5, shuffle=True, verbose=0,
+                 loss="epsilon_insensitive", epsilon=DEFAULT_EPSILON,
+                 random_state=None, warm_start=False,
+                 average=False):
+        # penalty, l1_ratio and eta0 are fixed for the base SGD estimator;
+        # C and loss are stored on this class only.
+        super().__init__(
+            penalty=None,
+            l1_ratio=0,
+            epsilon=epsilon,
+            eta0=1.0,
+            fit_intercept=fit_intercept,
+            max_iter=max_iter,
+            tol=tol,
+            early_stopping=early_stopping,
+            validation_fraction=validation_fraction,
+            n_iter_no_change=n_iter_no_change,
+            shuffle=shuffle,
+            verbose=verbose,
+            random_state=random_state,
+            warm_start=warm_start,
+            average=average)
+        self.C = C
+        self.loss = loss
+
+    def partial_fit(self, X, y):
+        """Fit linear model with Passive Aggressive algorithm.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Subset of training data
+
+        y : numpy array of shape [n_samples]
+            Subset of target values
+
+        Returns
+        -------
+        self : returns an instance of self.
+        """
+        self._validate_params(for_partial_fit=True)
+        # "epsilon_insensitive" maps to the PA-I update rule ("pa1"); every
+        # other value of ``loss`` falls through to PA-II ("pa2").
+        lr = "pa1" if self.loss == "epsilon_insensitive" else "pa2"
+        # ``loss="epsilon_insensitive"`` is passed to the base routine
+        # regardless of ``self.loss``; the PA variant is carried by
+        # ``learning_rate``.
+        return self._partial_fit(X, y, alpha=1.0, C=self.C,
+                                 loss="epsilon_insensitive",
+                                 learning_rate=lr, max_iter=1,
+                                 sample_weight=None,
+                                 coef_init=None, intercept_init=None)
+
+    def fit(self, X, y, coef_init=None, intercept_init=None):
+        """Fit linear model with Passive Aggressive algorithm.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Training data
+
+        y : numpy array of shape [n_samples]
+            Target values
+
+        coef_init : array, shape = [n_features]
+            The initial coefficients to warm-start the optimization.
+
+        intercept_init : array, shape = [1]
+            The initial intercept to warm-start the optimization.
+
+        Returns
+        -------
+        self : returns an instance of self.
+        """
+        self._validate_params()
+        # Same mapping as in partial_fit: "epsilon_insensitive" -> PA-I,
+        # otherwise PA-II.
+        lr = "pa1" if self.loss == "epsilon_insensitive" else "pa2"
+        return self._fit(X, y, alpha=1.0, C=self.C,
+                         loss="epsilon_insensitive",
+                         learning_rate=lr,
+                         coef_init=coef_init,
+                         intercept_init=intercept_init)
--- a/venv/Lib/site-packages/sklearn/linear_model/_perceptron.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_perceptron.py
@ -0,0 +1,160 @@
+# Author: Mathieu Blondel
+# License: BSD 3 clause
+
+from ..utils.validation import _deprecate_positional_args
+from ._stochastic_gradient import BaseSGDClassifier
+
+
+class Perceptron(BaseSGDClassifier):
+    """Perceptron
+
+    Read more in the :ref:`User Guide <perceptron>`.
+
+    Parameters
+    ----------
+
+    penalty : {'l2','l1','elasticnet'}, default=None
+        The penalty (aka regularization term) to be used.
+
+    alpha : float, default=0.0001
+        Constant that multiplies the regularization term if regularization is
+        used.
+
+    fit_intercept : bool, default=True
+        Whether the intercept should be estimated or not. If False, the
+        data is assumed to be already centered.
+
+    max_iter : int, default=1000
+        The maximum number of passes over the training data (aka epochs).
+        It only impacts the behavior in the ``fit`` method, and not the
+        :meth:`partial_fit` method.
+
+        .. versionadded:: 0.19
+
+    tol : float, default=1e-3
+        The stopping criterion. If it is not None, the iterations will stop
+        when (loss > previous_loss - tol).
+
+        .. versionadded:: 0.19
+
+    shuffle : bool, default=True
+        Whether or not the training data should be shuffled after each epoch.
+
+    verbose : int, default=0
+        The verbosity level
+
+    eta0 : double, default=1
+        Constant by which the updates are multiplied.
+
+    n_jobs : int, default=None
+        The number of CPUs to use to do the OVA (One Versus All, for
+        multi-class problems) computation.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    random_state : int, RandomState instance, default=0
+        Used to shuffle the training data, when ``shuffle`` is set to
+        ``True``. Pass an int for reproducible output across multiple
+        function calls.
+        See :term:`Glossary <random_state>`.
+
+    early_stopping : bool, default=False
+        Whether to use early stopping to terminate training when validation
+        score is not improving. If set to True, it will automatically set aside
+        a stratified fraction of training data as validation and terminate
+        training when validation score is not improving by at least tol for
+        n_iter_no_change consecutive epochs.
+
+        .. versionadded:: 0.20
+
+    validation_fraction : float, default=0.1
+        The proportion of training data to set aside as validation set for
+        early stopping. Must be between 0 and 1.
+        Only used if early_stopping is True.
+
+        .. versionadded:: 0.20
+
+    n_iter_no_change : int, default=5
+        Number of iterations with no improvement to wait before early stopping.
+
+        .. versionadded:: 0.20
+
+    class_weight : dict, {class_label: weight} or "balanced", default=None
+        Preset for the class_weight fit parameter.
+
+        Weights associated with classes. If not given, all classes
+        are supposed to have weight one.
+
+        The "balanced" mode uses the values of y to automatically adjust
+        weights inversely proportional to class frequencies in the input data
+        as ``n_samples / (n_classes * np.bincount(y))``
+
+    warm_start : bool, default=False
+        When set to True, reuse the solution of the previous call to fit as
+        initialization, otherwise, just erase the previous solution. See
+        :term:`the Glossary <warm_start>`.
+
+    Attributes
+    ----------
+    coef_ : ndarray of shape = [1, n_features] if n_classes == 2 else \
+        [n_classes, n_features]
+        Weights assigned to the features.
+
+    intercept_ : ndarray of shape = [1] if n_classes == 2 else [n_classes]
+        Constants in decision function.
+
+    n_iter_ : int
+        The actual number of iterations to reach the stopping criterion.
+        For multiclass fits, it is the maximum over every binary fit.
+
+    classes_ : ndarray of shape (n_classes,)
+        The unique classes labels.
+
+    t_ : int
+        Number of weight updates performed during training.
+        Same as ``(n_iter_ * n_samples)``.
+
+    Notes
+    -----
+
+    ``Perceptron`` is a classification algorithm which shares the same
+    underlying implementation with ``SGDClassifier``. In fact,
+    ``Perceptron()`` is equivalent to `SGDClassifier(loss="perceptron",
+    eta0=1, learning_rate="constant", penalty=None)`.
+
+    Examples
+    --------
+    >>> from sklearn.datasets import load_digits
+    >>> from sklearn.linear_model import Perceptron
+    >>> X, y = load_digits(return_X_y=True)
+    >>> clf = Perceptron(tol=1e-3, random_state=0)
+    >>> clf.fit(X, y)
+    Perceptron()
+    >>> clf.score(X, y)
+    0.939...
+
+    See also
+    --------
+
+    SGDClassifier
+
+    References
+    ----------
+
+    https://en.wikipedia.org/wiki/Perceptron and references therein.
+    """
+    @_deprecate_positional_args
+    def __init__(self, *, penalty=None, alpha=0.0001, fit_intercept=True,
+                 max_iter=1000, tol=1e-3, shuffle=True, verbose=0, eta0=1.0,
+                 n_jobs=None, random_state=0, early_stopping=False,
+                 validation_fraction=0.1, n_iter_no_change=5,
+                 class_weight=None, warm_start=False):
+        # loss, learning_rate, l1_ratio and power_t are fixed here; every
+        # other base-class argument is forwarded from the constructor.
+        super().__init__(
+            loss="perceptron", penalty=penalty, alpha=alpha, l1_ratio=0,
+            fit_intercept=fit_intercept, max_iter=max_iter, tol=tol,
+            shuffle=shuffle, verbose=verbose, random_state=random_state,
+            learning_rate="constant", eta0=eta0, early_stopping=early_stopping,
+            validation_fraction=validation_fraction,
+            n_iter_no_change=n_iter_no_change, power_t=0.5,
+            warm_start=warm_start, class_weight=class_weight, n_jobs=n_jobs)
--- a/venv/Lib/site-packages/sklearn/linear_model/_ransac.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_ransac.py
@ -0,0 +1,504 @@
+# coding: utf-8
+
+# Author: Johannes Schönberger
+#
+# License: BSD 3 clause
+
+import numpy as np
+import warnings
+
+from ..base import BaseEstimator, MetaEstimatorMixin, RegressorMixin, clone
+from ..base import MultiOutputMixin
+from ..utils import check_random_state, check_consistent_length
+from ..utils.random import sample_without_replacement
+from ..utils.validation import check_is_fitted, _check_sample_weight
+from ..utils.validation import _deprecate_positional_args
+from ._base import LinearRegression
+from ..utils.validation import has_fit_parameter
+from ..exceptions import ConvergenceWarning
+
+# Spacing between 1.0 and the next representable float; used as a lower
+# bound so the logarithms in _dynamic_max_trials never receive zero.
+_EPSILON = np.spacing(1)
+
+
+def _dynamic_max_trials(n_inliers, n_samples, min_samples, probability):
+    """Estimate how many random subsets must be drawn so that, with the
+    requested confidence, at least one of them contains no outlier.
+
+    Parameters
+    ----------
+    n_inliers : int
+        Number of inliers in the data.
+
+    n_samples : int
+        Total number of samples in the data.
+
+    min_samples : int
+        Minimum number of samples chosen randomly from original data.
+
+    probability : float
+        Probability (confidence) that one outlier-free sample is generated.
+
+    Returns
+    -------
+    trials : int or float
+        Number of trials. ``0`` when ``1 - probability`` equals 1,
+        ``float('inf')`` when ``1 - inlier_ratio ** min_samples`` equals 1,
+        otherwise a non-negative float holding a whole number.
+
+    """
+    inlier_fraction = n_inliers / float(n_samples)
+    # Both terms are floored at _EPSILON so that np.log never sees zero.
+    miss_prob = max(_EPSILON, 1 - probability)
+    tainted_prob = max(_EPSILON, 1 - inlier_fraction ** min_samples)
+
+    # No confidence requested: no trial is needed.
+    if miss_prob == 1:
+        return 0
+    # A subset can never be outlier-free: no finite number of trials helps.
+    if tainted_prob == 1:
+        return float('inf')
+
+    n_trials = np.ceil(np.log(miss_prob) / np.log(tainted_prob))
+    # abs() keeps the reported count non-negative.
+    return abs(float(n_trials))
+
+
+class RANSACRegressor(MetaEstimatorMixin, RegressorMixin,
+                      MultiOutputMixin, BaseEstimator):
+    """RANSAC (RANdom SAmple Consensus) algorithm.
+
+    RANSAC is an iterative algorithm for the robust estimation of parameters
+    from a subset of inliers from the complete data set.
+
+    Read more in the :ref:`User Guide <ransac_regression>`.
+
+    Parameters
+    ----------
+    base_estimator : object, optional
+        Base estimator object which implements the following methods:
+
+         * `fit(X, y)`: Fit model to given training data and target values.
+         * `score(X, y)`: Returns the mean accuracy on the given test data,
+           which is used for the stop criterion defined by `stop_score`.
+           Additionally, the score is used to decide which of two equally
+           large consensus sets is chosen as the better one.
+         * `predict(X)`: Returns predicted values using the linear model,
+           which is used to compute residual error using loss function.
+
+        If `base_estimator` is None, then
+        ``base_estimator=sklearn.linear_model.LinearRegression()`` is used for
+        target values of dtype float.
+
+        Note that the current implementation only supports regression
+        estimators.
+
+    min_samples : int (>= 1) or float ([0, 1]), optional
+        Minimum number of samples chosen randomly from original data. Treated
+        as an absolute number of samples for `min_samples >= 1`, treated as a
+        relative number `ceil(min_samples * X.shape[0]`) for
+        `min_samples < 1`. This is typically chosen as the minimal number of
+        samples necessary to estimate the given `base_estimator`. By default a
+        ``sklearn.linear_model.LinearRegression()`` estimator is assumed and
+        `min_samples` is chosen as ``X.shape[1] + 1``.
+
+    residual_threshold : float, optional
+        Maximum residual for a data sample to be classified as an inlier.
+        By default the threshold is chosen as the MAD (median absolute
+        deviation) of the target values `y`.
+
+    is_data_valid : callable, optional
+        This function is called with the randomly selected data before the
+        model is fitted to it: `is_data_valid(X, y)`. If its return value is
+        False the current randomly chosen sub-sample is skipped.
+
+    is_model_valid : callable, optional
+        This function is called with the estimated model and the randomly
+        selected data: `is_model_valid(model, X, y)`. If its return value is
+        False the current randomly chosen sub-sample is skipped.
+        Rejecting samples with this function is computationally costlier than
+        with `is_data_valid`. `is_model_valid` should therefore only be used if
+        the estimated model is needed for making the rejection decision.
+
+    max_trials : int, optional
+        Maximum number of iterations for random sample selection.
+
+    max_skips : int, optional
+        Maximum number of iterations that can be skipped due to finding zero
+        inliers or invalid data defined by ``is_data_valid`` or invalid models
+        defined by ``is_model_valid``.
+
+        .. versionadded:: 0.19
+
+    stop_n_inliers : int, optional
+        Stop iteration if at least this number of inliers are found.
+
+    stop_score : float, optional
+        Stop iteration if score is greater equal than this threshold.
+
+    stop_probability : float in range [0, 1], optional
+        RANSAC iteration stops if at least one outlier-free set of the training
+        data is sampled in RANSAC. This requires to generate at least N
+        samples (iterations)::
+
+            N >= log(1 - probability) / log(1 - e**m)
+
+        where the probability (confidence) is typically set to high value such
+        as 0.99 (the default) and e is the current fraction of inliers w.r.t.
+        the total number of samples.
+
+    loss : string, callable, optional, default "absolute_loss"
+        String inputs, "absolute_loss" and "squared_loss" are supported which
+        find the absolute loss and squared loss per sample
+        respectively.
+
+        If ``loss`` is a callable, then it should be a function that takes
+        two arrays as inputs, the true and predicted value and returns a 1-D
+        array with the i-th value of the array corresponding to the loss
+        on ``X[i]``.
+
+        If the loss on a sample is greater than the ``residual_threshold``,
+        then this sample is classified as an outlier.
+
+        .. versionadded:: 0.18
+
+    random_state : int, RandomState instance, default=None
+        The generator used to initialize the centers.
+        Pass an int for reproducible output across multiple function calls.
+        See :term:`Glossary <random_state>`.
+
+    Attributes
+    ----------
+    estimator_ : object
+        Best fitted model (copy of the `base_estimator` object).
+
+    n_trials_ : int
+        Number of random selection trials until one of the stop criteria is
+        met. It is always ``<= max_trials``.
+
+    inlier_mask_ : bool array of shape [n_samples]
+        Boolean mask of inliers classified as ``True``.
+
+    n_skips_no_inliers_ : int
+        Number of iterations skipped due to finding zero inliers.
+
+        .. versionadded:: 0.19
+
+    n_skips_invalid_data_ : int
+        Number of iterations skipped due to invalid data defined by
+        ``is_data_valid``.
+
+        .. versionadded:: 0.19
+
+    n_skips_invalid_model_ : int
+        Number of iterations skipped due to an invalid model defined by
+        ``is_model_valid``.
+
+        .. versionadded:: 0.19
+
+    Examples
+    --------
+    >>> from sklearn.linear_model import RANSACRegressor
+    >>> from sklearn.datasets import make_regression
+    >>> X, y = make_regression(
+    ...     n_samples=200, n_features=2, noise=4.0, random_state=0)
+    >>> reg = RANSACRegressor(random_state=0).fit(X, y)
+    >>> reg.score(X, y)
+    0.9885...
+    >>> reg.predict(X[:1,])
+    array([-31.9417...])
+
+    References
+    ----------
+    .. [1] https://en.wikipedia.org/wiki/RANSAC
+    .. [2] https://www.sri.com/sites/default/files/publications/ransac-publication.pdf
+    .. [3] http://www.bmva.org/bmvc/2009/Papers/Paper355/Paper355.pdf
+    """
+    @_deprecate_positional_args
+    def __init__(self, base_estimator=None, *, min_samples=None,
+                 residual_threshold=None, is_data_valid=None,
+                 is_model_valid=None, max_trials=100, max_skips=np.inf,
+                 stop_n_inliers=np.inf, stop_score=np.inf,
+                 stop_probability=0.99, loss='absolute_loss',
+                 random_state=None):
+
+        self.base_estimator = base_estimator
+        self.min_samples = min_samples
+        self.residual_threshold = residual_threshold
+        self.is_data_valid = is_data_valid
+        self.is_model_valid = is_model_valid
+        self.max_trials = max_trials
+        self.max_skips = max_skips
+        self.stop_n_inliers = stop_n_inliers
+        self.stop_score = stop_score
+        self.stop_probability = stop_probability
+        self.random_state = random_state
+        self.loss = loss
+
+    def fit(self, X, y, sample_weight=None):
+        """Fit estimator using RANSAC algorithm.
+
+        Parameters
+        ----------
+        X : array-like or sparse matrix, shape [n_samples, n_features]
+            Training data.
+
+        y : array-like of shape (n_samples,) or (n_samples, n_targets)
+            Target values.
+
+        sample_weight : array-like of shape (n_samples,), default=None
+            Individual weights for each sample
+            raises error if sample_weight is passed and base_estimator
+            fit method does not support it.
+
+            .. versionadded:: 0.18
+
+        Raises
+        ------
+        ValueError
+            If no valid consensus set could be found. This occurs if
+            `is_data_valid` and `is_model_valid` return False for all
+            `max_trials` randomly chosen sub-samples.
+
+        """
+        # Need to validate separately here.
+        # We can't pass multi_ouput=True because that would allow y to be csr.
+        check_X_params = dict(accept_sparse='csr')
+        check_y_params = dict(ensure_2d=False)
+        X, y = self._validate_data(X, y, validate_separately=(check_X_params,
+                                                              check_y_params))
+        check_consistent_length(X, y)
+
+        if self.base_estimator is not None:
+            base_estimator = clone(self.base_estimator)
+        else:
+            base_estimator = LinearRegression()
+
+        if self.min_samples is None:
+            # assume linear model by default
+            min_samples = X.shape[1] + 1
+        elif 0 < self.min_samples < 1:
+            min_samples = np.ceil(self.min_samples * X.shape[0])
+        elif self.min_samples >= 1:
+            if self.min_samples % 1 != 0:
+                raise ValueError("Absolute number of samples must be an "
+                                 "integer value.")
+            min_samples = self.min_samples
+        else:
+            raise ValueError("Value for `min_samples` must be scalar and "
+                             "positive.")
+        if min_samples > X.shape[0]:
+            raise ValueError("`min_samples` may not be larger than number "
+                             "of samples: n_samples = %d." % (X.shape[0]))
+
+        if self.stop_probability < 0 or self.stop_probability > 1:
+            raise ValueError("`stop_probability` must be in range [0, 1].")
+
+        if self.residual_threshold is None:
+            # MAD (median absolute deviation)
+            residual_threshold = np.median(np.abs(y - np.median(y)))
+        else:
+            residual_threshold = self.residual_threshold
+
+        if self.loss == "absolute_loss":
+            if y.ndim == 1:
+                loss_function = lambda y_true, y_pred: np.abs(y_true - y_pred)
+            else:
+                loss_function = lambda \
+                    y_true, y_pred: np.sum(np.abs(y_true - y_pred), axis=1)
+
+        elif self.loss == "squared_loss":
+            if y.ndim == 1:
+                loss_function = lambda y_true, y_pred: (y_true - y_pred) ** 2
+            else:
+                loss_function = lambda \
+                    y_true, y_pred: np.sum((y_true - y_pred) ** 2, axis=1)
+
+        elif callable(self.loss):
+            loss_function = self.loss
+
+        else:
+            raise ValueError(
+                "loss should be 'absolute_loss', 'squared_loss' or a callable."
+                "Got %s. " % self.loss)
+
+
+        random_state = check_random_state(self.random_state)
+
+        try:  # Not all estimator accept a random_state
+            base_estimator.set_params(random_state=random_state)
+        except ValueError:
+            pass
+
+        estimator_fit_has_sample_weight = has_fit_parameter(base_estimator,
+                                                            "sample_weight")
+        estimator_name = type(base_estimator).__name__
+        if (sample_weight is not None and not
+                estimator_fit_has_sample_weight):
+            raise ValueError("%s does not support sample_weight. Samples"
+                             " weights are only used for the calibration"
+                             " itself." % estimator_name)
+        if sample_weight is not None:
+            sample_weight = _check_sample_weight(sample_weight, X)
+
+        n_inliers_best = 1
+        score_best = -np.inf
+        inlier_mask_best = None
+        X_inlier_best = None
+        y_inlier_best = None
+        inlier_best_idxs_subset = None
+        self.n_skips_no_inliers_ = 0
+        self.n_skips_invalid_data_ = 0
+        self.n_skips_invalid_model_ = 0
+
+        # number of data samples
+        n_samples = X.shape[0]
+        sample_idxs = np.arange(n_samples)
+
+        self.n_trials_ = 0
+        max_trials = self.max_trials
+        while self.n_trials_ < max_trials:
+            self.n_trials_ += 1
+
+            if (self.n_skips_no_inliers_ + self.n_skips_invalid_data_ +
+                    self.n_skips_invalid_model_) > self.max_skips:
+                break
+
+            # choose random sample set
+            subset_idxs = sample_without_replacement(n_samples, min_samples,
+                                                     random_state=random_state)
+            X_subset = X[subset_idxs]
+            y_subset = y[subset_idxs]
+
+            # check if random sample set is valid
+            if (self.is_data_valid is not None
+                    and not self.is_data_valid(X_subset, y_subset)):
+                self.n_skips_invalid_data_ += 1
+                continue
+
+            # fit model for current random sample set
+            if sample_weight is None:
+                base_estimator.fit(X_subset, y_subset)
+            else:
+                base_estimator.fit(X_subset, y_subset,
+                                   sample_weight=sample_weight[subset_idxs])
+
+            # check if estimated model is valid
+            if (self.is_model_valid is not None and not
+                    self.is_model_valid(base_estimator, X_subset, y_subset)):
+                self.n_skips_invalid_model_ += 1
+                continue
+
+            # residuals of all data for current random sample model
+            y_pred = base_estimator.predict(X)
+            residuals_subset = loss_function(y, y_pred)
+
+            # classify data into inliers and outliers
+            inlier_mask_subset = residuals_subset < residual_threshold
+            n_inliers_subset = np.sum(inlier_mask_subset)
+
+            # less inliers -> skip current random sample
+            if n_inliers_subset < n_inliers_best:
+                self.n_skips_no_inliers_ += 1
+                continue
+
+            # extract inlier data set
+            inlier_idxs_subset = sample_idxs[inlier_mask_subset]
+            X_inlier_subset = X[inlier_idxs_subset]
+            y_inlier_subset = y[inlier_idxs_subset]
+
+            # score of inlier data set
+            score_subset = base_estimator.score(X_inlier_subset,
+                                                y_inlier_subset)
+
+            # same number of inliers but worse score -> skip current random
+            # sample
+            if (n_inliers_subset == n_inliers_best
+                    and score_subset < score_best):
+                continue
+
+            # save current random sample as best sample
+            n_inliers_best = n_inliers_subset
+            score_best = score_subset
+            inlier_mask_best = inlier_mask_subset
+            X_inlier_best = X_inlier_subset
+            y_inlier_best = y_inlier_subset
+            inlier_best_idxs_subset = inlier_idxs_subset
+
+            max_trials = min(
+                max_trials,
+                _dynamic_max_trials(n_inliers_best, n_samples,
+                                    min_samples, self.stop_probability))
+
+            # break if sufficient number of inliers or score is reached
+            if n_inliers_best >= self.stop_n_inliers or \
+                            score_best >= self.stop_score:
+                break
+
+        # if none of the iterations met the required criteria
+        if inlier_mask_best is None:
+            if ((self.n_skips_no_inliers_ + self.n_skips_invalid_data_ +
+                    self.n_skips_invalid_model_) > self.max_skips):
+                raise ValueError(
+                    "RANSAC skipped more iterations than `max_skips` without"
+                    " finding a valid consensus set. Iterations were skipped"
+                    " because each randomly chosen sub-sample failed the"
+                    " passing criteria. See estimator attributes for"
+                    " diagnostics (n_skips*).")
+            else:
+                raise ValueError(
+                    "RANSAC could not find a valid consensus set. All"
+                    " `max_trials` iterations were skipped because each"
+                    " randomly chosen sub-sample failed the passing criteria."
+                    " See estimator attributes for diagnostics (n_skips*).")
+        else:
+            if (self.n_skips_no_inliers_ + self.n_skips_invalid_data_ +
+                    self.n_skips_invalid_model_) > self.max_skips:
+                warnings.warn("RANSAC found a valid consensus set but exited"
+                              " early due to skipping more iterations than"
+                              " `max_skips`. See estimator attributes for"
+                              " diagnostics (n_skips*).",
+                              ConvergenceWarning)
+
+        # estimate final model using all inliers
+        if sample_weight is None:
+            base_estimator.fit(X_inlier_best, y_inlier_best)
+        else:
+            base_estimator.fit(
+                X_inlier_best,
+                y_inlier_best,
+                sample_weight=sample_weight[inlier_best_idxs_subset])
+
+        self.estimator_ = base_estimator
+        self.inlier_mask_ = inlier_mask_best
+        return self
+
+    def predict(self, X):
+        """Predict using the estimated model.
+
+        This is a wrapper for `estimator_.predict(X)`.
+
+        Parameters
+        ----------
+        X : numpy array of shape [n_samples, n_features]
+
+        Returns
+        -------
+        y : array, shape = [n_samples] or [n_samples, n_targets]
+            Returns predicted values.
+        """
+        check_is_fitted(self)
+
+        return self.estimator_.predict(X)
+
+    def score(self, X, y):
+        """Returns the score of the prediction.
+
+        This is a wrapper for `estimator_.score(X, y)`.
+
+        Parameters
+        ----------
+        X : numpy array or sparse matrix of shape [n_samples, n_features]
+            Training data.
+
+        y : array, shape = [n_samples] or [n_samples, n_targets]
+            Target values.
+
+        Returns
+        -------
+        z : float
+            Score of the prediction.
+        """
+        check_is_fitted(self)
+
+        return self.estimator_.score(X, y)
--- a/venv/Lib/site-packages/sklearn/linear_model/_ridge.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_ridge.py
--- a/venv/Lib/site-packages/sklearn/linear_model/_sag.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_sag.py
@ -0,0 +1,345 @@
+"""Solvers for Ridge and LogisticRegression using SAG algorithm"""
+
+# Authors: Tom Dupre la Tour <tom.dupre-la-tour@m4x.org>
+#
+# License: BSD 3 clause
+
+import warnings
+
+import numpy as np
+
+from ._base import make_dataset
+from ._sag_fast import sag32, sag64
+from ..exceptions import ConvergenceWarning
+from ..utils import check_array
+from ..utils.validation import _check_sample_weight
+from ..utils.validation import _deprecate_positional_args
+from ..utils.extmath import row_norms
+
+
+def get_auto_step_size(max_squared_sum, alpha_scaled, loss, fit_intercept,
+                       n_samples=None,
+                       is_saga=False):
+    """Compute automatic step size for SAG solver
+
+    The step size is set to 1 / (alpha_scaled + L + fit_intercept) where L is
+    the max sum of squares for over all samples.
+
+    Parameters
+    ----------
+    max_squared_sum : float
+        Maximum squared sum of X over samples.
+
+    alpha_scaled : float
+        Constant that multiplies the regularization term, scaled by
+        1. / n_samples, the number of samples.
+
+    loss : string, in {"log", "squared"}
+        The loss function used in SAG solver.
+
+    fit_intercept : bool
+        Specifies if a constant (a.k.a. bias or intercept) will be
+        added to the decision function.
+
+    n_samples : int, optional
+        Number of rows in X. Useful if is_saga=True.
+
+    is_saga : boolean, optional
+        Whether to return step size for the SAGA algorithm or the SAG
+        algorithm.
+
+    Returns
+    -------
+    step_size : float
+        Step size used in SAG solver.
+
+    References
+    ----------
+    Schmidt, M., Roux, N. L., & Bach, F. (2013).
+    Minimizing finite sums with the stochastic average gradient
+    https://hal.inria.fr/hal-00860051/document
+
+    Defazio, A., Bach F. & Lacoste-Julien S. (2014).
+    SAGA: A Fast Incremental Gradient Method With Support
+    for Non-Strongly Convex Composite Objectives
+    https://arxiv.org/abs/1407.0202
+    """
+    if loss in ('log', 'multinomial'):
+        L = (0.25 * (max_squared_sum + int(fit_intercept)) + alpha_scaled)
+    elif loss == 'squared':
+        # inverse Lipschitz constant for squared loss
+        L = max_squared_sum + int(fit_intercept) + alpha_scaled
+    else:
+        raise ValueError("Unknown loss function for SAG solver, got %s "
+                         "instead of 'log' or 'squared'" % loss)
+    if is_saga:
+        # SAGA theoretical step size is 1/3L or 1 / (2 * (L + mu n))
+        # See Defazio et al. 2014
+        mun = min(2 * n_samples * alpha_scaled, L)
+        step = 1. / (2 * L + mun)
+    else:
+        # SAG theoretical step size is 1/16L but it is recommended to use 1 / L
+        # see http://www.birs.ca//workshops//2014/14w5003/files/schmidt.pdf,
+        # slide 65
+        step = 1. / L
+    return step
+
+
+@_deprecate_positional_args
+def sag_solver(X, y, sample_weight=None, loss='log', alpha=1., beta=0.,
+               max_iter=1000, tol=0.001, verbose=0, random_state=None,
+               check_input=True, max_squared_sum=None,
+               warm_start_mem=None,
+               is_saga=False):
+    """SAG solver for Ridge and LogisticRegression
+
+    SAG stands for Stochastic Average Gradient: the gradient of the loss is
+    estimated each sample at a time and the model is updated along the way with
+    a constant learning rate.
+
+    IMPORTANT NOTE: 'sag' solver converges faster on columns that are on the
+    same scale. You can normalize the data by using
+    sklearn.preprocessing.StandardScaler on your data before passing it to the
+    fit method.
+
+    This implementation works with data represented as dense numpy arrays or
+    sparse scipy arrays of floating point values for the features. It will
+    fit the data according to squared loss or log loss.
+
+    The regularizer is a penalty added to the loss function that shrinks model
+    parameters towards the zero vector using the squared euclidean norm L2.
+
+    .. versionadded:: 0.17
+
+    Parameters
+    ----------
+    X : {array-like, sparse matrix}, shape (n_samples, n_features)
+        Training data
+
+    y : numpy array, shape (n_samples,)
+        Target values. With loss='multinomial', y must be label encoded
+        (see preprocessing.LabelEncoder).
+
+    sample_weight : array-like, shape (n_samples,), optional
+        Weights applied to individual samples (1. for unweighted).
+
+    loss : 'log' | 'squared' | 'multinomial'
+        Loss function that will be optimized:
+        -'log' is the binary logistic loss, as used in LogisticRegression.
+        -'squared' is the squared loss, as used in Ridge.
+        -'multinomial' is the multinomial logistic loss, as used in
+         LogisticRegression.
+
+        .. versionadded:: 0.18
+           *loss='multinomial'*
+
+    alpha : float, optional
+        L2 regularization term in the objective function
+        ``(0.5 * alpha * || W ||_F^2)``. Defaults to 1.
+
+    beta : float, optional
+        L1 regularization term in the objective function
+        ``(beta * || W ||_1)``. Only applied if ``is_saga`` is set to True.
+        Defaults to 0.
+
+    max_iter : int, optional
+        The max number of passes over the training data if the stopping
+        criteria is not reached. Defaults to 1000.
+
+    tol : double, optional
+        The stopping criteria for the weights. The iterations will stop when
+        max(change in weights) / max(weights) < tol. Defaults to .001
+
+    verbose : integer, optional
+        The verbosity level.
+
+    random_state : int, RandomState instance, default=None
+        Used when shuffling the data. Pass an int for reproducible output
+        across multiple function calls.
+        See :term:`Glossary <random_state>`.
+
+    check_input : bool, default True
+        If False, the input arrays X and y will not be checked.
+
+    max_squared_sum : float, default None
+        Maximum squared sum of X over samples. If None, it will be computed,
+        going through all the samples. The value should be precomputed
+        to speed up cross validation.
+
+    warm_start_mem : dict, optional
+        The initialization parameters used for warm starting. Warm starting is
+        currently used in LogisticRegression but not in Ridge.
+        It contains:
+            - 'coef': the weight vector, with the intercept in last line
+                if the intercept is fitted.
+            - 'gradient_memory': the scalar gradient for all seen samples.
+            - 'sum_gradient': the sum of gradient over all seen samples,
+                for each feature.
+            - 'intercept_sum_gradient': the sum of gradient over all seen
+                samples, for the intercept.
+            - 'seen': array of boolean describing the seen samples.
+            - 'num_seen': the number of seen samples.
+
+    is_saga : boolean, optional
+        Whether to use the SAGA algorithm or the SAG algorithm. SAGA behaves
+        better in the first epochs, and allow for l1 regularisation.
+
+    Returns
+    -------
+    coef_ : array, shape (n_features)
+        Weight vector.
+
+    n_iter_ : int
+        The number of full pass on all samples.
+
+    warm_start_mem : dict
+        Contains a 'coef' key with the fitted result, and possibly the
+        fitted intercept at the end of the array. Contains also other keys
+        used for warm starting.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn import linear_model
+    >>> n_samples, n_features = 10, 5
+    >>> rng = np.random.RandomState(0)
+    >>> X = rng.randn(n_samples, n_features)
+    >>> y = rng.randn(n_samples)
+    >>> clf = linear_model.Ridge(solver='sag')
+    >>> clf.fit(X, y)
+    Ridge(solver='sag')
+
+    >>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
+    >>> y = np.array([1, 1, 2, 2])
+    >>> clf = linear_model.LogisticRegression(
+    ...     solver='sag', multi_class='multinomial')
+    >>> clf.fit(X, y)
+    LogisticRegression(multi_class='multinomial', solver='sag')
+
+    References
+    ----------
+    Schmidt, M., Roux, N. L., & Bach, F. (2013).
+    Minimizing finite sums with the stochastic average gradient
+    https://hal.inria.fr/hal-00860051/document
+
+    Defazio, A., Bach F. & Lacoste-Julien S. (2014).
+    SAGA: A Fast Incremental Gradient Method With Support
+    for Non-Strongly Convex Composite Objectives
+    https://arxiv.org/abs/1407.0202
+
+    See also
+    --------
+    Ridge, SGDRegressor, ElasticNet, Lasso, SVR, and
+    LogisticRegression, SGDClassifier, LinearSVC, Perceptron
+    """
+    if warm_start_mem is None:
+        warm_start_mem = {}
+    # Ridge default max_iter is None
+    if max_iter is None:
+        max_iter = 1000
+
+    if check_input:
+        _dtype = [np.float64, np.float32]
+        X = check_array(X, dtype=_dtype, accept_sparse='csr', order='C')
+        y = check_array(y, dtype=_dtype, ensure_2d=False, order='C')
+
+    n_samples, n_features = X.shape[0], X.shape[1]
+    # As in SGD, the alpha is scaled by n_samples.
+    alpha_scaled = float(alpha) / n_samples
+    beta_scaled = float(beta) / n_samples
+
+    # if loss == 'multinomial', y should be label encoded.
+    n_classes = int(y.max()) + 1 if loss == 'multinomial' else 1
+
+    # initialization
+    sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
+
+    if 'coef' in warm_start_mem.keys():
+        coef_init = warm_start_mem['coef']
+    else:
+        # assume fit_intercept is False
+        coef_init = np.zeros((n_features, n_classes), dtype=X.dtype,
+                             order='C')
+
+    # coef_init contains possibly the intercept_init at the end.
+    # Note that Ridge centers the data before fitting, so fit_intercept=False.
+    fit_intercept = coef_init.shape[0] == (n_features + 1)
+    if fit_intercept:
+        intercept_init = coef_init[-1, :]
+        coef_init = coef_init[:-1, :]
+    else:
+        intercept_init = np.zeros(n_classes, dtype=X.dtype)
+
+    if 'intercept_sum_gradient' in warm_start_mem.keys():
+        intercept_sum_gradient = warm_start_mem['intercept_sum_gradient']
+    else:
+        intercept_sum_gradient = np.zeros(n_classes, dtype=X.dtype)
+
+    if 'gradient_memory' in warm_start_mem.keys():
+        gradient_memory_init = warm_start_mem['gradient_memory']
+    else:
+        gradient_memory_init = np.zeros((n_samples, n_classes),
+                                        dtype=X.dtype, order='C')
+    if 'sum_gradient' in warm_start_mem.keys():
+        sum_gradient_init = warm_start_mem['sum_gradient']
+    else:
+        sum_gradient_init = np.zeros((n_features, n_classes),
+                                     dtype=X.dtype, order='C')
+
+    if 'seen' in warm_start_mem.keys():
+        seen_init = warm_start_mem['seen']
+    else:
+        seen_init = np.zeros(n_samples, dtype=np.int32, order='C')
+
+    if 'num_seen' in warm_start_mem.keys():
+        num_seen_init = warm_start_mem['num_seen']
+    else:
+        num_seen_init = 0
+
+    dataset, intercept_decay = make_dataset(X, y, sample_weight, random_state)
+
+    if max_squared_sum is None:
+        max_squared_sum = row_norms(X, squared=True).max()
+    step_size = get_auto_step_size(max_squared_sum, alpha_scaled, loss,
+                                   fit_intercept, n_samples=n_samples,
+                                   is_saga=is_saga)
+    if step_size * alpha_scaled == 1:
+        raise ZeroDivisionError("Current sag implementation does not handle "
+                                "the case step_size * alpha_scaled == 1")
+
+    sag = sag64 if X.dtype == np.float64 else sag32
+    num_seen, n_iter_ = sag(dataset, coef_init,
+                            intercept_init, n_samples,
+                            n_features, n_classes, tol,
+                            max_iter,
+                            loss,
+                            step_size, alpha_scaled,
+                            beta_scaled,
+                            sum_gradient_init,
+                            gradient_memory_init,
+                            seen_init,
+                            num_seen_init,
+                            fit_intercept,
+                            intercept_sum_gradient,
+                            intercept_decay,
+                            is_saga,
+                            verbose)
+
+    if n_iter_ == max_iter:
+        warnings.warn("The max_iter was reached which means "
+                      "the coef_ did not converge", ConvergenceWarning)
+
+    if fit_intercept:
+        coef_init = np.vstack((coef_init, intercept_init))
+
+    warm_start_mem = {'coef': coef_init, 'sum_gradient': sum_gradient_init,
+                      'intercept_sum_gradient': intercept_sum_gradient,
+                      'gradient_memory': gradient_memory_init,
+                      'seen': seen_init, 'num_seen': num_seen}
+
+    if loss == 'multinomial':
+        coef_ = coef_init.T
+    else:
+        coef_ = coef_init[:, 0]
+
+    return coef_, n_iter_, warm_start_mem
--- a/venv/Lib/site-packages/sklearn/linear_model/_sag_fast.cp36-win32.pyd
+++ b/venv/Lib/site-packages/sklearn/linear_model/_sag_fast.cp36-win32.pyd
--- a/venv/Lib/site-packages/sklearn/linear_model/_sgd_fast.cp36-win32.pyd
+++ b/venv/Lib/site-packages/sklearn/linear_model/_sgd_fast.cp36-win32.pyd
--- a/venv/Lib/site-packages/sklearn/linear_model/_sgd_fast.pxd
+++ b/venv/Lib/site-packages/sklearn/linear_model/_sgd_fast.pxd
@ -0,0 +1,26 @@
+# License: BSD 3 clause
+"""Helper to load LossFunction from sgd_fast.pyx to sag_fast.pyx"""
+
+# Root of the loss hierarchy declared in this file. Declares two C-level
+# methods, ``loss`` and ``_dloss``, each taking two doubles and returning a
+# double, both callable without the GIL (``nogil``).
+# NOTE(review): presumably ``p`` is the prediction and ``y`` the target --
+# confirm against the implementation file.
+cdef class LossFunction:
+    cdef double loss(self, double p, double y) nogil
+    cdef double _dloss(self, double p, double y) nogil
+
+
+# Subclass of LossFunction; re-declares the same ``loss`` / ``_dloss``
+# C-level methods with identical signatures.
+cdef class Regression(LossFunction):
+    cdef double loss(self, double p, double y) nogil
+    cdef double _dloss(self, double p, double y) nogil
+
+
+# Subclass of LossFunction; re-declares the same ``loss`` / ``_dloss``
+# C-level methods with identical signatures.
+cdef class Classification(LossFunction):
+    cdef double loss(self, double p, double y) nogil
+    cdef double _dloss(self, double p, double y) nogil
+
+
+# Subclass of Classification; re-declares ``loss`` / ``_dloss`` with the
+# same signatures as its base classes.
+cdef class Log(Classification):
+    cdef double loss(self, double p, double y) nogil
+    cdef double _dloss(self, double p, double y) nogil
+
+
+# Subclass of Regression; re-declares ``loss`` / ``_dloss`` with the same
+# signatures as its base classes.
+cdef class SquaredLoss(Regression):
+    cdef double loss(self, double p, double y) nogil
+    cdef double _dloss(self, double p, double y) nogil
--- a/venv/Lib/site-packages/sklearn/linear_model/_stochastic_gradient.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_stochastic_gradient.py
--- a/venv/Lib/site-packages/sklearn/linear_model/_theil_sen.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/_theil_sen.py
@ -0,0 +1,400 @@
+# -*- coding: utf-8 -*-
+"""
+A Theil-Sen Estimator for Multiple Linear Regression Model
+"""
+
+# Author: Florian Wilhelm <florian.wilhelm@gmail.com>
+#
+# License: BSD 3 clause
+
+
+import warnings
+from itertools import combinations
+
+import numpy as np
+from scipy import linalg
+from scipy.special import binom
+from scipy.linalg.lapack import get_lapack_funcs
+from joblib import Parallel, delayed, effective_n_jobs
+
+from ._base import LinearModel
+from ..base import RegressorMixin
+from ..utils import check_random_state
+from ..utils.validation import _deprecate_positional_args
+from ..exceptions import ConvergenceWarning
+
+_EPSILON = np.finfo(np.double).eps
+
+
+def _modified_weiszfeld_step(X, x_old):
+    """Perform one modified Weiszfeld update of a spatial-median estimate.
+
+    A single step of the iteratively re-weighted least squares scheme used
+    to approximate the spatial median (L1 median) of the rows of ``X``.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_features)
+        Points whose spatial median is sought, one point per row.
+
+    x_old : array, shape = [n_features]
+        Current estimate of the spatial median.
+
+    Returns
+    -------
+    x_new : array, shape = [n_features]
+        Updated estimate.
+
+    References
+    ----------
+    - On Computation of Spatial Median for Robust Data Mining, 2005
+      T. Kärkkäinen and S. Äyrämö
+      http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf
+    """
+    offsets = X - x_old
+    distances = np.sqrt(np.sum(offsets ** 2, axis=1))
+    # Rows closer than machine epsilon to x_old are treated as equal to it
+    # and left out of the weighted sums below.
+    far_enough = distances >= _EPSILON
+    # 1 if x_old coincides with at least one sample, 0 otherwise
+    coincides = int(far_enough.sum() < X.shape[0])
+
+    offsets = offsets[far_enough]
+    distances = distances[far_enough][:, np.newaxis]
+    quotient_norm = linalg.norm(np.sum(offsets / distances, axis=0))
+
+    if quotient_norm > _EPSILON:  # to avoid division by zero
+        weighted_points = np.sum(X[far_enough, :] / distances, axis=0)
+        total_weight = np.sum(1 / distances, axis=0)
+        new_direction = weighted_points / total_weight
+    else:
+        new_direction = 1.
+        quotient_norm = 1.
+
+    # Share of the old estimate that is kept; zero unless x_old is a sample.
+    ratio = coincides / quotient_norm
+    return (max(0., 1. - ratio) * new_direction
+            + min(1., ratio) * x_old)
+
+
+def _spatial_median(X, max_iter=300, tol=1.e-3):
+    """Spatial median (L1 median).
+
+    The spatial median is member of a class of so-called M-estimators which
+    are defined by an optimization problem. Given a number of p points in an
+    n-dimensional space, the point x minimizing the sum of all distances to the
+    p other points is called spatial median.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_features)
+        Training vector, where n_samples is the number of samples and
+        n_features is the number of features.
+
+    max_iter : int, optional
+        Maximum number of iterations.  Default is 300.
+
+    tol : float, optional
+        Stop the algorithm if spatial_median has converged. Default is 1.e-3.
+
+    Returns
+    -------
+    n_iter : int
+        Zero-based index of the last Weiszfeld iteration that was run.
+        It is ``1`` for the single-feature shortcut and ``0`` when
+        ``max_iter`` does not allow any iteration.
+
+    spatial_median : array, shape = [n_features]
+        Spatial median.
+
+    Warns
+    -----
+    ConvergenceWarning
+        If the iteration did not converge within ``max_iter`` steps.
+
+    References
+    ----------
+    - On Computation of Spatial Median for Robust Data Mining, 2005
+      T. Kärkkäinen and S. Äyrämö
+      http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf
+    """
+    if X.shape[1] == 1:
+        # In one dimension the spatial median is the ordinary median. Wrap it
+        # so the result is a 1-d array of length n_features, as documented,
+        # rather than a 0-d scalar.
+        return 1, np.atleast_1d(np.median(X.ravel()))
+
+    tol **= 2  # We are computing the tol on the squared norm
+    spatial_median_old = np.mean(X, axis=0)
+
+    # Fallback values so that ``max_iter <= 0`` returns the starting point
+    # (after the warning below) instead of failing on unbound locals.
+    n_iter = 0
+    spatial_median = spatial_median_old
+
+    for n_iter in range(max_iter):
+        spatial_median = _modified_weiszfeld_step(X, spatial_median_old)
+        if np.sum((spatial_median_old - spatial_median) ** 2) < tol:
+            break
+        else:
+            spatial_median_old = spatial_median
+    else:
+        # Reached only when the loop was never left through ``break``.
+        warnings.warn("Maximum number of iterations {max_iter} reached in "
+                      "spatial median for TheilSen regressor."
+                      "".format(max_iter=max_iter), ConvergenceWarning)
+
+    return n_iter, spatial_median
+
+
+def _breakdown_point(n_samples, n_subsamples):
+    """Approximate the breakdown point of the estimator.
+
+    Parameters
+    ----------
+    n_samples : int
+        Total number of samples.
+
+    n_subsamples : int
+        Size of each subsample.
+
+    Returns
+    -------
+    breakdown_point : float
+        Approximated breakdown point.
+    """
+    root = 0.5 ** (1 / n_subsamples)
+    numerator = root * (n_samples - n_subsamples + 1) + n_subsamples - 1
+    return 1 - numerator / n_samples
+
+
+def _lstsq(X, y, indices, fit_intercept):
+    """Solve one least squares problem per subsample of rows.
+
+    Each row of ``indices`` selects rows of ``X`` and ``y``; the selected
+    system is solved with LAPACK ``gelss``. When ``fit_intercept`` is true a
+    leading column of ones is added to the design matrix.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_features)
+        Design matrix, where n_samples is the number of samples and
+        n_features is the number of features.
+
+    y : array, shape = [n_samples]
+        Target vector, where n_samples is the number of samples.
+
+    indices : array, shape = [n_subpopulation, n_subsamples]
+        Indices of all subsamples with respect to the chosen subpopulation.
+
+    fit_intercept : bool
+        Fit intercept or not.
+
+    Returns
+    -------
+    weights : array, shape = [n_subpopulation, n_features + intercept]
+        Solution matrix of n_subpopulation solved least square problems.
+    """
+    offset = int(fit_intercept)
+    n_columns = X.shape[1] + offset
+    n_rows = indices.shape[1]
+
+    # Work buffers reused for every subsample. The design buffer starts as
+    # all ones, so its first column already is the intercept column.
+    design = np.ones((n_rows, n_columns))
+    # gelss needs the right-hand side padded to the larger dimension of the
+    # design matrix
+    target = np.zeros((max(n_rows, n_columns)))
+    gelss, = get_lapack_funcs(('gelss',), (design, target))
+
+    weights = np.empty((indices.shape[0], n_columns))
+    for solution, subset in zip(weights, indices):
+        design[:, offset:] = X[subset, :]
+        target[:n_rows] = y[subset]
+        # The second item of the gelss result holds the solution vector.
+        solution[:] = gelss(design, target)[1][:n_columns]
+
+    return weights
+
+
+class TheilSenRegressor(RegressorMixin, LinearModel):
+    """Theil-Sen Estimator: robust multivariate regression model.
+
+    The algorithm calculates least square solutions on subsets with size
+    n_subsamples of the samples in X. Any value of n_subsamples between the
+    number of features and samples leads to an estimator with a compromise
+    between robustness and efficiency. Since the number of least square
+    solutions is "n_samples choose n_subsamples", it can be extremely large
+    and can therefore be limited with max_subpopulation. If this limit is
+    reached, the subsets are chosen randomly. In a final step, the spatial
+    median (or L1 median) is calculated of all least square solutions.
+
+    Read more in the :ref:`User Guide <theil_sen_regression>`.
+
+    Parameters
+    ----------
+    fit_intercept : boolean, optional, default True
+        Whether to calculate the intercept for this model. If set
+        to false, no intercept will be used in calculations.
+
+    copy_X : boolean, optional, default True
+        If True, X will be copied; else, it may be overwritten.
+
+    max_subpopulation : int, optional, default 1e4
+        Instead of computing with a set of cardinality 'n choose k', where n is
+        the number of samples and k is the number of subsamples (at least
+        number of features), consider only a stochastic subpopulation of a
+        given maximal size if 'n choose k' is larger than max_subpopulation.
+        For other than small problem sizes this parameter will determine
+        memory usage and runtime if n_subsamples is not changed.
+
+    n_subsamples : int, optional, default None
+        Number of samples to calculate the parameters. This is at least the
+        number of features (plus 1 if fit_intercept=True) and the number of
+        samples as a maximum. A lower number leads to a higher breakdown
+        point and a low efficiency while a high number leads to a low
+        breakdown point and a high efficiency. If None, take the
+        minimum number of subsamples leading to maximal robustness.
+        If n_subsamples is set to n_samples, Theil-Sen is identical to least
+        squares.
+
+    max_iter : int, optional, default 300
+        Maximum number of iterations for the calculation of spatial median.
+
+    tol : float, optional, default 1.e-3
+        Tolerance when calculating spatial median.
+
+    random_state : int, RandomState instance, default=None
+        A random number generator instance to define the state of the random
+        permutations generator. Pass an int for reproducible output across
+        multiple function calls.
+        See :term:`Glossary <random_state>`
+
+    n_jobs : int or None, optional (default=None)
+        Number of jobs used to solve the least squares problems of the
+        subsamples in parallel.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    verbose : boolean, optional, default False
+        Verbose mode when fitting the model.
+
+    Attributes
+    ----------
+    coef_ : array, shape = (n_features)
+        Coefficients of the regression model (median of distribution).
+
+    intercept_ : float
+        Estimated intercept of regression model.
+
+    breakdown_ : float
+        Approximated breakdown point.
+
+    n_iter_ : int
+        Number of iterations needed for the spatial median.
+
+    n_subpopulation_ : int
+        Number of combinations taken into account from 'n choose k', where n is
+        the number of samples and k is the number of subsamples.
+
+    Examples
+    --------
+    >>> from sklearn.linear_model import TheilSenRegressor
+    >>> from sklearn.datasets import make_regression
+    >>> X, y = make_regression(
+    ...     n_samples=200, n_features=2, noise=4.0, random_state=0)
+    >>> reg = TheilSenRegressor(random_state=0).fit(X, y)
+    >>> reg.score(X, y)
+    0.9884...
+    >>> reg.predict(X[:1,])
+    array([-31.5871...])
+
+    References
+    ----------
+    - Theil-Sen Estimators in a Multiple Linear Regression Model, 2009
+      Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang
+      http://home.olemiss.edu/~xdang/papers/MTSE.pdf
+    """
+    @_deprecate_positional_args
+    def __init__(self, *, fit_intercept=True, copy_X=True,
+                 max_subpopulation=1e4, n_subsamples=None, max_iter=300,
+                 tol=1.e-3, random_state=None, n_jobs=None, verbose=False):
+        self.fit_intercept = fit_intercept
+        self.copy_X = copy_X
+        # The default 1e4 is a float literal, hence the coercion to int.
+        self.max_subpopulation = int(max_subpopulation)
+        self.n_subsamples = n_subsamples
+        self.max_iter = max_iter
+        self.tol = tol
+        self.random_state = random_state
+        self.n_jobs = n_jobs
+        self.verbose = verbose
+
+    def _check_subparams(self, n_samples, n_features):
+        """Validate the subsampling parameters and derive effective sizes.
+
+        Parameters
+        ----------
+        n_samples : int
+            Number of rows of the training data.
+
+        n_features : int
+            Number of columns of the training data.
+
+        Returns
+        -------
+        n_subsamples : int
+            Subsample size to use: ``self.n_subsamples`` when it is set,
+            otherwise the smaller of the number of fitted parameters and
+            ``n_samples``.
+
+        n_subpopulation : int
+            Number of subsamples to evaluate: 'n_samples choose n_subsamples'
+            capped at ``self.max_subpopulation``.
+
+        Raises
+        ------
+        ValueError
+            If ``n_subsamples`` is inconsistent with the data dimensions or
+            ``max_subpopulation`` is not strictly positive.
+        """
+        n_subsamples = self.n_subsamples
+
+        # Number of parameters of each least squares problem.
+        if self.fit_intercept:
+            n_dim = n_features + 1
+        else:
+            n_dim = n_features
+
+        if n_subsamples is not None:
+            if n_subsamples > n_samples:
+                raise ValueError("Invalid parameter since n_subsamples > "
+                                 "n_samples ({0} > {1}).".format(n_subsamples,
+                                                                 n_samples))
+            if n_samples >= n_features:
+                if n_dim > n_subsamples:
+                    plus_1 = "+1" if self.fit_intercept else ""
+                    # Report n_subsamples, the quantity actually compared,
+                    # rather than n_samples.
+                    raise ValueError("Invalid parameter since n_features{0} "
+                                     "> n_subsamples ({1} > {2})."
+                                     "".format(plus_1, n_dim, n_subsamples))
+            else:  # if n_samples < n_features
+                if n_subsamples != n_samples:
+                    raise ValueError("Invalid parameter since n_subsamples != "
+                                     "n_samples ({0} != {1}) while n_samples "
+                                     "< n_features.".format(n_subsamples,
+                                                            n_samples))
+        else:
+            n_subsamples = min(n_dim, n_samples)
+
+        if self.max_subpopulation <= 0:
+            raise ValueError("Subpopulation must be strictly positive "
+                             "({0} <= 0).".format(self.max_subpopulation))
+
+        all_combinations = max(1, np.rint(binom(n_samples, n_subsamples)))
+        n_subpopulation = int(min(self.max_subpopulation, all_combinations))
+
+        return n_subsamples, n_subpopulation
+
+    def fit(self, X, y):
+        """Fit linear model.
+
+        Parameters
+        ----------
+        X : numpy array of shape [n_samples, n_features]
+            Training data
+        y : numpy array of shape [n_samples]
+            Target values
+
+        Returns
+        -------
+        self : returns an instance of self.
+        """
+        random_state = check_random_state(self.random_state)
+        X, y = self._validate_data(X, y, y_numeric=True)
+        n_samples, n_features = X.shape
+        n_subsamples, self.n_subpopulation_ = self._check_subparams(n_samples,
+                                                                    n_features)
+        self.breakdown_ = _breakdown_point(n_samples, n_subsamples)
+
+        if self.verbose:
+            print("Breakdown point: {0}".format(self.breakdown_))
+            print("Number of samples: {0}".format(n_samples))
+            tol_outliers = int(self.breakdown_ * n_samples)
+            print("Tolerable outliers: {0}".format(tol_outliers))
+            print("Number of subpopulations: {0}".format(
+                self.n_subpopulation_))
+
+        # Determine indices of subpopulation: enumerate every combination
+        # when there are few enough, otherwise draw random subsamples.
+        if np.rint(binom(n_samples, n_subsamples)) <= self.max_subpopulation:
+            indices = list(combinations(range(n_samples), n_subsamples))
+        else:
+            indices = [random_state.choice(n_samples, size=n_subsamples,
+                                           replace=False)
+                       for _ in range(self.n_subpopulation_)]
+
+        # Split the subsamples into one chunk per job and solve the least
+        # squares problems of each chunk in parallel.
+        n_jobs = effective_n_jobs(self.n_jobs)
+        index_list = np.array_split(indices, n_jobs)
+        weights = Parallel(n_jobs=n_jobs,
+                           verbose=self.verbose)(
+            delayed(_lstsq)(X, y, index_list[job], self.fit_intercept)
+            for job in range(n_jobs))
+        weights = np.vstack(weights)
+        self.n_iter_, coefs = _spatial_median(weights,
+                                              max_iter=self.max_iter,
+                                              tol=self.tol)
+
+        # With an intercept, the first entry of each solution is the
+        # intercept term.
+        if self.fit_intercept:
+            self.intercept_ = coefs[0]
+            self.coef_ = coefs[1:]
+        else:
+            self.intercept_ = 0.
+            self.coef_ = coefs
+
+        return self
--- a/venv/Lib/site-packages/sklearn/linear_model/base.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/base.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _base  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.base'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to the private ``_base`` module.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_base`` raises ``AttributeError``.
+    """
+    return getattr(_base, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/bayes.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/bayes.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _bayes  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.bayes'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to the private ``_bayes`` module.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_bayes`` raises ``AttributeError``.
+    """
+    return getattr(_bayes, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/cd_fast.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/cd_fast.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _cd_fast  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.cd_fast'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to the private ``_cd_fast`` module.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_cd_fast`` raises ``AttributeError``.
+    """
+    return getattr(_cd_fast, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/coordinate_descent.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/coordinate_descent.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _coordinate_descent  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.coordinate_descent'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to ``_coordinate_descent``.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_coordinate_descent`` raises ``AttributeError``.
+    """
+    return getattr(_coordinate_descent, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/huber.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/huber.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _huber  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.huber'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to the private ``_huber`` module.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_huber`` raises ``AttributeError``.
+    """
+    return getattr(_huber, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/least_angle.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/least_angle.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _least_angle  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.least_angle'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to the private ``_least_angle`` module.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_least_angle`` raises ``AttributeError``.
+    """
+    return getattr(_least_angle, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/logistic.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/logistic.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _logistic  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.logistic'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to the private ``_logistic`` module.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_logistic`` raises ``AttributeError``.
+    """
+    return getattr(_logistic, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/omp.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/omp.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _omp  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.omp'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to the private ``_omp`` module.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_omp`` raises ``AttributeError``.
+    """
+    return getattr(_omp, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/passive_aggressive.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/passive_aggressive.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _passive_aggressive  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.passive_aggressive'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to ``_passive_aggressive``.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_passive_aggressive`` raises ``AttributeError``.
+    """
+    return getattr(_passive_aggressive, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/perceptron.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/perceptron.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _perceptron  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.perceptron'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to the private ``_perceptron`` module.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_perceptron`` raises ``AttributeError``.
+    """
+    return getattr(_perceptron, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/ransac.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/ransac.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _ransac  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.ransac'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to the private ``_ransac`` module.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_ransac`` raises ``AttributeError``.
+    """
+    return getattr(_ransac, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/ridge.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/ridge.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _ridge  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.ridge'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to the private ``_ridge`` module.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_ridge`` raises ``AttributeError``.
+    """
+    return getattr(_ridge, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/sag.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/sag.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _sag  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.sag'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to the private ``_sag`` module.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_sag`` raises ``AttributeError``.
+    """
+    return getattr(_sag, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/sag_fast.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/sag_fast.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _sag_fast  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.sag_fast'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to the private ``_sag_fast`` module.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_sag_fast`` raises ``AttributeError``.
+    """
+    return getattr(_sag_fast, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/setup.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/setup.py
@ -0,0 +1,44 @@
+import os
+import numpy
+
+from sklearn._build_utils import gen_from_templates
+
+
+def configuration(parent_package='', top_path=None):
+    """Build the numpy.distutils configuration for ``linear_model``.
+
+    Registers the ``_cd_fast``, ``_sgd_fast`` and ``_sag_fast`` extensions
+    (the latter after generating its source from a template) and the test
+    and ``_glm`` subpackages.
+
+    Parameters
+    ----------
+    parent_package : str, default ''
+        Forwarded to ``Configuration``.
+
+    top_path : str or None, default None
+        Forwarded to ``Configuration`` and to ``gen_from_templates``.
+
+    Returns
+    -------
+    config : Configuration
+        The populated configuration object.
+    """
+    from numpy.distutils.misc_util import Configuration
+
+    config = Configuration('linear_model', parent_package, top_path)
+    numpy_include = numpy.get_include()
+
+    # The 'm' library is only linked on posix systems.
+    libraries = ['m'] if os.name == 'posix' else []
+
+    for ext_name, pyx_source in (('_cd_fast', '_cd_fast.pyx'),
+                                 ('_sgd_fast', '_sgd_fast.pyx')):
+        config.add_extension(ext_name,
+                             sources=[pyx_source],
+                             include_dirs=numpy_include,
+                             libraries=libraries)
+
+    # generate sag_fast from template
+    gen_from_templates(['sklearn/linear_model/_sag_fast.pyx.tp'], top_path)
+
+    # Unlike the two extensions above, _sag_fast is built without the
+    # extra libraries.
+    config.add_extension('_sag_fast',
+                         sources=['_sag_fast.pyx'],
+                         include_dirs=numpy_include)
+
+    # add other directories
+    for subpackage in ('tests', '_glm', '_glm/tests'):
+        config.add_subpackage(subpackage)
+
+    return config
+
+
+if __name__ == '__main__':
+    from numpy.distutils.core import setup
+    setup(**configuration(top_path='').todict())
--- a/venv/Lib/site-packages/sklearn/linear_model/sgd_fast.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/sgd_fast.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _sgd_fast  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.sgd_fast'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to the private ``_sgd_fast`` module.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_sgd_fast`` raises ``AttributeError``.
+    """
+    return getattr(_sgd_fast, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/stochastic_gradient.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/stochastic_gradient.py
@ -0,0 +1,18 @@
+
+# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
+import sys
+# mypy error: Module X has no attribute y (typically for C extensions)
+from . import _stochastic_gradient  # type: ignore
+from ..externals._pep562 import Pep562
+from ..utils.deprecation import _raise_dep_warning_if_not_pytest
+
+deprecated_path = 'sklearn.linear_model.stochastic_gradient'
+correct_import_path = 'sklearn.linear_model'
+
+_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
+
+def __getattr__(name):
+    """Forward the lookup of ``name`` to ``_stochastic_gradient``.
+
+    ``getattr`` is called without a default, so a name missing from
+    ``_stochastic_gradient`` raises ``AttributeError``.
+    """
+    return getattr(_stochastic_gradient, name)
+
+if not sys.version_info >= (3, 7):
+    Pep562(__name__)
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/init.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/init.py
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/init.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/init.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_base.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_base.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_bayes.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_bayes.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_coordinate_descent.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_coordinate_descent.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_huber.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_huber.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_least_angle.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_least_angle.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_logistic.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_logistic.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_omp.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_omp.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_passive_aggressive.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_passive_aggressive.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_perceptron.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_perceptron.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_ransac.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_ransac.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_ridge.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_ridge.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_sag.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_sag.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_sgd.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_sgd.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_sparse_coordinate_descent.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_sparse_coordinate_descent.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_theil_sen.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/pycache/test_theil_sen.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/test_base.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/test_base.py
@ -0,0 +1,530 @@
+# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
+#         Fabian Pedregosa <fabian.pedregosa@inria.fr>
+#
+# License: BSD 3 clause
+
+import pytest
+
+import numpy as np
+from scipy import sparse
+from scipy import linalg
+
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_allclose
+from sklearn.utils.fixes import parse_version
+
+from sklearn.linear_model import LinearRegression
+from sklearn.linear_model._base import _preprocess_data
+from sklearn.linear_model._base import _rescale_data
+from sklearn.linear_model._base import make_dataset
+from sklearn.utils import check_random_state
+from sklearn.datasets import make_sparse_uncorrelated
+from sklearn.datasets import make_regression
+from sklearn.datasets import load_iris
+
+# Module-level generator shared by the tests below that do not create their
+# own; because it is shared, the values a test draws depend on how many
+# draws earlier tests in this module have already made.
+rng = np.random.RandomState(0)
+# Relative tolerance for the assert_allclose checks on make_dataset output.
+rtol = 1e-6
+
+
+def test_linear_regression():
+    # Test LinearRegression on a simple dataset.
+    # a simple dataset
+    X = [[1], [2]]
+    Y = [1, 2]
+
+    reg = LinearRegression()
+    reg.fit(X, Y)
+
+    assert_array_almost_equal(reg.coef_, [1])
+    assert_array_almost_equal(reg.intercept_, [0])
+    assert_array_almost_equal(reg.predict(X), [1, 2])
+
+    # test it also for degenerate input
+    X = [[1]]
+    Y = [0]
+
+    reg = LinearRegression()
+    reg.fit(X, Y)
+    assert_array_almost_equal(reg.coef_, [0])
+    assert_array_almost_equal(reg.intercept_, [0])
+    assert_array_almost_equal(reg.predict(X), [0])
+
+
+def test_linear_regression_sample_weights():
+    # TODO: loop over sparse data as well
+
+    rng = np.random.RandomState(0)
+
+    # It would not work with under-determined systems
+    for n_samples, n_features in ((6, 5), ):
+
+        y = rng.randn(n_samples)
+        X = rng.randn(n_samples, n_features)
+        sample_weight = 1.0 + rng.rand(n_samples)
+
+        for intercept in (True, False):
+
+            # LinearRegression with explicit sample_weight
+            reg = LinearRegression(fit_intercept=intercept)
+            reg.fit(X, y, sample_weight=sample_weight)
+            coefs1 = reg.coef_
+            inter1 = reg.intercept_
+
+            assert reg.coef_.shape == (X.shape[1], )  # sanity checks
+            assert reg.score(X, y) > 0.5
+
+            # Closed form of the weighted least square
+            # theta = (X^T W X)^(-1) * X^T W y
+            W = np.diag(sample_weight)
+            if intercept is False:
+                X_aug = X
+            else:
+                dummy_column = np.ones(shape=(n_samples, 1))
+                X_aug = np.concatenate((dummy_column, X), axis=1)
+
+            coefs2 = linalg.solve(X_aug.T.dot(W).dot(X_aug),
+                                  X_aug.T.dot(W).dot(y))
+
+            if intercept is False:
+                assert_array_almost_equal(coefs1, coefs2)
+            else:
+                assert_array_almost_equal(coefs1, coefs2[1:])
+                assert_almost_equal(inter1, coefs2[0])
+
+
+def test_raises_value_error_if_sample_weights_greater_than_1d():
+    # Sample weights must be either scalar or 1D
+
+    n_sampless = [2, 3]
+    n_featuress = [3, 2]
+
+    for n_samples, n_features in zip(n_sampless, n_featuress):
+        X = rng.randn(n_samples, n_features)
+        y = rng.randn(n_samples)
+        sample_weights_OK = rng.randn(n_samples) ** 2 + 1
+        sample_weights_OK_1 = 1.
+        sample_weights_OK_2 = 2.
+
+        reg = LinearRegression()
+
+        # make sure the "OK" sample weights actually work
+        reg.fit(X, y, sample_weights_OK)
+        reg.fit(X, y, sample_weights_OK_1)
+        reg.fit(X, y, sample_weights_OK_2)
+
+
+def test_fit_intercept():
+    # Test assertions on betas shape.
+    X2 = np.array([[0.38349978, 0.61650022],
+                   [0.58853682, 0.41146318]])
+    X3 = np.array([[0.27677969, 0.70693172, 0.01628859],
+                   [0.08385139, 0.20692515, 0.70922346]])
+    y = np.array([1, 1])
+
+    lr2_without_intercept = LinearRegression(fit_intercept=False).fit(X2, y)
+    lr2_with_intercept = LinearRegression().fit(X2, y)
+
+    lr3_without_intercept = LinearRegression(fit_intercept=False).fit(X3, y)
+    lr3_with_intercept = LinearRegression().fit(X3, y)
+
+    assert (lr2_with_intercept.coef_.shape ==
+            lr2_without_intercept.coef_.shape)
+    assert (lr3_with_intercept.coef_.shape ==
+            lr3_without_intercept.coef_.shape)
+    assert (lr2_without_intercept.coef_.ndim ==
+            lr3_without_intercept.coef_.ndim)
+
+
+def test_linear_regression_sparse(random_state=0):
+    # Test that linear regression also works with sparse data
+    random_state = check_random_state(random_state)
+    for i in range(10):
+        n = 100
+        X = sparse.eye(n, n)
+        beta = random_state.rand(n)
+        y = X * beta[:, np.newaxis]
+
+        ols = LinearRegression()
+        ols.fit(X, y.ravel())
+        assert_array_almost_equal(beta, ols.coef_ + ols.intercept_)
+
+        assert_array_almost_equal(ols.predict(X) - y.ravel(), 0)
+
+
+@pytest.mark.parametrize('normalize', [True, False])
+@pytest.mark.parametrize('fit_intercept', [True, False])
+def test_linear_regression_sparse_equal_dense(normalize, fit_intercept):
+    # Test that linear regression agrees between sparse and dense
+    rng = check_random_state(0)
+    n_samples = 200
+    n_features = 2
+    X = rng.randn(n_samples, n_features)
+    X[X < 0.1] = 0.
+    Xcsr = sparse.csr_matrix(X)
+    y = rng.rand(n_samples)
+    params = dict(normalize=normalize, fit_intercept=fit_intercept)
+    clf_dense = LinearRegression(**params)
+    clf_sparse = LinearRegression(**params)
+    clf_dense.fit(X, y)
+    clf_sparse.fit(Xcsr, y)
+    assert clf_dense.intercept_ == pytest.approx(clf_sparse.intercept_)
+    assert_allclose(clf_dense.coef_, clf_sparse.coef_)
+
+
+def test_linear_regression_multiple_outcome(random_state=0):
+    # Test multiple-outcome linear regressions
+    X, y = make_regression(random_state=random_state)
+
+    Y = np.vstack((y, y)).T
+    n_features = X.shape[1]
+
+    reg = LinearRegression()
+    reg.fit((X), Y)
+    assert reg.coef_.shape == (2, n_features)
+    Y_pred = reg.predict(X)
+    reg.fit(X, y)
+    y_pred = reg.predict(X)
+    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
+
+
+def test_linear_regression_sparse_multiple_outcome(random_state=0):
+    # Test multiple-outcome linear regressions with sparse data
+    random_state = check_random_state(random_state)
+    X, y = make_sparse_uncorrelated(random_state=random_state)
+    X = sparse.coo_matrix(X)
+    Y = np.vstack((y, y)).T
+    n_features = X.shape[1]
+
+    ols = LinearRegression()
+    ols.fit(X, Y)
+    assert ols.coef_.shape == (2, n_features)
+    Y_pred = ols.predict(X)
+    ols.fit(X, y.ravel())
+    y_pred = ols.predict(X)
+    assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
+
+
+def test_linear_regression_pd_sparse_dataframe_warning():
+    pd = pytest.importorskip('pandas')
+    # restrict the pd versions < '0.24.0' as they have a bug in is_sparse func
+    if parse_version(pd.__version__) < parse_version('0.24.0'):
+        pytest.skip("pandas 0.24+ required.")
+
+    # Warning is raised only when some of the columns is sparse
+    df = pd.DataFrame({'0': np.random.randn(10)})
+    for col in range(1, 4):
+        arr = np.random.randn(10)
+        arr[:8] = 0
+        # all columns but the first column is sparse
+        if col != 0:
+            arr = pd.arrays.SparseArray(arr, fill_value=0)
+        df[str(col)] = arr
+
+    msg = "pandas.DataFrame with sparse columns found."
+    with pytest.warns(UserWarning, match=msg):
+        reg = LinearRegression()
+        reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])
+
+    # does not warn when the whole dataframe is sparse
+    df['0'] = pd.arrays.SparseArray(df['0'], fill_value=0)
+    assert hasattr(df, "sparse")
+
+    with pytest.warns(None) as record:
+        reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])
+    assert not record
+
+
+def test_preprocess_data():
+    n_samples = 200
+    n_features = 2
+    X = rng.rand(n_samples, n_features)
+    y = rng.rand(n_samples)
+    expected_X_mean = np.mean(X, axis=0)
+    expected_X_norm = np.std(X, axis=0) * np.sqrt(X.shape[0])
+    expected_y_mean = np.mean(y, axis=0)
+
+    Xt, yt, X_mean, y_mean, X_norm = \
+        _preprocess_data(X, y, fit_intercept=False, normalize=False)
+    assert_array_almost_equal(X_mean, np.zeros(n_features))
+    assert_array_almost_equal(y_mean, 0)
+    assert_array_almost_equal(X_norm, np.ones(n_features))
+    assert_array_almost_equal(Xt, X)
+    assert_array_almost_equal(yt, y)
+
+    Xt, yt, X_mean, y_mean, X_norm = \
+        _preprocess_data(X, y, fit_intercept=True, normalize=False)
+    assert_array_almost_equal(X_mean, expected_X_mean)
+    assert_array_almost_equal(y_mean, expected_y_mean)
+    assert_array_almost_equal(X_norm, np.ones(n_features))
+    assert_array_almost_equal(Xt, X - expected_X_mean)
+    assert_array_almost_equal(yt, y - expected_y_mean)
+
+    Xt, yt, X_mean, y_mean, X_norm = \
+        _preprocess_data(X, y, fit_intercept=True, normalize=True)
+    assert_array_almost_equal(X_mean, expected_X_mean)
+    assert_array_almost_equal(y_mean, expected_y_mean)
+    assert_array_almost_equal(X_norm, expected_X_norm)
+    assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
+    assert_array_almost_equal(yt, y - expected_y_mean)
+
+
+def test_preprocess_data_multioutput():
+    n_samples = 200
+    n_features = 3
+    n_outputs = 2
+    X = rng.rand(n_samples, n_features)
+    y = rng.rand(n_samples, n_outputs)
+    expected_y_mean = np.mean(y, axis=0)
+
+    args = [X, sparse.csc_matrix(X)]
+    for X in args:
+        _, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=False,
+                                               normalize=False)
+        assert_array_almost_equal(y_mean, np.zeros(n_outputs))
+        assert_array_almost_equal(yt, y)
+
+        _, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=True,
+                                               normalize=False)
+        assert_array_almost_equal(y_mean, expected_y_mean)
+        assert_array_almost_equal(yt, y - y_mean)
+
+        _, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=True,
+                                               normalize=True)
+        assert_array_almost_equal(y_mean, expected_y_mean)
+        assert_array_almost_equal(yt, y - y_mean)
+
+
+def test_preprocess_data_weighted():
+    n_samples = 200
+    n_features = 2
+    X = rng.rand(n_samples, n_features)
+    y = rng.rand(n_samples)
+    sample_weight = rng.rand(n_samples)
+    expected_X_mean = np.average(X, axis=0, weights=sample_weight)
+    expected_y_mean = np.average(y, axis=0, weights=sample_weight)
+
+    # XXX: if normalize=True, should we expect a weighted standard deviation?
+    #      Currently not weighted, but calculated with respect to weighted mean
+    expected_X_norm = (np.sqrt(X.shape[0]) *
+                       np.mean((X - expected_X_mean) ** 2, axis=0) ** .5)
+
+    Xt, yt, X_mean, y_mean, X_norm = \
+        _preprocess_data(X, y, fit_intercept=True, normalize=False,
+                         sample_weight=sample_weight)
+    assert_array_almost_equal(X_mean, expected_X_mean)
+    assert_array_almost_equal(y_mean, expected_y_mean)
+    assert_array_almost_equal(X_norm, np.ones(n_features))
+    assert_array_almost_equal(Xt, X - expected_X_mean)
+    assert_array_almost_equal(yt, y - expected_y_mean)
+
+    Xt, yt, X_mean, y_mean, X_norm = \
+        _preprocess_data(X, y, fit_intercept=True, normalize=True,
+                         sample_weight=sample_weight)
+    assert_array_almost_equal(X_mean, expected_X_mean)
+    assert_array_almost_equal(y_mean, expected_y_mean)
+    assert_array_almost_equal(X_norm, expected_X_norm)
+    assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
+    assert_array_almost_equal(yt, y - expected_y_mean)
+
+
+def test_sparse_preprocess_data_with_return_mean():
+    n_samples = 200
+    n_features = 2
+    # random_state not supported yet in sparse.rand
+    X = sparse.rand(n_samples, n_features, density=.5)  # , random_state=rng
+    X = X.tolil()
+    y = rng.rand(n_samples)
+    XA = X.toarray()
+    expected_X_norm = np.std(XA, axis=0) * np.sqrt(X.shape[0])
+
+    Xt, yt, X_mean, y_mean, X_norm = \
+        _preprocess_data(X, y, fit_intercept=False, normalize=False,
+                         return_mean=True)
+    assert_array_almost_equal(X_mean, np.zeros(n_features))
+    assert_array_almost_equal(y_mean, 0)
+    assert_array_almost_equal(X_norm, np.ones(n_features))
+    assert_array_almost_equal(Xt.A, XA)
+    assert_array_almost_equal(yt, y)
+
+    Xt, yt, X_mean, y_mean, X_norm = \
+        _preprocess_data(X, y, fit_intercept=True, normalize=False,
+                         return_mean=True)
+    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
+    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
+    assert_array_almost_equal(X_norm, np.ones(n_features))
+    assert_array_almost_equal(Xt.A, XA)
+    assert_array_almost_equal(yt, y - np.mean(y, axis=0))
+
+    Xt, yt, X_mean, y_mean, X_norm = \
+        _preprocess_data(X, y, fit_intercept=True, normalize=True,
+                         return_mean=True)
+    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
+    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
+    assert_array_almost_equal(X_norm, expected_X_norm)
+    assert_array_almost_equal(Xt.A, XA / expected_X_norm)
+    assert_array_almost_equal(yt, y - np.mean(y, axis=0))
+
+
+def test_csr_preprocess_data():
+    # Test output format of _preprocess_data, when input is csr
+    X, y = make_regression()
+    X[X < 2.5] = 0.0
+    csr = sparse.csr_matrix(X)
+    csr_, y, _, _, _ = _preprocess_data(csr, y, True)
+    assert csr_.getformat() == 'csr'
+
+
+@pytest.mark.parametrize('is_sparse', (True, False))
+@pytest.mark.parametrize('to_copy', (True, False))
+def test_preprocess_copy_data_no_checks(is_sparse, to_copy):
+    X, y = make_regression()
+    X[X < 2.5] = 0.0
+
+    if is_sparse:
+        X = sparse.csr_matrix(X)
+
+    X_, y_, _, _, _ = _preprocess_data(X, y, True,
+                                       copy=to_copy, check_input=False)
+
+    if to_copy and is_sparse:
+        assert not np.may_share_memory(X_.data, X.data)
+    elif to_copy:
+        assert not np.may_share_memory(X_, X)
+    elif is_sparse:
+        assert np.may_share_memory(X_.data, X.data)
+    else:
+        assert np.may_share_memory(X_, X)
+
+
+def test_dtype_preprocess_data():
+    n_samples = 200
+    n_features = 2
+    X = rng.rand(n_samples, n_features)
+    y = rng.rand(n_samples)
+
+    X_32 = np.asarray(X, dtype=np.float32)
+    y_32 = np.asarray(y, dtype=np.float32)
+    X_64 = np.asarray(X, dtype=np.float64)
+    y_64 = np.asarray(y, dtype=np.float64)
+
+    for fit_intercept in [True, False]:
+        for normalize in [True, False]:
+
+            Xt_32, yt_32, X_mean_32, y_mean_32, X_norm_32 = _preprocess_data(
+                X_32, y_32, fit_intercept=fit_intercept, normalize=normalize,
+                return_mean=True)
+
+            Xt_64, yt_64, X_mean_64, y_mean_64, X_norm_64 = _preprocess_data(
+                X_64, y_64, fit_intercept=fit_intercept, normalize=normalize,
+                return_mean=True)
+
+            Xt_3264, yt_3264, X_mean_3264, y_mean_3264, X_norm_3264 = (
+                _preprocess_data(X_32, y_64, fit_intercept=fit_intercept,
+                                 normalize=normalize, return_mean=True))
+
+            Xt_6432, yt_6432, X_mean_6432, y_mean_6432, X_norm_6432 = (
+                _preprocess_data(X_64, y_32, fit_intercept=fit_intercept,
+                                 normalize=normalize, return_mean=True))
+
+            assert Xt_32.dtype == np.float32
+            assert yt_32.dtype == np.float32
+            assert X_mean_32.dtype == np.float32
+            assert y_mean_32.dtype == np.float32
+            assert X_norm_32.dtype == np.float32
+
+            assert Xt_64.dtype == np.float64
+            assert yt_64.dtype == np.float64
+            assert X_mean_64.dtype == np.float64
+            assert y_mean_64.dtype == np.float64
+            assert X_norm_64.dtype == np.float64
+
+            assert Xt_3264.dtype == np.float32
+            assert yt_3264.dtype == np.float32
+            assert X_mean_3264.dtype == np.float32
+            assert y_mean_3264.dtype == np.float32
+            assert X_norm_3264.dtype == np.float32
+
+            assert Xt_6432.dtype == np.float64
+            assert yt_6432.dtype == np.float64
+            assert X_mean_6432.dtype == np.float64
+            assert y_mean_6432.dtype == np.float64
+            assert X_norm_6432.dtype == np.float64
+
+            assert X_32.dtype == np.float32
+            assert y_32.dtype == np.float32
+            assert X_64.dtype == np.float64
+            assert y_64.dtype == np.float64
+
+            assert_array_almost_equal(Xt_32, Xt_64)
+            assert_array_almost_equal(yt_32, yt_64)
+            assert_array_almost_equal(X_mean_32, X_mean_64)
+            assert_array_almost_equal(y_mean_32, y_mean_64)
+            assert_array_almost_equal(X_norm_32, X_norm_64)
+
+
+@pytest.mark.parametrize('n_targets', [None, 2])
+def test_rescale_data_dense(n_targets):
+    n_samples = 200
+    n_features = 2
+
+    sample_weight = 1.0 + rng.rand(n_samples)
+    X = rng.rand(n_samples, n_features)
+    if n_targets is None:
+        y = rng.rand(n_samples)
+    else:
+        y = rng.rand(n_samples, n_targets)
+    rescaled_X, rescaled_y = _rescale_data(X, y, sample_weight)
+    rescaled_X2 = X * np.sqrt(sample_weight)[:, np.newaxis]
+    if n_targets is None:
+        rescaled_y2 = y * np.sqrt(sample_weight)
+    else:
+        rescaled_y2 = y * np.sqrt(sample_weight)[:, np.newaxis]
+    assert_array_almost_equal(rescaled_X, rescaled_X2)
+    assert_array_almost_equal(rescaled_y, rescaled_y2)
+
+
+def test_fused_types_make_dataset():
+    # make_dataset must preserve float32/float64 input dtypes, for dense and
+    # CSR inputs, and the first sample drawn from each dataset must agree
+    # across dtypes and across dense/CSR representations.
+    iris = load_iris()
+
+    # float32 inputs (dense and CSR) with matching float32 sample weights
+    X_32 = iris.data.astype(np.float32)
+    y_32 = iris.target.astype(np.float32)
+    X_csr_32 = sparse.csr_matrix(X_32)
+    sample_weight_32 = np.arange(y_32.size, dtype=np.float32)
+
+    # same data as float64
+    X_64 = iris.data.astype(np.float64)
+    y_64 = iris.target.astype(np.float64)
+    X_csr_64 = sparse.csr_matrix(X_64)
+    sample_weight_64 = np.arange(y_64.size, dtype=np.float64)
+
+    # array
+    dataset_32, _ = make_dataset(X_32, y_32, sample_weight_32)
+    dataset_64, _ = make_dataset(X_64, y_64, sample_weight_64)
+    # _next_py() yields a 4-tuple; only the sample (a 3-tuple whose first
+    # item is the value array) and its target are inspected here.
+    xi_32, yi_32, _, _ = dataset_32._next_py()
+    xi_64, yi_64, _, _ = dataset_64._next_py()
+    xi_data_32, _, _ = xi_32
+    xi_data_64, _, _ = xi_64
+
+    assert xi_data_32.dtype == np.float32
+    assert xi_data_64.dtype == np.float64
+    assert_allclose(yi_64, yi_32, rtol=rtol)
+
+    # csr
+    datasetcsr_32, _ = make_dataset(X_csr_32, y_32, sample_weight_32)
+    datasetcsr_64, _ = make_dataset(X_csr_64, y_64, sample_weight_64)
+    xicsr_32, yicsr_32, _, _ = datasetcsr_32._next_py()
+    xicsr_64, yicsr_64, _, _ = datasetcsr_64._next_py()
+    xicsr_data_32, _, _ = xicsr_32
+    xicsr_data_64, _, _ = xicsr_64
+
+    assert xicsr_data_32.dtype == np.float32
+    assert xicsr_data_64.dtype == np.float64
+
+    # float32 and float64 CSR samples agree up to rtol
+    assert_allclose(xicsr_data_64, xicsr_data_32, rtol=rtol)
+    assert_allclose(yicsr_64, yicsr_32, rtol=rtol)
+
+    # dense and CSR datasets must yield exactly the same first sample
+    assert_array_equal(xi_data_32, xicsr_data_32)
+    assert_array_equal(xi_data_64, xicsr_data_64)
+    assert_array_equal(yi_32, yicsr_32)
+    assert_array_equal(yi_64, yicsr_64)
--- a/venv/Lib/site-packages/sklearn/linear_model/tests/test_bayes.py
+++ b/venv/Lib/site-packages/sklearn/linear_model/tests/test_bayes.py
@ -0,0 +1,274 @@
+# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
+#         Fabian Pedregosa <fabian.pedregosa@inria.fr>
+#
+# License: BSD 3 clause
+
+from math import log
+
+import numpy as np
+from scipy.linalg import pinvh
+import pytest
+
+
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_almost_equal
+from sklearn.utils._testing import assert_array_less
+from sklearn.utils._testing import assert_raise_message
+from sklearn.utils import check_random_state
+from sklearn.linear_model import BayesianRidge, ARDRegression
+from sklearn.linear_model import Ridge
+from sklearn import datasets
+from sklearn.utils.extmath import fast_logdet
+
+# Loaded once at import time and shared by the score tests below.
+diabetes = datasets.load_diabetes()
+
+
+def test_n_iter():
+    """Check value of n_iter."""
+    X = np.array([[1], [2], [6], [8], [10]])
+    y = np.array([1, 2, 6, 8, 10])
+    clf = BayesianRidge(n_iter=0)
+    msg = "n_iter should be greater than or equal to 1."
+    assert_raise_message(ValueError, msg, clf.fit, X, y)
+
+
+def test_bayesian_ridge_scores():
+    """Check scores attribute shape"""
+    X, y = diabetes.data, diabetes.target
+
+    clf = BayesianRidge(compute_score=True)
+    clf.fit(X, y)
+
+    assert clf.scores_.shape == (clf.n_iter_ + 1,)
+
+
+def test_bayesian_ridge_score_values():
+    """Check value of score on the diabetes dataset.
+
+    Compute log marginal likelihood with equation (36) in Sparse Bayesian
+    Learning and the Relevance Vector Machine (Tipping, 2001):
+
+    - 0.5 * (log |Id/alpha + X.X^T/lambda| +
+             y^T.(Id/alpha + X.X^T/lambda)^-1.y + n * log(2 * pi))
+    + lambda_1 * log(lambda) - lambda_2 * lambda
+    + alpha_1 * log(alpha) - alpha_2 * alpha
+
+    and check equality with the first score computed during training.
+    """
+
+    X, y = diabetes.data, diabetes.target
+    n_samples = X.shape[0]
+    # check with initial values of alpha and lambda (see code for the values)
+    eps = np.finfo(np.float64).eps
+    alpha_ = 1. / (np.var(y) + eps)
+    lambda_ = 1.
+
+    # value of the parameters of the Gamma hyperpriors
+    alpha_1 = 0.1
+    alpha_2 = 0.1
+    lambda_1 = 0.1
+    lambda_2 = 0.1
+
+    # compute score using formula of docstring
+    score = lambda_1 * log(lambda_) - lambda_2 * lambda_
+    score += alpha_1 * log(alpha_) - alpha_2 * alpha_
+    # M = Id/alpha + X.X^T/lambda; the quadratic term uses its inverse
+    M = 1. / alpha_ * np.eye(n_samples) + 1. / lambda_ * np.dot(X, X.T)
+    M_inv = pinvh(M)
+    score += - 0.5 * (fast_logdet(M) + np.dot(y.T, np.dot(M_inv, y)) +
+                      n_samples * log(2 * np.pi))
+
+    # compute score with BayesianRidge
+    clf = BayesianRidge(alpha_1=alpha_1, alpha_2=alpha_2,
+                        lambda_1=lambda_1, lambda_2=lambda_2,
+                        n_iter=1, fit_intercept=False, compute_score=True)
+    clf.fit(X, y)
+
+    # only the first recorded score is compared with the formula
+    assert_almost_equal(clf.scores_[0], score, decimal=9)
+
+
+def test_bayesian_ridge_parameter():
+    # Test correctness of lambda_ and alpha_ parameters (GitHub issue #8224)
+    X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
+    y = np.array([1, 2, 3, 2, 0, 4, 5]).T
+
+    # A Ridge regression model using an alpha value equal to the ratio of
+    # lambda_ and alpha_ from the Bayesian Ridge model must be identical
+    br_model = BayesianRidge(compute_score=True).fit(X, y)
+    rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit(X, y)
+    assert_array_almost_equal(rr_model.coef_, br_model.coef_)
+    assert_almost_equal(rr_model.intercept_, br_model.intercept_)
+
+
+def test_bayesian_sample_weights():
+    # Test correctness of the sample_weights method
+    X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
+    y = np.array([1, 2, 3, 2, 0, 4, 5]).T
+    w = np.array([4, 3, 3, 1, 1, 2, 3]).T
+
+    # A Ridge regression model using an alpha value equal to the ratio of
+    # lambda_ and alpha_ from the Bayesian Ridge model must be identical
+    br_model = BayesianRidge(compute_score=True).fit(X, y, sample_weight=w)
+    rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit(
+        X, y, sample_weight=w)
+    assert_array_almost_equal(rr_model.coef_, br_model.coef_)
+    assert_almost_equal(rr_model.intercept_, br_model.intercept_)
+
+
+def test_toy_bayesian_ridge_object():
+    # Test BayesianRidge on toy
+    X = np.array([[1], [2], [6], [8], [10]])
+    Y = np.array([1, 2, 6, 8, 10])
+    clf = BayesianRidge(compute_score=True)
+    clf.fit(X, Y)
+
+    # Check that the model could approximately learn the identity function
+    test = [[1], [3], [4]]
+    assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2)
+
+
+def test_bayesian_initial_params():
+    # Test BayesianRidge with initial values (alpha_init, lambda_init)
+    X = np.vander(np.linspace(0, 4, 5), 4)
+    y = np.array([0., 1., 0., -1., 0.])    # y = (x^3 - 6x^2 + 8x) / 3
+
+    # In this case, starting from the default initial values will increase
+    # the bias of the fitted curve. So, lambda_init should be small.
+    reg = BayesianRidge(alpha_init=1., lambda_init=1e-3)
+    # Check the R2 score nearly equals to one.
+    r2 = reg.fit(X, y).score(X, y)
+    assert_almost_equal(r2, 1.)
+
+
+def test_prediction_bayesian_ridge_ard_with_constant_input():
+    # Test BayesianRidge and ARDRegression predictions for edge case of
+    # constant target vectors
+    n_samples = 4
+    n_features = 5
+    random_state = check_random_state(42)
+    constant_value = random_state.rand()
+    X = random_state.random_sample((n_samples, n_features))
+    y = np.full(n_samples, constant_value,
+                dtype=np.array(constant_value).dtype)
+    expected = np.full(n_samples, constant_value,
+                       dtype=np.array(constant_value).dtype)
+
+    for clf in [BayesianRidge(), ARDRegression()]:
+        y_pred = clf.fit(X, y).predict(X)
+        assert_array_almost_equal(y_pred, expected)
+
+
+def test_std_bayesian_ridge_ard_with_constant_input():
+    # Test BayesianRidge and ARDRegression standard dev. for edge case of
+    # constant target vector
+    # The standard dev. should be relatively small (< 0.01 is tested here)
+    n_samples = 10
+    n_features = 5
+    random_state = check_random_state(42)
+    constant_value = random_state.rand()
+    X = random_state.random_sample((n_samples, n_features))
+    y = np.full(n_samples, constant_value,
+                dtype=np.array(constant_value).dtype)
+    expected_upper_boundary = 0.01
+
+    for clf in [BayesianRidge(), ARDRegression()]:
+        _, y_std = clf.fit(X, y).predict(X, return_std=True)
+        assert_array_less(y_std, expected_upper_boundary)
+
+
+def test_update_of_sigma_in_ard():
+    # Checks that `sigma_` is updated correctly after the last iteration
+    # of the ARDRegression algorithm. See issue #10128.
+    X = np.array([[1, 0],
+                  [0, 0]])
+    y = np.array([0, 0])
+    clf = ARDRegression(n_iter=1)
+    clf.fit(X, y)
+    # With the inputs above, ARDRegression prunes both of the two coefficients
+    # in the first iteration. Hence, the expected shape of `sigma_` is (0, 0).
+    assert clf.sigma_.shape == (0, 0)
+    # Ensure that no error is thrown at prediction stage
+    clf.predict(X, return_std=True)
+
+
+def test_toy_ard_object():
+    # Test BayesianRegression ARD classifier
+    X = np.array([[1], [2], [3]])
+    Y = np.array([1, 2, 3])
+    clf = ARDRegression(compute_score=True)
+    clf.fit(X, Y)
+
+    # Check that the model could approximately learn the identity function
+    test = [[1], [3], [4]]
+    assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2)
+
+
+@pytest.mark.parametrize('seed', range(100))
+@pytest.mark.parametrize('n_samples, n_features', ((10, 100), (100, 10)))
+def test_ard_accuracy_on_easy_problem(seed, n_samples, n_features):
+    # Check that ARD converges with reasonable accuracy on an easy problem
+    # (Github issue #14055)
+    X = np.random.RandomState(seed=seed).normal(size=(250, 3))
+    y = X[:, 1]
+
+    regressor = ARDRegression()
+    regressor.fit(X, y)
+
+    abs_coef_error = np.abs(1 - regressor.coef_[1])
+    assert abs_coef_error < 1e-10
+
+
+def test_return_std():
+    # Test return_std option for both Bayesian regressors
+    def f(X):
+        return np.dot(X, w) + b
+
+    def f_noise(X, noise_mult):
+        return f(X) + np.random.randn(X.shape[0]) * noise_mult
+
+    d = 5
+    n_train = 50
+    n_test = 10
+
+    w = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
+    b = 1.0
+
+    X = np.random.random((n_train, d))
+    X_test = np.random.random((n_test, d))
+
+    for decimal, noise_mult in enumerate([1, 0.1, 0.01]):
+        y = f_noise(X, noise_mult)
+
+        m1 = BayesianRidge()
+        m1.fit(X, y)
+        y_mean1, y_std1 = m1.predict(X_test, return_std=True)
+        assert_array_almost_equal(y_std1, noise_mult, decimal=decimal)
+
+        m2 = ARDRegression()
+        m2.fit(X, y)
+        y_mean2, y_std2 = m2.predict(X_test, return_std=True)
+        assert_array_almost_equal(y_std2, noise_mult, decimal=decimal)
+
+
+@pytest.mark.parametrize('seed', range(10))
+def test_update_sigma(seed):
+    # make sure the two update_sigma() helpers are equivalent. The woodbury
+    # formula is used when n_samples < n_features, and the other one is used
+    # otherwise.
+
+    rng = np.random.RandomState(seed)
+
+    # set n_samples == n_features to avoid instability issues when inverting
+    # the matrices. Using the woodbury formula would be unstable when
+    # n_samples > n_features
+    n_samples = n_features = 10
+    X = rng.randn(n_samples, n_features)
+    alpha = 1
+    lmbda = np.arange(1, n_features + 1)
+    keep_lambda = np.array([True] * n_features)
+
+    reg = ARDRegression()
+
+    sigma = reg._update_sigma(X, alpha, lmbda, keep_lambda)
+    sigma_woodbury = reg._update_sigma_woodbury(X, alpha, lmbda, keep_lambda)
+
+    np.testing.assert_allclose(sigma, sigma_woodbury)
--- a/Show more
+++ b/Show more