Uploaded test files
This commit is contained in:
parent
f584ad9d97
commit
2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
16 binary files changed (contents not shown).
530
venv/Lib/site-packages/sklearn/linear_model/tests/test_base.py
Normal file
@@ -0,0 +1,530 @@
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#         Fabian Pedregosa <fabian.pedregosa@inria.fr>
#
# License: BSD 3 clause

import pytest

import numpy as np
from scipy import sparse
from scipy import linalg

from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_allclose
from sklearn.utils.fixes import parse_version

from sklearn.linear_model import LinearRegression
from sklearn.linear_model._base import _preprocess_data
from sklearn.linear_model._base import _rescale_data
from sklearn.linear_model._base import make_dataset
from sklearn.utils import check_random_state
from sklearn.datasets import make_sparse_uncorrelated
from sklearn.datasets import make_regression
from sklearn.datasets import load_iris

rng = np.random.RandomState(0)
rtol = 1e-6
|
||||
|
||||
|
||||
def test_linear_regression():
|
||||
# Test LinearRegression on a simple dataset.
|
||||
# a simple dataset
|
||||
X = [[1], [2]]
|
||||
Y = [1, 2]
|
||||
|
||||
reg = LinearRegression()
|
||||
reg.fit(X, Y)
|
||||
|
||||
assert_array_almost_equal(reg.coef_, [1])
|
||||
assert_array_almost_equal(reg.intercept_, [0])
|
||||
assert_array_almost_equal(reg.predict(X), [1, 2])
|
||||
|
||||
# test it also for degenerate input
|
||||
X = [[1]]
|
||||
Y = [0]
|
||||
|
||||
reg = LinearRegression()
|
||||
reg.fit(X, Y)
|
||||
assert_array_almost_equal(reg.coef_, [0])
|
||||
assert_array_almost_equal(reg.intercept_, [0])
|
||||
assert_array_almost_equal(reg.predict(X), [0])
|
||||
|
||||
|
||||
def test_linear_regression_sample_weights():
|
||||
# TODO: loop over sparse data as well
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
|
||||
# It would not work with under-determined systems
|
||||
for n_samples, n_features in ((6, 5), ):
|
||||
|
||||
y = rng.randn(n_samples)
|
||||
X = rng.randn(n_samples, n_features)
|
||||
sample_weight = 1.0 + rng.rand(n_samples)
|
||||
|
||||
for intercept in (True, False):
|
||||
|
||||
# LinearRegression with explicit sample_weight
|
||||
reg = LinearRegression(fit_intercept=intercept)
|
||||
reg.fit(X, y, sample_weight=sample_weight)
|
||||
coefs1 = reg.coef_
|
||||
inter1 = reg.intercept_
|
||||
|
||||
assert reg.coef_.shape == (X.shape[1], ) # sanity checks
|
||||
assert reg.score(X, y) > 0.5
|
||||
|
||||
# Closed form of the weighted least square
|
||||
# theta = (X^T W X)^(-1) * X^T W y
|
||||
W = np.diag(sample_weight)
|
||||
if intercept is False:
|
||||
X_aug = X
|
||||
else:
|
||||
dummy_column = np.ones(shape=(n_samples, 1))
|
||||
X_aug = np.concatenate((dummy_column, X), axis=1)
|
||||
|
||||
coefs2 = linalg.solve(X_aug.T.dot(W).dot(X_aug),
|
||||
X_aug.T.dot(W).dot(y))
|
||||
|
||||
if intercept is False:
|
||||
assert_array_almost_equal(coefs1, coefs2)
|
||||
else:
|
||||
assert_array_almost_equal(coefs1, coefs2[1:])
|
||||
assert_almost_equal(inter1, coefs2[0])
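
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above): the closed form theta = (X^T W X)^(-1) X^T W y checked above is
# the minimizer of sum_i w_i * (y_i - x_i . theta)^2, which is the same as
# an ordinary least-squares fit on rows rescaled by sqrt(w_i).
def _weighted_lstsq_sketch():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.randn(6, 5)
    y = rng.randn(6)
    w = 1.0 + rng.rand(6)
    # closed form with the weight matrix W = diag(w)
    W = np.diag(w)
    theta_closed = np.linalg.solve(X.T @ W @ X, X.T @ W @ y)
    # same solution from plain lstsq on sqrt(w)-rescaled rows
    sw = np.sqrt(w)
    theta_rescaled = np.linalg.lstsq(X * sw[:, None], y * sw, rcond=None)[0]
    np.testing.assert_allclose(theta_closed, theta_rescaled)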
|
||||
|
||||
|
||||
def test_raises_value_error_if_sample_weights_greater_than_1d():
|
||||
# Sample weights must be either scalar or 1D
|
||||
|
||||
n_sampless = [2, 3]
|
||||
n_featuress = [3, 2]
|
||||
|
||||
for n_samples, n_features in zip(n_sampless, n_featuress):
|
||||
X = rng.randn(n_samples, n_features)
|
||||
y = rng.randn(n_samples)
|
||||
sample_weights_OK = rng.randn(n_samples) ** 2 + 1
|
||||
sample_weights_OK_1 = 1.
|
||||
sample_weights_OK_2 = 2.
|
||||
|
||||
reg = LinearRegression()
|
||||
|
||||
# make sure the "OK" sample weights actually work
|
||||
reg.fit(X, y, sample_weights_OK)
|
||||
reg.fit(X, y, sample_weights_OK_1)
|
||||
reg.fit(X, y, sample_weights_OK_2)
|
||||
|
||||
|
||||
def test_fit_intercept():
|
||||
# Test assertions on betas shape.
|
||||
X2 = np.array([[0.38349978, 0.61650022],
|
||||
[0.58853682, 0.41146318]])
|
||||
X3 = np.array([[0.27677969, 0.70693172, 0.01628859],
|
||||
[0.08385139, 0.20692515, 0.70922346]])
|
||||
y = np.array([1, 1])
|
||||
|
||||
lr2_without_intercept = LinearRegression(fit_intercept=False).fit(X2, y)
|
||||
lr2_with_intercept = LinearRegression().fit(X2, y)
|
||||
|
||||
lr3_without_intercept = LinearRegression(fit_intercept=False).fit(X3, y)
|
||||
lr3_with_intercept = LinearRegression().fit(X3, y)
|
||||
|
||||
assert (lr2_with_intercept.coef_.shape ==
|
||||
lr2_without_intercept.coef_.shape)
|
||||
assert (lr3_with_intercept.coef_.shape ==
|
||||
lr3_without_intercept.coef_.shape)
|
||||
assert (lr2_without_intercept.coef_.ndim ==
|
||||
lr3_without_intercept.coef_.ndim)
|
||||
|
||||
|
||||
def test_linear_regression_sparse(random_state=0):
|
||||
# Test that linear regression also works with sparse data
|
||||
random_state = check_random_state(random_state)
|
||||
for i in range(10):
|
||||
n = 100
|
||||
X = sparse.eye(n, n)
|
||||
beta = random_state.rand(n)
|
||||
y = X * beta[:, np.newaxis]
|
||||
|
||||
ols = LinearRegression()
|
||||
ols.fit(X, y.ravel())
|
||||
assert_array_almost_equal(beta, ols.coef_ + ols.intercept_)
|
||||
|
||||
assert_array_almost_equal(ols.predict(X) - y.ravel(), 0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('normalize', [True, False])
|
||||
@pytest.mark.parametrize('fit_intercept', [True, False])
|
||||
def test_linear_regression_sparse_equal_dense(normalize, fit_intercept):
|
||||
# Test that linear regression agrees between sparse and dense
|
||||
rng = check_random_state(0)
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
X = rng.randn(n_samples, n_features)
|
||||
X[X < 0.1] = 0.
|
||||
Xcsr = sparse.csr_matrix(X)
|
||||
y = rng.rand(n_samples)
|
||||
params = dict(normalize=normalize, fit_intercept=fit_intercept)
|
||||
clf_dense = LinearRegression(**params)
|
||||
clf_sparse = LinearRegression(**params)
|
||||
clf_dense.fit(X, y)
|
||||
clf_sparse.fit(Xcsr, y)
|
||||
assert clf_dense.intercept_ == pytest.approx(clf_sparse.intercept_)
|
||||
assert_allclose(clf_dense.coef_, clf_sparse.coef_)
|
||||
|
||||
|
||||
def test_linear_regression_multiple_outcome(random_state=0):
|
||||
# Test multiple-outcome linear regressions
|
||||
X, y = make_regression(random_state=random_state)
|
||||
|
||||
Y = np.vstack((y, y)).T
|
||||
n_features = X.shape[1]
|
||||
|
||||
reg = LinearRegression()
|
||||
reg.fit((X), Y)
|
||||
assert reg.coef_.shape == (2, n_features)
|
||||
Y_pred = reg.predict(X)
|
||||
reg.fit(X, y)
|
||||
y_pred = reg.predict(X)
|
||||
assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
|
||||
|
||||
|
||||
def test_linear_regression_sparse_multiple_outcome(random_state=0):
|
||||
# Test multiple-outcome linear regressions with sparse data
|
||||
random_state = check_random_state(random_state)
|
||||
X, y = make_sparse_uncorrelated(random_state=random_state)
|
||||
X = sparse.coo_matrix(X)
|
||||
Y = np.vstack((y, y)).T
|
||||
n_features = X.shape[1]
|
||||
|
||||
ols = LinearRegression()
|
||||
ols.fit(X, Y)
|
||||
assert ols.coef_.shape == (2, n_features)
|
||||
Y_pred = ols.predict(X)
|
||||
ols.fit(X, y.ravel())
|
||||
y_pred = ols.predict(X)
|
||||
assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
|
||||
|
||||
|
||||
def test_linear_regression_pd_sparse_dataframe_warning():
|
||||
pd = pytest.importorskip('pandas')
|
||||
# restrict the pd versions < '0.24.0' as they have a bug in is_sparse func
|
||||
if parse_version(pd.__version__) < parse_version('0.24.0'):
|
||||
pytest.skip("pandas 0.24+ required.")
|
||||
|
||||
# Warning is raised only when some of the columns is sparse
|
||||
df = pd.DataFrame({'0': np.random.randn(10)})
|
||||
for col in range(1, 4):
|
||||
arr = np.random.randn(10)
|
||||
arr[:8] = 0
|
||||
# all columns but the first column is sparse
|
||||
if col != 0:
|
||||
arr = pd.arrays.SparseArray(arr, fill_value=0)
|
||||
df[str(col)] = arr
|
||||
|
||||
msg = "pandas.DataFrame with sparse columns found."
|
||||
with pytest.warns(UserWarning, match=msg):
|
||||
reg = LinearRegression()
|
||||
reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])
|
||||
|
||||
# does not warn when the whole dataframe is sparse
|
||||
df['0'] = pd.arrays.SparseArray(df['0'], fill_value=0)
|
||||
assert hasattr(df, "sparse")
|
||||
|
||||
with pytest.warns(None) as record:
|
||||
reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])
|
||||
assert not record
|
||||
|
||||
|
||||
def test_preprocess_data():
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
X = rng.rand(n_samples, n_features)
|
||||
y = rng.rand(n_samples)
|
||||
expected_X_mean = np.mean(X, axis=0)
|
||||
expected_X_norm = np.std(X, axis=0) * np.sqrt(X.shape[0])
|
||||
expected_y_mean = np.mean(y, axis=0)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=False, normalize=False)
|
||||
assert_array_almost_equal(X_mean, np.zeros(n_features))
|
||||
assert_array_almost_equal(y_mean, 0)
|
||||
assert_array_almost_equal(X_norm, np.ones(n_features))
|
||||
assert_array_almost_equal(Xt, X)
|
||||
assert_array_almost_equal(yt, y)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=True, normalize=False)
|
||||
assert_array_almost_equal(X_mean, expected_X_mean)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(X_norm, np.ones(n_features))
|
||||
assert_array_almost_equal(Xt, X - expected_X_mean)
|
||||
assert_array_almost_equal(yt, y - expected_y_mean)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=True, normalize=True)
|
||||
assert_array_almost_equal(X_mean, expected_X_mean)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(X_norm, expected_X_norm)
|
||||
assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
|
||||
assert_array_almost_equal(yt, y - expected_y_mean)
|
||||
|
||||
|
||||
def test_preprocess_data_multioutput():
|
||||
n_samples = 200
|
||||
n_features = 3
|
||||
n_outputs = 2
|
||||
X = rng.rand(n_samples, n_features)
|
||||
y = rng.rand(n_samples, n_outputs)
|
||||
expected_y_mean = np.mean(y, axis=0)
|
||||
|
||||
args = [X, sparse.csc_matrix(X)]
|
||||
for X in args:
|
||||
_, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=False,
|
||||
normalize=False)
|
||||
assert_array_almost_equal(y_mean, np.zeros(n_outputs))
|
||||
assert_array_almost_equal(yt, y)
|
||||
|
||||
_, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=True,
|
||||
normalize=False)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(yt, y - y_mean)
|
||||
|
||||
_, yt, _, y_mean, _ = _preprocess_data(X, y, fit_intercept=True,
|
||||
normalize=True)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(yt, y - y_mean)
|
||||
|
||||
|
||||
def test_preprocess_data_weighted():
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
X = rng.rand(n_samples, n_features)
|
||||
y = rng.rand(n_samples)
|
||||
sample_weight = rng.rand(n_samples)
|
||||
expected_X_mean = np.average(X, axis=0, weights=sample_weight)
|
||||
expected_y_mean = np.average(y, axis=0, weights=sample_weight)
|
||||
|
||||
# XXX: if normalize=True, should we expect a weighted standard deviation?
|
||||
# Currently not weighted, but calculated with respect to weighted mean
|
||||
expected_X_norm = (np.sqrt(X.shape[0]) *
|
||||
np.mean((X - expected_X_mean) ** 2, axis=0) ** .5)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=True, normalize=False,
|
||||
sample_weight=sample_weight)
|
||||
assert_array_almost_equal(X_mean, expected_X_mean)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(X_norm, np.ones(n_features))
|
||||
assert_array_almost_equal(Xt, X - expected_X_mean)
|
||||
assert_array_almost_equal(yt, y - expected_y_mean)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=True, normalize=True,
|
||||
sample_weight=sample_weight)
|
||||
assert_array_almost_equal(X_mean, expected_X_mean)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(X_norm, expected_X_norm)
|
||||
assert_array_almost_equal(Xt, (X - expected_X_mean) / expected_X_norm)
|
||||
assert_array_almost_equal(yt, y - expected_y_mean)
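
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above) spelling out the convention questioned in the XXX comment: the
# expected norm is the unweighted root-mean-square deviation about the
# *weighted* mean, times sqrt(n_samples), which is not the same as a fully
# weighted standard deviation.
def _weighted_scale_convention_sketch():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.rand(200, 2)
    w = rng.rand(200)
    mean_w = np.average(X, axis=0, weights=w)            # weighted mean
    scale_used = np.sqrt(X.shape[0]) * np.sqrt(
        np.mean((X - mean_w) ** 2, axis=0))               # unweighted spread
    scale_weighted = np.sqrt(X.shape[0]) * np.sqrt(
        np.average((X - mean_w) ** 2, axis=0, weights=w))
    # the two conventions generally disagree
    assert not np.allclose(scale_used, scale_weighted)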
|
||||
|
||||
|
||||
def test_sparse_preprocess_data_with_return_mean():
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
# random_state not supported yet in sparse.rand
|
||||
X = sparse.rand(n_samples, n_features, density=.5) # , random_state=rng
|
||||
X = X.tolil()
|
||||
y = rng.rand(n_samples)
|
||||
XA = X.toarray()
|
||||
expected_X_norm = np.std(XA, axis=0) * np.sqrt(X.shape[0])
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=False, normalize=False,
|
||||
return_mean=True)
|
||||
assert_array_almost_equal(X_mean, np.zeros(n_features))
|
||||
assert_array_almost_equal(y_mean, 0)
|
||||
assert_array_almost_equal(X_norm, np.ones(n_features))
|
||||
assert_array_almost_equal(Xt.A, XA)
|
||||
assert_array_almost_equal(yt, y)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=True, normalize=False,
|
||||
return_mean=True)
|
||||
assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
|
||||
assert_array_almost_equal(y_mean, np.mean(y, axis=0))
|
||||
assert_array_almost_equal(X_norm, np.ones(n_features))
|
||||
assert_array_almost_equal(Xt.A, XA)
|
||||
assert_array_almost_equal(yt, y - np.mean(y, axis=0))
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_norm = \
|
||||
_preprocess_data(X, y, fit_intercept=True, normalize=True,
|
||||
return_mean=True)
|
||||
assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
|
||||
assert_array_almost_equal(y_mean, np.mean(y, axis=0))
|
||||
assert_array_almost_equal(X_norm, expected_X_norm)
|
||||
assert_array_almost_equal(Xt.A, XA / expected_X_norm)
|
||||
assert_array_almost_equal(yt, y - np.mean(y, axis=0))
|
||||
|
||||
|
||||
def test_csr_preprocess_data():
|
||||
# Test output format of _preprocess_data, when input is csr
|
||||
X, y = make_regression()
|
||||
X[X < 2.5] = 0.0
|
||||
csr = sparse.csr_matrix(X)
|
||||
csr_, y, _, _, _ = _preprocess_data(csr, y, True)
|
||||
assert csr_.getformat() == 'csr'
|
||||
|
||||
|
||||
@pytest.mark.parametrize('is_sparse', (True, False))
|
||||
@pytest.mark.parametrize('to_copy', (True, False))
|
||||
def test_preprocess_copy_data_no_checks(is_sparse, to_copy):
|
||||
X, y = make_regression()
|
||||
X[X < 2.5] = 0.0
|
||||
|
||||
if is_sparse:
|
||||
X = sparse.csr_matrix(X)
|
||||
|
||||
X_, y_, _, _, _ = _preprocess_data(X, y, True,
|
||||
copy=to_copy, check_input=False)
|
||||
|
||||
if to_copy and is_sparse:
|
||||
assert not np.may_share_memory(X_.data, X.data)
|
||||
elif to_copy:
|
||||
assert not np.may_share_memory(X_, X)
|
||||
elif is_sparse:
|
||||
assert np.may_share_memory(X_.data, X.data)
|
||||
else:
|
||||
assert np.may_share_memory(X_, X)
|
||||
|
||||
|
||||
def test_dtype_preprocess_data():
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
X = rng.rand(n_samples, n_features)
|
||||
y = rng.rand(n_samples)
|
||||
|
||||
X_32 = np.asarray(X, dtype=np.float32)
|
||||
y_32 = np.asarray(y, dtype=np.float32)
|
||||
X_64 = np.asarray(X, dtype=np.float64)
|
||||
y_64 = np.asarray(y, dtype=np.float64)
|
||||
|
||||
for fit_intercept in [True, False]:
|
||||
for normalize in [True, False]:
|
||||
|
||||
Xt_32, yt_32, X_mean_32, y_mean_32, X_norm_32 = _preprocess_data(
|
||||
X_32, y_32, fit_intercept=fit_intercept, normalize=normalize,
|
||||
return_mean=True)
|
||||
|
||||
Xt_64, yt_64, X_mean_64, y_mean_64, X_norm_64 = _preprocess_data(
|
||||
X_64, y_64, fit_intercept=fit_intercept, normalize=normalize,
|
||||
return_mean=True)
|
||||
|
||||
Xt_3264, yt_3264, X_mean_3264, y_mean_3264, X_norm_3264 = (
|
||||
_preprocess_data(X_32, y_64, fit_intercept=fit_intercept,
|
||||
normalize=normalize, return_mean=True))
|
||||
|
||||
Xt_6432, yt_6432, X_mean_6432, y_mean_6432, X_norm_6432 = (
|
||||
_preprocess_data(X_64, y_32, fit_intercept=fit_intercept,
|
||||
normalize=normalize, return_mean=True))
|
||||
|
||||
assert Xt_32.dtype == np.float32
|
||||
assert yt_32.dtype == np.float32
|
||||
assert X_mean_32.dtype == np.float32
|
||||
assert y_mean_32.dtype == np.float32
|
||||
assert X_norm_32.dtype == np.float32
|
||||
|
||||
assert Xt_64.dtype == np.float64
|
||||
assert yt_64.dtype == np.float64
|
||||
assert X_mean_64.dtype == np.float64
|
||||
assert y_mean_64.dtype == np.float64
|
||||
assert X_norm_64.dtype == np.float64
|
||||
|
||||
assert Xt_3264.dtype == np.float32
|
||||
assert yt_3264.dtype == np.float32
|
||||
assert X_mean_3264.dtype == np.float32
|
||||
assert y_mean_3264.dtype == np.float32
|
||||
assert X_norm_3264.dtype == np.float32
|
||||
|
||||
assert Xt_6432.dtype == np.float64
|
||||
assert yt_6432.dtype == np.float64
|
||||
assert X_mean_6432.dtype == np.float64
|
||||
assert y_mean_6432.dtype == np.float64
|
||||
assert X_norm_6432.dtype == np.float64
|
||||
|
||||
assert X_32.dtype == np.float32
|
||||
assert y_32.dtype == np.float32
|
||||
assert X_64.dtype == np.float64
|
||||
assert y_64.dtype == np.float64
|
||||
|
||||
assert_array_almost_equal(Xt_32, Xt_64)
|
||||
assert_array_almost_equal(yt_32, yt_64)
|
||||
assert_array_almost_equal(X_mean_32, X_mean_64)
|
||||
assert_array_almost_equal(y_mean_32, y_mean_64)
|
||||
assert_array_almost_equal(X_norm_32, X_norm_64)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('n_targets', [None, 2])
|
||||
def test_rescale_data_dense(n_targets):
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
|
||||
sample_weight = 1.0 + rng.rand(n_samples)
|
||||
X = rng.rand(n_samples, n_features)
|
||||
if n_targets is None:
|
||||
y = rng.rand(n_samples)
|
||||
else:
|
||||
y = rng.rand(n_samples, n_targets)
|
||||
rescaled_X, rescaled_y = _rescale_data(X, y, sample_weight)
|
||||
rescaled_X2 = X * np.sqrt(sample_weight)[:, np.newaxis]
|
||||
if n_targets is None:
|
||||
rescaled_y2 = y * np.sqrt(sample_weight)
|
||||
else:
|
||||
rescaled_y2 = y * np.sqrt(sample_weight)[:, np.newaxis]
|
||||
assert_array_almost_equal(rescaled_X, rescaled_X2)
|
||||
assert_array_almost_equal(rescaled_y, rescaled_y2)
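
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above): rescaling X and y by sqrt(sample_weight), as _rescale_data does,
# is exactly the transformation that turns a weighted squared loss into an
# unweighted one, for any coefficient vector.
def _rescale_identity_sketch():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.rand(200, 2)
    y = rng.rand(200)
    w = 1.0 + rng.rand(200)
    coef = rng.randn(2)
    weighted_loss = np.sum(w * (y - X @ coef) ** 2)
    sw = np.sqrt(w)
    plain_loss = np.sum((y * sw - (X * sw[:, None]) @ coef) ** 2)
    np.testing.assert_allclose(weighted_loss, plain_loss)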
|
||||
|
||||
|
||||
def test_fused_types_make_dataset():
|
||||
iris = load_iris()
|
||||
|
||||
X_32 = iris.data.astype(np.float32)
|
||||
y_32 = iris.target.astype(np.float32)
|
||||
X_csr_32 = sparse.csr_matrix(X_32)
|
||||
sample_weight_32 = np.arange(y_32.size, dtype=np.float32)
|
||||
|
||||
X_64 = iris.data.astype(np.float64)
|
||||
y_64 = iris.target.astype(np.float64)
|
||||
X_csr_64 = sparse.csr_matrix(X_64)
|
||||
sample_weight_64 = np.arange(y_64.size, dtype=np.float64)
|
||||
|
||||
# array
|
||||
dataset_32, _ = make_dataset(X_32, y_32, sample_weight_32)
|
||||
dataset_64, _ = make_dataset(X_64, y_64, sample_weight_64)
|
||||
xi_32, yi_32, _, _ = dataset_32._next_py()
|
||||
xi_64, yi_64, _, _ = dataset_64._next_py()
|
||||
xi_data_32, _, _ = xi_32
|
||||
xi_data_64, _, _ = xi_64
|
||||
|
||||
assert xi_data_32.dtype == np.float32
|
||||
assert xi_data_64.dtype == np.float64
|
||||
assert_allclose(yi_64, yi_32, rtol=rtol)
|
||||
|
||||
# csr
|
||||
datasetcsr_32, _ = make_dataset(X_csr_32, y_32, sample_weight_32)
|
||||
datasetcsr_64, _ = make_dataset(X_csr_64, y_64, sample_weight_64)
|
||||
xicsr_32, yicsr_32, _, _ = datasetcsr_32._next_py()
|
||||
xicsr_64, yicsr_64, _, _ = datasetcsr_64._next_py()
|
||||
xicsr_data_32, _, _ = xicsr_32
|
||||
xicsr_data_64, _, _ = xicsr_64
|
||||
|
||||
assert xicsr_data_32.dtype == np.float32
|
||||
assert xicsr_data_64.dtype == np.float64
|
||||
|
||||
assert_allclose(xicsr_data_64, xicsr_data_32, rtol=rtol)
|
||||
assert_allclose(yicsr_64, yicsr_32, rtol=rtol)
|
||||
|
||||
assert_array_equal(xi_data_32, xicsr_data_32)
|
||||
assert_array_equal(xi_data_64, xicsr_data_64)
|
||||
assert_array_equal(yi_32, yicsr_32)
|
||||
assert_array_equal(yi_64, yicsr_64)
|
274
venv/Lib/site-packages/sklearn/linear_model/tests/test_bayes.py
Normal file
@@ -0,0 +1,274 @@
# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#         Fabian Pedregosa <fabian.pedregosa@inria.fr>
#
# License: BSD 3 clause

from math import log

import numpy as np
from scipy.linalg import pinvh
import pytest


from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_less
from sklearn.utils._testing import assert_raise_message
from sklearn.utils import check_random_state
from sklearn.linear_model import BayesianRidge, ARDRegression
from sklearn.linear_model import Ridge
from sklearn import datasets
from sklearn.utils.extmath import fast_logdet

diabetes = datasets.load_diabetes()
|
||||
|
||||
|
||||
def test_n_iter():
|
||||
"""Check value of n_iter."""
|
||||
X = np.array([[1], [2], [6], [8], [10]])
|
||||
y = np.array([1, 2, 6, 8, 10])
|
||||
clf = BayesianRidge(n_iter=0)
|
||||
msg = "n_iter should be greater than or equal to 1."
|
||||
assert_raise_message(ValueError, msg, clf.fit, X, y)
|
||||
|
||||
|
||||
def test_bayesian_ridge_scores():
|
||||
"""Check scores attribute shape"""
|
||||
X, y = diabetes.data, diabetes.target
|
||||
|
||||
clf = BayesianRidge(compute_score=True)
|
||||
clf.fit(X, y)
|
||||
|
||||
assert clf.scores_.shape == (clf.n_iter_ + 1,)
|
||||
|
||||
|
||||
def test_bayesian_ridge_score_values():
|
||||
"""Check value of score on toy example.
|
||||
|
||||
Compute log marginal likelihood with equation (36) in Sparse Bayesian
|
||||
Learning and the Relevance Vector Machine (Tipping, 2001):
|
||||
|
||||
- 0.5 * (log |Id/alpha + X.X^T/lambda| +
         y^T.(Id/alpha + X.X^T/lambda)^-1.y + n * log(2 * pi))
|
||||
+ lambda_1 * log(lambda) - lambda_2 * lambda
|
||||
+ alpha_1 * log(alpha) - alpha_2 * alpha
|
||||
|
||||
and check equality with the score computed during training.
|
||||
"""
|
||||
|
||||
X, y = diabetes.data, diabetes.target
|
||||
n_samples = X.shape[0]
|
||||
# check with initial values of alpha and lambda (see code for the values)
|
||||
eps = np.finfo(np.float64).eps
|
||||
alpha_ = 1. / (np.var(y) + eps)
|
||||
lambda_ = 1.
|
||||
|
||||
# value of the parameters of the Gamma hyperpriors
|
||||
alpha_1 = 0.1
|
||||
alpha_2 = 0.1
|
||||
lambda_1 = 0.1
|
||||
lambda_2 = 0.1
|
||||
|
||||
# compute score using formula of docstring
|
||||
score = lambda_1 * log(lambda_) - lambda_2 * lambda_
|
||||
score += alpha_1 * log(alpha_) - alpha_2 * alpha_
|
||||
M = 1. / alpha_ * np.eye(n_samples) + 1. / lambda_ * np.dot(X, X.T)
|
||||
M_inv = pinvh(M)
|
||||
score += - 0.5 * (fast_logdet(M) + np.dot(y.T, np.dot(M_inv, y)) +
|
||||
n_samples * log(2 * np.pi))
|
||||
|
||||
# compute score with BayesianRidge
|
||||
clf = BayesianRidge(alpha_1=alpha_1, alpha_2=alpha_2,
|
||||
lambda_1=lambda_1, lambda_2=lambda_2,
|
||||
n_iter=1, fit_intercept=False, compute_score=True)
|
||||
clf.fit(X, y)
|
||||
|
||||
assert_almost_equal(clf.scores_[0], score, decimal=9)
|
||||
|
||||
|
||||
def test_bayesian_ridge_parameter():
|
||||
# Test correctness of lambda_ and alpha_ parameters (GitHub issue #8224)
|
||||
X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
|
||||
y = np.array([1, 2, 3, 2, 0, 4, 5]).T
|
||||
|
||||
# A Ridge regression model using an alpha value equal to the ratio of
|
||||
# lambda_ and alpha_ from the Bayesian Ridge model must be identical
|
||||
br_model = BayesianRidge(compute_score=True).fit(X, y)
|
||||
rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit(X, y)
|
||||
assert_array_almost_equal(rr_model.coef_, br_model.coef_)
|
||||
assert_almost_equal(rr_model.intercept_, br_model.intercept_)
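
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above) of why the ratio lambda_ / alpha_ is the right Ridge penalty: with
# fixed noise precision alpha_ and weight precision lambda_, the Bayesian
# ridge mean minimizes alpha_ * ||y - Xw||^2 + lambda_ * ||w||^2, and
# dividing by alpha_ gives the plain ridge objective with penalty
# lambda_ / alpha_.  Closed-form check on random data (no intercept):
def _bayesian_ridge_equivalence_sketch():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.randn(20, 3)
    y = rng.randn(20)
    alpha_, lambda_ = 2.0, 0.5                 # arbitrary fixed precisions
    eye = np.eye(X.shape[1])
    w_map = np.linalg.solve(alpha_ * X.T @ X + lambda_ * eye,
                            alpha_ * X.T @ y)
    w_ridge = np.linalg.solve(X.T @ X + (lambda_ / alpha_) * eye,
                              X.T @ y)
    np.testing.assert_allclose(w_map, w_ridge)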
|
||||
|
||||
|
||||
def test_bayesian_sample_weights():
|
||||
# Test correctness of the sample_weights method
|
||||
X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
|
||||
y = np.array([1, 2, 3, 2, 0, 4, 5]).T
|
||||
w = np.array([4, 3, 3, 1, 1, 2, 3]).T
|
||||
|
||||
# A Ridge regression model using an alpha value equal to the ratio of
|
||||
# lambda_ and alpha_ from the Bayesian Ridge model must be identical
|
||||
br_model = BayesianRidge(compute_score=True).fit(X, y, sample_weight=w)
|
||||
rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit(
|
||||
X, y, sample_weight=w)
|
||||
assert_array_almost_equal(rr_model.coef_, br_model.coef_)
|
||||
assert_almost_equal(rr_model.intercept_, br_model.intercept_)
|
||||
|
||||
|
||||
def test_toy_bayesian_ridge_object():
|
||||
# Test BayesianRidge on toy
|
||||
X = np.array([[1], [2], [6], [8], [10]])
|
||||
Y = np.array([1, 2, 6, 8, 10])
|
||||
clf = BayesianRidge(compute_score=True)
|
||||
clf.fit(X, Y)
|
||||
|
||||
# Check that the model could approximately learn the identity function
|
||||
test = [[1], [3], [4]]
|
||||
assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2)
|
||||
|
||||
|
||||
def test_bayesian_initial_params():
|
||||
# Test BayesianRidge with initial values (alpha_init, lambda_init)
|
||||
X = np.vander(np.linspace(0, 4, 5), 4)
|
||||
y = np.array([0., 1., 0., -1., 0.]) # y = (x^3 - 6x^2 + 8x) / 3
|
||||
|
||||
# In this case, starting from the default initial values will increase
|
||||
# the bias of the fitted curve. So, lambda_init should be small.
|
||||
reg = BayesianRidge(alpha_init=1., lambda_init=1e-3)
|
||||
# Check the R2 score nearly equals to one.
|
||||
r2 = reg.fit(X, y).score(X, y)
|
||||
assert_almost_equal(r2, 1.)
|
||||
|
||||
|
||||
def test_prediction_bayesian_ridge_ard_with_constant_input():
|
||||
# Test BayesianRidge and ARDRegression predictions for edge case of
|
||||
# constant target vectors
|
||||
n_samples = 4
|
||||
n_features = 5
|
||||
random_state = check_random_state(42)
|
||||
constant_value = random_state.rand()
|
||||
X = random_state.random_sample((n_samples, n_features))
|
||||
y = np.full(n_samples, constant_value,
|
||||
dtype=np.array(constant_value).dtype)
|
||||
expected = np.full(n_samples, constant_value,
|
||||
dtype=np.array(constant_value).dtype)
|
||||
|
||||
for clf in [BayesianRidge(), ARDRegression()]:
|
||||
y_pred = clf.fit(X, y).predict(X)
|
||||
assert_array_almost_equal(y_pred, expected)
|
||||
|
||||
|
||||
def test_std_bayesian_ridge_ard_with_constant_input():
|
||||
# Test BayesianRidge and ARDRegression standard dev. for edge case of
|
||||
# constant target vector
|
||||
# The standard dev. should be relatively small (< 0.01 is tested here)
|
||||
n_samples = 10
|
||||
n_features = 5
|
||||
random_state = check_random_state(42)
|
||||
constant_value = random_state.rand()
|
||||
X = random_state.random_sample((n_samples, n_features))
|
||||
y = np.full(n_samples, constant_value,
|
||||
dtype=np.array(constant_value).dtype)
|
||||
expected_upper_boundary = 0.01
|
||||
|
||||
for clf in [BayesianRidge(), ARDRegression()]:
|
||||
_, y_std = clf.fit(X, y).predict(X, return_std=True)
|
||||
assert_array_less(y_std, expected_upper_boundary)
|
||||
|
||||
|
||||
def test_update_of_sigma_in_ard():
|
||||
# Checks that `sigma_` is updated correctly after the last iteration
|
||||
# of the ARDRegression algorithm. See issue #10128.
|
||||
X = np.array([[1, 0],
|
||||
[0, 0]])
|
||||
y = np.array([0, 0])
|
||||
clf = ARDRegression(n_iter=1)
|
||||
clf.fit(X, y)
|
||||
# With the inputs above, ARDRegression prunes both of the two coefficients
|
||||
# in the first iteration. Hence, the expected shape of `sigma_` is (0, 0).
|
||||
assert clf.sigma_.shape == (0, 0)
|
||||
# Ensure that no error is thrown at prediction stage
|
||||
clf.predict(X, return_std=True)
|
||||
|
||||
|
||||
def test_toy_ard_object():
|
||||
# Test BayesianRegression ARD classifier
|
||||
X = np.array([[1], [2], [3]])
|
||||
Y = np.array([1, 2, 3])
|
||||
clf = ARDRegression(compute_score=True)
|
||||
clf.fit(X, Y)
|
||||
|
||||
# Check that the model could approximately learn the identity function
|
||||
test = [[1], [3], [4]]
|
||||
assert_array_almost_equal(clf.predict(test), [1, 3, 4], 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('seed', range(100))
|
||||
@pytest.mark.parametrize('n_samples, n_features', ((10, 100), (100, 10)))
|
||||
def test_ard_accuracy_on_easy_problem(seed, n_samples, n_features):
|
||||
# Check that ARD converges with reasonable accuracy on an easy problem
|
||||
# (Github issue #14055)
|
||||
X = np.random.RandomState(seed=seed).normal(size=(250, 3))
|
||||
y = X[:, 1]
|
||||
|
||||
regressor = ARDRegression()
|
||||
regressor.fit(X, y)
|
||||
|
||||
abs_coef_error = np.abs(1 - regressor.coef_[1])
|
||||
assert abs_coef_error < 1e-10
|
||||
|
||||
|
||||
def test_return_std():
|
||||
# Test return_std option for both Bayesian regressors
|
||||
def f(X):
|
||||
return np.dot(X, w) + b
|
||||
|
||||
def f_noise(X, noise_mult):
|
||||
return f(X) + np.random.randn(X.shape[0]) * noise_mult
|
||||
|
||||
d = 5
|
||||
n_train = 50
|
||||
n_test = 10
|
||||
|
||||
w = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
|
||||
b = 1.0
|
||||
|
||||
X = np.random.random((n_train, d))
|
||||
X_test = np.random.random((n_test, d))
|
||||
|
||||
for decimal, noise_mult in enumerate([1, 0.1, 0.01]):
|
||||
y = f_noise(X, noise_mult)
|
||||
|
||||
m1 = BayesianRidge()
|
||||
m1.fit(X, y)
|
||||
y_mean1, y_std1 = m1.predict(X_test, return_std=True)
|
||||
assert_array_almost_equal(y_std1, noise_mult, decimal=decimal)
|
||||
|
||||
m2 = ARDRegression()
|
||||
m2.fit(X, y)
|
||||
y_mean2, y_std2 = m2.predict(X_test, return_std=True)
|
||||
assert_array_almost_equal(y_std2, noise_mult, decimal=decimal)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('seed', range(10))
|
||||
def test_update_sigma(seed):
|
||||
# make sure the two update_sigma() helpers are equivalent. The woodbury
|
||||
# formula is used when n_samples < n_features, and the other one is used
|
||||
# otherwise.
|
||||
|
||||
rng = np.random.RandomState(seed)
|
||||
|
||||
# set n_samples == n_features to avoid instability issues when inverting
|
||||
# the matrices. Using the woodbury formula would be unstable when
|
||||
# n_samples > n_features
|
||||
n_samples = n_features = 10
|
||||
X = rng.randn(n_samples, n_features)
|
||||
alpha = 1
|
||||
lmbda = np.arange(1, n_features + 1)
|
||||
keep_lambda = np.array([True] * n_features)
|
||||
|
||||
reg = ARDRegression()
|
||||
|
||||
sigma = reg._update_sigma(X, alpha, lmbda, keep_lambda)
|
||||
sigma_woodbury = reg._update_sigma_woodbury(X, alpha, lmbda, keep_lambda)
|
||||
|
||||
np.testing.assert_allclose(sigma, sigma_woodbury)
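
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above) of the matrix identity behind the two helpers: with D = diag(lmbda),
# the Woodbury formula gives
#   (D + alpha * X^T X)^-1
#     = D^-1 - D^-1 X^T (I / alpha + X D^-1 X^T)^-1 X D^-1,
# which only inverts an (n_samples, n_samples) matrix and is therefore the
# cheaper route when n_samples < n_features.
def _woodbury_sketch():
    import numpy as np
    rng = np.random.RandomState(0)
    n_samples, n_features = 5, 8
    X = rng.randn(n_samples, n_features)
    alpha = 1.0
    lmbda = np.arange(1.0, n_features + 1)
    D_inv = np.diag(1.0 / lmbda)
    direct = np.linalg.inv(np.diag(lmbda) + alpha * X.T @ X)
    small = np.linalg.inv(np.eye(n_samples) / alpha + X @ D_inv @ X.T)
    woodbury = D_inv - D_inv @ X.T @ small @ X @ D_inv
    np.testing.assert_allclose(direct, woodbury)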
|
File diff suppressed because it is too large
211
venv/Lib/site-packages/sklearn/linear_model/tests/test_huber.py
Normal file
@@ -0,0 +1,211 @@
# Authors: Manoj Kumar mks542@nyu.edu
# License: BSD 3 clause

import numpy as np
from scipy import optimize, sparse

from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_array_almost_equal

from sklearn.datasets import make_regression
from sklearn.linear_model import (
    HuberRegressor, LinearRegression, SGDRegressor, Ridge)
from sklearn.linear_model._huber import _huber_loss_and_gradient
|
||||
|
||||
|
||||
def make_regression_with_outliers(n_samples=50, n_features=20):
|
||||
rng = np.random.RandomState(0)
|
||||
# Generate data with outliers by replacing 10% of the samples with noise.
|
||||
X, y = make_regression(
|
||||
n_samples=n_samples, n_features=n_features,
|
||||
random_state=0, noise=0.05)
|
||||
|
||||
# Replace 10% of the sample with noise.
|
||||
num_noise = int(0.1 * n_samples)
|
||||
random_samples = rng.randint(0, n_samples, num_noise)
|
||||
X[random_samples, :] = 2.0 * rng.normal(0, 1, (num_noise, X.shape[1]))
|
||||
return X, y
|
||||
|
||||
|
||||
def test_huber_equals_lr_for_high_epsilon():
|
||||
# Test that HuberRegressor matches LinearRegression for large epsilon
|
||||
X, y = make_regression_with_outliers()
|
||||
lr = LinearRegression()
|
||||
lr.fit(X, y)
|
||||
huber = HuberRegressor(epsilon=1e3, alpha=0.0)
|
||||
huber.fit(X, y)
|
||||
assert_almost_equal(huber.coef_, lr.coef_, 3)
|
||||
assert_almost_equal(huber.intercept_, lr.intercept_, 2)
|
||||
|
||||
|
||||
def test_huber_max_iter():
|
||||
X, y = make_regression_with_outliers()
|
||||
huber = HuberRegressor(max_iter=1)
|
||||
huber.fit(X, y)
|
||||
assert huber.n_iter_ == huber.max_iter
|
||||
|
||||
|
||||
def test_huber_gradient():
|
||||
# Test that the gradient calculated by _huber_loss_and_gradient is correct
|
||||
rng = np.random.RandomState(1)
|
||||
X, y = make_regression_with_outliers()
|
||||
sample_weight = rng.randint(1, 3, (y.shape[0]))
|
||||
|
||||
def loss_func(x, *args):
|
||||
return _huber_loss_and_gradient(x, *args)[0]
|
||||
|
||||
def grad_func(x, *args):
|
||||
return _huber_loss_and_gradient(x, *args)[1]
|
||||
|
||||
# Check using optimize.check_grad that the gradients are equal.
|
||||
for _ in range(5):
|
||||
# Check for both fit_intercept and otherwise.
|
||||
for n_features in [X.shape[1] + 1, X.shape[1] + 2]:
|
||||
w = rng.randn(n_features)
|
||||
w[-1] = np.abs(w[-1])
|
||||
grad_same = optimize.check_grad(
|
||||
loss_func, grad_func, w, X, y, 0.01, 0.1, sample_weight)
|
||||
assert_almost_equal(grad_same, 1e-6, 4)
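
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above): optimize.check_grad returns the 2-norm of the difference between
# the supplied gradient and a finite-difference approximation of it, so a
# value close to zero (compared against 1e-6 above) means the analytical
# gradient is consistent with the loss.
def _check_grad_sketch():
    import numpy as np
    from scipy import optimize

    def loss(w):
        return np.sum(w ** 2)

    def grad(w):
        return 2 * w

    err = optimize.check_grad(loss, grad, np.array([1.0, -2.0, 3.0]))
    assert err < 1e-5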
|
||||
|
||||
|
||||
def test_huber_sample_weights():
|
||||
# Test sample_weights implementation in HuberRegressor
|
||||
|
||||
X, y = make_regression_with_outliers()
|
||||
huber = HuberRegressor()
|
||||
huber.fit(X, y)
|
||||
huber_coef = huber.coef_
|
||||
huber_intercept = huber.intercept_
|
||||
|
||||
# Rescale coefs before comparing with assert_array_almost_equal to make
|
||||
# sure that the number of decimal places used is somewhat insensitive to
|
||||
# the amplitude of the coefficients and therefore to the scale of the
|
||||
# data and the regularization parameter
|
||||
scale = max(np.mean(np.abs(huber.coef_)),
|
||||
np.mean(np.abs(huber.intercept_)))
|
||||
|
||||
huber.fit(X, y, sample_weight=np.ones(y.shape[0]))
|
||||
assert_array_almost_equal(huber.coef_ / scale, huber_coef / scale)
|
||||
assert_array_almost_equal(huber.intercept_ / scale,
|
||||
huber_intercept / scale)
|
||||
|
||||
X, y = make_regression_with_outliers(n_samples=5, n_features=20)
|
||||
X_new = np.vstack((X, np.vstack((X[1], X[1], X[3]))))
|
||||
y_new = np.concatenate((y, [y[1]], [y[1]], [y[3]]))
|
||||
huber.fit(X_new, y_new)
|
||||
huber_coef = huber.coef_
|
||||
huber_intercept = huber.intercept_
|
||||
sample_weight = np.ones(X.shape[0])
|
||||
sample_weight[1] = 3
|
||||
sample_weight[3] = 2
|
||||
huber.fit(X, y, sample_weight=sample_weight)
|
||||
|
||||
assert_array_almost_equal(huber.coef_ / scale, huber_coef / scale)
|
||||
assert_array_almost_equal(huber.intercept_ / scale,
|
||||
huber_intercept / scale)
|
||||
|
||||
# Test sparse implementation with sample weights.
|
||||
X_csr = sparse.csr_matrix(X)
|
||||
huber_sparse = HuberRegressor()
|
||||
huber_sparse.fit(X_csr, y, sample_weight=sample_weight)
|
||||
assert_array_almost_equal(huber_sparse.coef_ / scale,
|
||||
huber_coef / scale)
|
||||
|
||||
|
||||
def test_huber_sparse():
|
||||
X, y = make_regression_with_outliers()
|
||||
huber = HuberRegressor(alpha=0.1)
|
||||
huber.fit(X, y)
|
||||
|
||||
X_csr = sparse.csr_matrix(X)
|
||||
huber_sparse = HuberRegressor(alpha=0.1)
|
||||
huber_sparse.fit(X_csr, y)
|
||||
assert_array_almost_equal(huber_sparse.coef_, huber.coef_)
|
||||
assert_array_equal(huber.outliers_, huber_sparse.outliers_)
|
||||
|
||||
|
||||
def test_huber_scaling_invariant():
|
||||
# Test that outliers filtering is scaling independent.
|
||||
X, y = make_regression_with_outliers()
|
||||
huber = HuberRegressor(fit_intercept=False, alpha=0.0, max_iter=100)
|
||||
huber.fit(X, y)
|
||||
n_outliers_mask_1 = huber.outliers_
|
||||
assert not np.all(n_outliers_mask_1)
|
||||
|
||||
huber.fit(X, 2. * y)
|
||||
n_outliers_mask_2 = huber.outliers_
|
||||
assert_array_equal(n_outliers_mask_2, n_outliers_mask_1)
|
||||
|
||||
huber.fit(2. * X, 2. * y)
|
||||
n_outliers_mask_3 = huber.outliers_
|
||||
assert_array_equal(n_outliers_mask_3, n_outliers_mask_1)
|
||||
|
||||
|
||||
def test_huber_and_sgd_same_results():
|
||||
# Test they should converge to same coefficients for same parameters
|
||||
|
||||
X, y = make_regression_with_outliers(n_samples=10, n_features=2)
|
||||
|
||||
# Fit once to find out the scale parameter. Scale down X and y by scale
|
||||
# so that the scale parameter is optimized to 1.0
|
||||
huber = HuberRegressor(fit_intercept=False, alpha=0.0, max_iter=100,
|
||||
epsilon=1.35)
|
||||
huber.fit(X, y)
|
||||
X_scale = X / huber.scale_
|
||||
y_scale = y / huber.scale_
|
||||
huber.fit(X_scale, y_scale)
|
||||
assert_almost_equal(huber.scale_, 1.0, 3)
|
||||
|
||||
sgdreg = SGDRegressor(
|
||||
alpha=0.0, loss="huber", shuffle=True, random_state=0, max_iter=10000,
|
||||
fit_intercept=False, epsilon=1.35, tol=None)
|
||||
sgdreg.fit(X_scale, y_scale)
|
||||
assert_array_almost_equal(huber.coef_, sgdreg.coef_, 1)
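
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above) of the Huber loss with threshold epsilon: quadratic for small
# residuals, linear beyond epsilon, so outliers contribute far less than in
# ordinary least squares.  Both estimators above optimize a loss of this
# shape (up to constant factors and an internal scale parameter), which is
# why they converge to similar coefficients once scale_ is 1 and alpha is 0.
def _huber_loss_sketch(residual, epsilon=1.35):
    import numpy as np
    r = np.abs(residual)
    return np.where(r <= epsilon,
                    0.5 * r ** 2,
                    epsilon * r - 0.5 * epsilon ** 2)


# e.g. _huber_loss_sketch(np.array([0.5, 10.0])) gives [0.125, 12.58875]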
|
||||
|
||||
|
||||
def test_huber_warm_start():
|
||||
X, y = make_regression_with_outliers()
|
||||
huber_warm = HuberRegressor(
|
||||
alpha=1.0, max_iter=10000, warm_start=True, tol=1e-1)
|
||||
|
||||
huber_warm.fit(X, y)
|
||||
huber_warm_coef = huber_warm.coef_.copy()
|
||||
huber_warm.fit(X, y)
|
||||
|
||||
# SciPy performs the tol check after doing the coef updates, so
|
||||
# these would be almost same but not equal.
|
||||
assert_array_almost_equal(huber_warm.coef_, huber_warm_coef, 1)
|
||||
|
||||
assert huber_warm.n_iter_ == 0
|
||||
|
||||
|
||||
def test_huber_better_r2_score():
|
||||
# Test that Huber gives a better R2 score than Ridge on the non-outliers
|
||||
X, y = make_regression_with_outliers()
|
||||
huber = HuberRegressor(alpha=0.01)
|
||||
huber.fit(X, y)
|
||||
linear_loss = np.dot(X, huber.coef_) + huber.intercept_ - y
|
||||
mask = np.abs(linear_loss) < huber.epsilon * huber.scale_
|
||||
huber_score = huber.score(X[mask], y[mask])
|
||||
huber_outlier_score = huber.score(X[~mask], y[~mask])
|
||||
|
||||
# The Ridge regressor should be influenced by the outliers and hence
|
||||
# give a worse score on the non-outliers as compared to the huber
|
||||
# regressor.
|
||||
ridge = Ridge(alpha=0.01)
|
||||
ridge.fit(X, y)
|
||||
ridge_score = ridge.score(X[mask], y[mask])
|
||||
ridge_outlier_score = ridge.score(X[~mask], y[~mask])
|
||||
assert huber_score > ridge_score
|
||||
|
||||
# The huber model should also fit poorly on the outliers.
|
||||
assert ridge_outlier_score > huber_outlier_score
|
||||
|
||||
|
||||
def test_huber_bool():
|
||||
# Test that it does not crash with bool data
|
||||
X, y = make_regression(n_samples=200, n_features=2, noise=4.0,
|
||||
random_state=0)
|
||||
X_bool = X > 0
|
||||
HuberRegressor().fit(X_bool, y)
|
@@ -0,0 +1,775 @@
import warnings

import numpy as np
import pytest
from scipy import linalg

from sklearn.base import clone
from sklearn.model_selection import train_test_split
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_raises
from sklearn.utils._testing import ignore_warnings
from sklearn.utils._testing import assert_warns
from sklearn.utils._testing import TempMemmap
from sklearn.utils.fixes import np_version, parse_version
from sklearn.exceptions import ConvergenceWarning
from sklearn import linear_model, datasets
from sklearn.linear_model._least_angle import _lars_path_residues
from sklearn.linear_model import LassoLarsIC, lars_path
from sklearn.linear_model import Lars, LassoLars

# TODO: use another dataset that has multiple drops
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target
G = np.dot(X.T, X)
Xy = np.dot(X.T, y)
n_samples = y.size
|
||||
|
||||
|
||||
def test_simple():
|
||||
# Principle of Lars is to keep covariances tied and decreasing
|
||||
|
||||
# also test verbose output
|
||||
from io import StringIO
|
||||
import sys
|
||||
old_stdout = sys.stdout
|
||||
try:
|
||||
sys.stdout = StringIO()
|
||||
|
||||
_, _, coef_path_ = linear_model.lars_path(
|
||||
X, y, method='lar', verbose=10)
|
||||
|
||||
sys.stdout = old_stdout
|
||||
|
||||
for i, coef_ in enumerate(coef_path_.T):
|
||||
res = y - np.dot(X, coef_)
|
||||
cov = np.dot(X.T, res)
|
||||
C = np.max(abs(cov))
|
||||
eps = 1e-3
|
||||
ocur = len(cov[C - eps < abs(cov)])
|
||||
if i < X.shape[1]:
|
||||
assert ocur == i + 1
|
||||
else:
|
||||
# no more than max_pred variables can go into the active set
|
||||
assert ocur == X.shape[1]
|
||||
finally:
|
||||
sys.stdout = old_stdout
|
||||
|
||||
|
||||
def test_simple_precomputed():
|
||||
# The same, with precomputed Gram matrix
|
||||
|
||||
_, _, coef_path_ = linear_model.lars_path(
|
||||
X, y, Gram=G, method='lar')
|
||||
|
||||
for i, coef_ in enumerate(coef_path_.T):
|
||||
res = y - np.dot(X, coef_)
|
||||
cov = np.dot(X.T, res)
|
||||
C = np.max(abs(cov))
|
||||
eps = 1e-3
|
||||
ocur = len(cov[C - eps < abs(cov)])
|
||||
if i < X.shape[1]:
|
||||
assert ocur == i + 1
|
||||
else:
|
||||
# no more than max_pred variables can go into the active set
|
||||
assert ocur == X.shape[1]
|
||||
|
||||
|
||||
def _assert_same_lars_path_result(output1, output2):
|
||||
assert len(output1) == len(output2)
|
||||
for o1, o2 in zip(output1, output2):
|
||||
assert_allclose(o1, o2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ['lar', 'lasso'])
|
||||
@pytest.mark.parametrize('return_path', [True, False])
|
||||
def test_lars_path_gram_equivalent(method, return_path):
|
||||
_assert_same_lars_path_result(
|
||||
linear_model.lars_path_gram(
|
||||
Xy=Xy, Gram=G, n_samples=n_samples, method=method,
|
||||
return_path=return_path),
|
||||
linear_model.lars_path(
|
||||
X, y, Gram=G, method=method,
|
||||
return_path=return_path))
|
||||
|
||||
|
||||
def test_x_none_gram_none_raises_value_error():
|
||||
# Test that lars_path with no X and Gram raises exception
|
||||
Xy = np.dot(X.T, y)
|
||||
assert_raises(ValueError, linear_model.lars_path, None, y, Gram=None,
|
||||
Xy=Xy)
|
||||
|
||||
|
||||
def test_all_precomputed():
|
||||
# Test that lars_path with precomputed Gram and Xy gives the right answer
|
||||
G = np.dot(X.T, X)
|
||||
Xy = np.dot(X.T, y)
|
||||
for method in 'lar', 'lasso':
|
||||
output = linear_model.lars_path(X, y, method=method)
|
||||
output_pre = linear_model.lars_path(X, y, Gram=G, Xy=Xy,
|
||||
method=method)
|
||||
for expected, got in zip(output, output_pre):
|
||||
assert_array_almost_equal(expected, got)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore: `rcond` parameter will change')
|
||||
# numpy deprecation
|
||||
def test_lars_lstsq():
|
||||
# Test that Lars gives least square solution at the end
|
||||
# of the path
|
||||
X1 = 3 * X # use un-normalized dataset
|
||||
clf = linear_model.LassoLars(alpha=0.)
|
||||
clf.fit(X1, y)
|
||||
# Avoid FutureWarning about default value change when numpy >= 1.14
|
||||
rcond = None if np_version >= parse_version('1.14') else -1
|
||||
coef_lstsq = np.linalg.lstsq(X1, y, rcond=rcond)[0]
|
||||
assert_array_almost_equal(clf.coef_, coef_lstsq)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:`rcond` parameter will change')
|
||||
# numpy deprecation
|
||||
def test_lasso_gives_lstsq_solution():
|
||||
# Test that Lars Lasso gives least square solution at the end
|
||||
# of the path
|
||||
_, _, coef_path_ = linear_model.lars_path(X, y, method='lasso')
|
||||
coef_lstsq = np.linalg.lstsq(X, y)[0]
|
||||
assert_array_almost_equal(coef_lstsq, coef_path_[:, -1])
|
||||
|
||||
|
||||
def test_collinearity():
|
||||
# Check that lars_path is robust to collinearity in input
|
||||
X = np.array([[3., 3., 1.],
|
||||
[2., 2., 0.],
|
||||
[1., 1., 0]])
|
||||
y = np.array([1., 0., 0])
|
||||
rng = np.random.RandomState(0)
|
||||
|
||||
f = ignore_warnings
|
||||
_, _, coef_path_ = f(linear_model.lars_path)(X, y, alpha_min=0.01)
|
||||
assert not np.isnan(coef_path_).any()
|
||||
residual = np.dot(X, coef_path_[:, -1]) - y
|
||||
assert (residual ** 2).sum() < 1. # just make sure it's bounded
|
||||
|
||||
n_samples = 10
|
||||
X = rng.rand(n_samples, 5)
|
||||
y = np.zeros(n_samples)
|
||||
_, _, coef_path_ = linear_model.lars_path(X, y, Gram='auto', copy_X=False,
|
||||
copy_Gram=False, alpha_min=0.,
|
||||
method='lasso', verbose=0,
|
||||
max_iter=500)
|
||||
assert_array_almost_equal(coef_path_, np.zeros_like(coef_path_))
|
||||
|
||||
|
||||
def test_no_path():
|
||||
# Test that the ``return_path=False`` option returns the correct output
|
||||
alphas_, _, coef_path_ = linear_model.lars_path(
|
||||
X, y, method='lar')
|
||||
alpha_, _, coef = linear_model.lars_path(
|
||||
X, y, method='lar', return_path=False)
|
||||
|
||||
assert_array_almost_equal(coef, coef_path_[:, -1])
|
||||
assert alpha_ == alphas_[-1]
|
||||
|
||||
|
||||
def test_no_path_precomputed():
|
||||
# Test that the ``return_path=False`` option with Gram remains correct
|
||||
alphas_, _, coef_path_ = linear_model.lars_path(
|
||||
X, y, method='lar', Gram=G)
|
||||
alpha_, _, coef = linear_model.lars_path(
|
||||
X, y, method='lar', Gram=G, return_path=False)
|
||||
|
||||
assert_array_almost_equal(coef, coef_path_[:, -1])
|
||||
assert alpha_ == alphas_[-1]
|
||||
|
||||
|
||||
def test_no_path_all_precomputed():
|
||||
# Test that the ``return_path=False`` option with Gram and Xy remains
|
||||
# correct
|
||||
X, y = 3 * diabetes.data, diabetes.target
|
||||
G = np.dot(X.T, X)
|
||||
Xy = np.dot(X.T, y)
|
||||
alphas_, _, coef_path_ = linear_model.lars_path(
|
||||
X, y, method='lasso', Xy=Xy, Gram=G, alpha_min=0.9)
|
||||
alpha_, _, coef = linear_model.lars_path(
|
||||
X, y, method='lasso', Gram=G, Xy=Xy, alpha_min=0.9, return_path=False)
|
||||
|
||||
assert_array_almost_equal(coef, coef_path_[:, -1])
|
||||
assert alpha_ == alphas_[-1]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'classifier',
|
||||
[linear_model.Lars, linear_model.LarsCV, linear_model.LassoLarsIC])
|
||||
def test_lars_precompute(classifier):
|
||||
# Check for different values of precompute
|
||||
G = np.dot(X.T, X)
|
||||
|
||||
clf = classifier(precompute=G)
|
||||
output_1 = ignore_warnings(clf.fit)(X, y).coef_
|
||||
for precompute in [True, False, 'auto', None]:
|
||||
clf = classifier(precompute=precompute)
|
||||
output_2 = clf.fit(X, y).coef_
|
||||
assert_array_almost_equal(output_1, output_2, decimal=8)
|
||||
|
||||
|
||||
def test_singular_matrix():
|
||||
# Test when input is a singular matrix
|
||||
X1 = np.array([[1, 1.], [1., 1.]])
|
||||
y1 = np.array([1, 1])
|
||||
_, _, coef_path = linear_model.lars_path(X1, y1)
|
||||
assert_array_almost_equal(coef_path.T, [[0, 0], [1, 0]])
|
||||
|
||||
|
||||
def test_rank_deficient_design():
|
||||
# consistency test that checks that LARS Lasso is handling rank
|
||||
# deficient input data (with n_features < rank) in the same way
|
||||
# as coordinate descent Lasso
|
||||
y = [5, 0, 5]
|
||||
for X in (
|
||||
[[5, 0],
|
||||
[0, 5],
|
||||
[10, 10]],
|
||||
[[10, 10, 0],
|
||||
[1e-32, 0, 0],
|
||||
[0, 0, 1]]
|
||||
):
|
||||
# To be able to use the coefs to compute the objective function,
|
||||
# we need to turn off normalization
|
||||
lars = linear_model.LassoLars(.1, normalize=False)
|
||||
coef_lars_ = lars.fit(X, y).coef_
|
||||
obj_lars = (1. / (2. * 3.)
|
||||
* linalg.norm(y - np.dot(X, coef_lars_)) ** 2
|
||||
+ .1 * linalg.norm(coef_lars_, 1))
|
||||
coord_descent = linear_model.Lasso(.1, tol=1e-6, normalize=False)
|
||||
coef_cd_ = coord_descent.fit(X, y).coef_
|
||||
obj_cd = ((1. / (2. * 3.)) * linalg.norm(y - np.dot(X, coef_cd_)) ** 2
|
||||
+ .1 * linalg.norm(coef_cd_, 1))
|
||||
assert obj_lars < obj_cd * (1. + 1e-8)
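
# A minimal illustrative sketch (hypothetical helper, not part of the test
# above): the quantity compared for both solvers is the Lasso objective
#   (1 / (2 * n_samples)) * ||y - X w||_2^2 + alpha * ||w||_1
# here with n_samples = 3 and alpha = 0.1, written out as a small helper.
def _lasso_objective_sketch(X, y, coef, alpha=0.1):
    import numpy as np
    X, y, coef = np.asarray(X), np.asarray(y), np.asarray(coef)
    n_samples = X.shape[0]
    residual = y - X @ coef
    return (np.sum(residual ** 2) / (2. * n_samples)
            + alpha * np.sum(np.abs(coef)))


# e.g. _lasso_objective_sketch([[5, 0], [0, 5], [10, 10]], [5, 0, 5], [1., 0.])
# evaluates the objective for a candidate coefficient vector on the first
# design used above.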
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd():
|
||||
# Test that LassoLars and Lasso using coordinate descent give the
|
||||
# same results.
|
||||
X = 3 * diabetes.data
|
||||
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
|
||||
for c, a in zip(lasso_path.T, alphas):
|
||||
if a == 0:
|
||||
continue
|
||||
lasso_cd.alpha = a
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(c - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
# similar test, with the classifiers
|
||||
for alpha in np.linspace(1e-2, 1 - 1e-2, 20):
|
||||
clf1 = linear_model.LassoLars(alpha=alpha, normalize=False).fit(X, y)
|
||||
clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8,
|
||||
normalize=False).fit(X, y)
|
||||
err = linalg.norm(clf1.coef_ - clf2.coef_)
|
||||
assert err < 1e-3
|
||||
|
||||
# same test, with normalized data
|
||||
X = diabetes.data
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, normalize=True,
|
||||
tol=1e-8)
|
||||
for c, a in zip(lasso_path.T, alphas):
|
||||
if a == 0:
|
||||
continue
|
||||
lasso_cd.alpha = a
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(c - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd_early_stopping():
|
||||
# Test that LassoLars and Lasso using coordinate descent give the
|
||||
# same results when early stopping is used.
|
||||
# (test : before, in the middle, and in the last part of the path)
|
||||
alphas_min = [10, 0.9, 1e-4]
|
||||
|
||||
for alpha_min in alphas_min:
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
|
||||
alpha_min=alpha_min)
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
|
||||
lasso_cd.alpha = alphas[-1]
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
# same test, with normalization
|
||||
for alpha_min in alphas_min:
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
|
||||
alpha_min=alpha_min)
|
||||
lasso_cd = linear_model.Lasso(normalize=True, tol=1e-8)
|
||||
lasso_cd.alpha = alphas[-1]
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
|
||||
def test_lasso_lars_path_length():
|
||||
# Test that the path length of the LassoLars is right
|
||||
lasso = linear_model.LassoLars()
|
||||
lasso.fit(X, y)
|
||||
lasso2 = linear_model.LassoLars(alpha=lasso.alphas_[2])
|
||||
lasso2.fit(X, y)
|
||||
assert_array_almost_equal(lasso.alphas_[:3], lasso2.alphas_)
|
||||
# Also check that the sequence of alphas is always decreasing
|
||||
assert np.all(np.diff(lasso.alphas_) < 0)
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
|
||||
# Test lasso lars on a very ill-conditioned design, and check that
|
||||
# it does not blow up, and stays somewhat close to a solution given
|
||||
# by the coordinate descent solver
|
||||
# Also test that lasso_path (using lars_path output style) gives
|
||||
# the same result as lars_path and previous lasso output style
|
||||
# under these conditions.
|
||||
rng = np.random.RandomState(42)
|
||||
|
||||
# Generate data
|
||||
n, m = 70, 100
|
||||
k = 5
|
||||
X = rng.randn(n, m)
|
||||
w = np.zeros((m, 1))
|
||||
i = np.arange(0, m)
|
||||
rng.shuffle(i)
|
||||
supp = i[:k]
|
||||
w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
|
||||
y = np.dot(X, w)
|
||||
sigma = 0.2
|
||||
y += sigma * rng.rand(*y.shape)
|
||||
y = y.squeeze()
|
||||
lars_alphas, _, lars_coef = linear_model.lars_path(X, y, method='lasso')
|
||||
|
||||
_, lasso_coef2, _ = linear_model.lasso_path(X, y,
|
||||
alphas=lars_alphas,
|
||||
tol=1e-6,
|
||||
fit_intercept=False)
|
||||
|
||||
assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd_ill_conditioned2():
|
||||
# Create an ill-conditioned situation in which the LARS has to go
|
||||
# far in the path to converge, and check that LARS and coordinate
|
||||
# descent give the same answers
|
||||
# Note it used to be the case that Lars had to use the drop for good
|
||||
# strategy for this but this is no longer the case with the
|
||||
# equality_tolerance checks
|
||||
X = [[1e20, 1e20, 0],
|
||||
[-1e-32, 0, 0],
|
||||
[1, 1, 1]]
|
||||
y = [10, 10, 1]
|
||||
alpha = .0001
|
||||
|
||||
def objective_function(coef):
|
||||
return (1. / (2. * len(X)) * linalg.norm(y - np.dot(X, coef)) ** 2
|
||||
+ alpha * linalg.norm(coef, 1))
|
||||
|
||||
lars = linear_model.LassoLars(alpha=alpha, normalize=False)
|
||||
assert_warns(ConvergenceWarning, lars.fit, X, y)
|
||||
lars_coef_ = lars.coef_
|
||||
lars_obj = objective_function(lars_coef_)
|
||||
|
||||
coord_descent = linear_model.Lasso(alpha=alpha, tol=1e-4, normalize=False)
|
||||
cd_coef_ = coord_descent.fit(X, y).coef_
|
||||
cd_obj = objective_function(cd_coef_)
|
||||
|
||||
assert lars_obj < cd_obj * (1. + 1e-8)
|
||||
|
||||
|
||||
def test_lars_add_features():
|
||||
# assure that at least some features get added if necessary
|
||||
# test for 6d2b4c
|
||||
# Hilbert matrix
|
||||
n = 5
|
||||
H = 1. / (np.arange(1, n + 1) + np.arange(n)[:, np.newaxis])
|
||||
clf = linear_model.Lars(fit_intercept=False).fit(
|
||||
H, np.arange(n))
|
||||
assert np.all(np.isfinite(clf.coef_))
|
||||
|
||||
|
||||
def test_lars_n_nonzero_coefs(verbose=False):
|
||||
lars = linear_model.Lars(n_nonzero_coefs=6, verbose=verbose)
|
||||
lars.fit(X, y)
|
||||
assert len(lars.coef_.nonzero()[0]) == 6
|
||||
# The path should be of length 6 + 1 in a Lars going down to 6
|
||||
# non-zero coefs
|
||||
assert len(lars.alphas_) == 7
|
||||
|
||||
|
||||
@ignore_warnings
|
||||
def test_multitarget():
|
||||
# Assure that estimators receiving multidimensional y do the right thing
|
||||
Y = np.vstack([y, y ** 2]).T
|
||||
n_targets = Y.shape[1]
|
||||
estimators = [
|
||||
linear_model.LassoLars(),
|
||||
linear_model.Lars(),
|
||||
# regression test for gh-1615
|
||||
linear_model.LassoLars(fit_intercept=False),
|
||||
linear_model.Lars(fit_intercept=False),
|
||||
]
|
||||
|
||||
for estimator in estimators:
|
||||
estimator.fit(X, Y)
|
||||
Y_pred = estimator.predict(X)
|
||||
alphas, active, coef, path = (estimator.alphas_, estimator.active_,
|
||||
estimator.coef_, estimator.coef_path_)
|
||||
for k in range(n_targets):
|
||||
estimator.fit(X, Y[:, k])
|
||||
y_pred = estimator.predict(X)
|
||||
assert_array_almost_equal(alphas[k], estimator.alphas_)
|
||||
assert_array_almost_equal(active[k], estimator.active_)
|
||||
assert_array_almost_equal(coef[k], estimator.coef_)
|
||||
assert_array_almost_equal(path[k], estimator.coef_path_)
|
||||
assert_array_almost_equal(Y_pred[:, k], y_pred)
|
||||
|
||||
|
||||
def test_lars_cv():
|
||||
# Test the LassoLarsCV object by checking that the optimal alpha
|
||||
# increases as the number of samples increases.
|
||||
# This property is not actually guaranteed in general and is just a
|
||||
# property of the given dataset, with the given steps chosen.
|
||||
old_alpha = 0
|
||||
lars_cv = linear_model.LassoLarsCV()
|
||||
for length in (400, 200, 100):
|
||||
X = diabetes.data[:length]
|
||||
y = diabetes.target[:length]
|
||||
lars_cv.fit(X, y)
|
||||
np.testing.assert_array_less(old_alpha, lars_cv.alpha_)
|
||||
old_alpha = lars_cv.alpha_
|
||||
assert not hasattr(lars_cv, 'n_nonzero_coefs')
|
||||
|
||||
|
||||
def test_lars_cv_max_iter(recwarn):
|
||||
warnings.simplefilter('always')
|
||||
with np.errstate(divide='raise', invalid='raise'):
|
||||
X = diabetes.data
|
||||
y = diabetes.target
|
||||
rng = np.random.RandomState(42)
|
||||
x = rng.randn(len(y))
|
||||
X = diabetes.data
|
||||
X = np.c_[X, x, x] # add correlated features
|
||||
lars_cv = linear_model.LassoLarsCV(max_iter=5, cv=5)
|
||||
lars_cv.fit(X, y)
|
||||
# Check that there is no warning in general and no ConvergenceWarning
|
||||
# in particular.
|
||||
# Materialize the string representation of the warning to get a more
|
||||
# informative error message in case of AssertionError.
|
||||
recorded_warnings = [str(w) for w in recwarn]
|
||||
assert recorded_warnings == []
|
||||
|
||||
|
||||
def test_lasso_lars_ic():
|
||||
# Test the LassoLarsIC object by checking that
|
||||
# - some good features are selected.
|
||||
# - alpha_bic > alpha_aic
|
||||
# - n_nonzero_bic < n_nonzero_aic
|
||||
lars_bic = linear_model.LassoLarsIC('bic')
|
||||
lars_aic = linear_model.LassoLarsIC('aic')
|
||||
rng = np.random.RandomState(42)
|
||||
X = diabetes.data
|
||||
X = np.c_[X, rng.randn(X.shape[0], 5)] # add 5 bad features
|
||||
lars_bic.fit(X, y)
|
||||
lars_aic.fit(X, y)
|
||||
nonzero_bic = np.where(lars_bic.coef_)[0]
|
||||
nonzero_aic = np.where(lars_aic.coef_)[0]
|
||||
assert lars_bic.alpha_ > lars_aic.alpha_
|
||||
assert len(nonzero_bic) < len(nonzero_aic)
|
||||
assert np.max(nonzero_bic) < diabetes.data.shape[1]
|
||||
|
||||
# test error on unknown IC
|
||||
lars_broken = linear_model.LassoLarsIC('<unknown>')
|
||||
assert_raises(ValueError, lars_broken.fit, X, y)
|
||||
|
||||
|
||||
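# Illustrative sketch (not part of the original test file): the classical
# Gaussian-noise information criteria behind the assertions in
# test_lasso_lars_ic above. This is a textbook approximation, not necessarily
# the exact expression LassoLarsIC evaluates internally; `df` is taken as the
# number of nonzero coefficients:
#   AIC ~ n * log(RSS / n) + 2 * df
#   BIC ~ n * log(RSS / n) + log(n) * df
# BIC penalizes model size more strongly, which is why the test expects a
# larger alpha_ and fewer nonzero coefficients for criterion='bic'.
def _toy_information_criterion(X, y, coef, criterion='aic'):
    residuals = y - np.dot(X, coef)
    rss = np.dot(residuals, residuals)
    n_samples_ = len(y)
    df = np.count_nonzero(coef)
    penalty = 2. if criterion == 'aic' else np.log(n_samples_)
    return n_samples_ * np.log(rss / n_samples_) + penalty * df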
def test_lars_path_readonly_data():
|
||||
# When using automated memory mapping on large input, the
|
||||
# fold data is in read-only mode
|
||||
# This is a non-regression test for:
|
||||
# https://github.com/scikit-learn/scikit-learn/issues/4597
|
||||
splitted_data = train_test_split(X, y, random_state=42)
|
||||
with TempMemmap(splitted_data) as (X_train, X_test, y_train, y_test):
|
||||
# The following should not fail despite copy=False
|
||||
_lars_path_residues(X_train, y_train, X_test, y_test, copy=False)
|
||||
|
||||
|
||||
def test_lars_path_positive_constraint():
|
||||
# this is the main test for the positive parameter on the lars_path method
|
||||
# the estimator classes just make use of this function
|
||||
|
||||
# we do the test on the diabetes dataset
|
||||
|
||||
# ensure that we get negative coefficients when positive=False
|
||||
# and all positive when positive=True
|
||||
# for method 'lar' (default) and lasso
|
||||
|
||||
err_msg = "Positive constraint not supported for 'lar' coding method."
|
||||
with pytest.raises(ValueError, match=err_msg):
|
||||
linear_model.lars_path(diabetes['data'], diabetes['target'],
|
||||
method='lar', positive=True)
|
||||
|
||||
method = 'lasso'
|
||||
_, _, coefs = \
|
||||
linear_model.lars_path(X, y, return_path=True, method=method,
|
||||
positive=False)
|
||||
assert coefs.min() < 0
|
||||
|
||||
_, _, coefs = \
|
||||
linear_model.lars_path(X, y, return_path=True, method=method,
|
||||
positive=True)
|
||||
assert coefs.min() >= 0
|
||||
|
||||
|
||||
# now we are going to test the positive option for all estimator classes
|
||||
|
||||
default_parameter = {'fit_intercept': False}
|
||||
|
||||
estimator_parameter_map = {'LassoLars': {'alpha': 0.1},
|
||||
'LassoLarsCV': {},
|
||||
'LassoLarsIC': {}}
|
||||
|
||||
|
||||
def test_estimatorclasses_positive_constraint():
|
||||
    # check that the positive option is correctly propagated to all the
    # estimator classes, all within this same function
|
||||
default_parameter = {'fit_intercept': False}
|
||||
|
||||
estimator_parameter_map = {'LassoLars': {'alpha': 0.1},
|
||||
'LassoLarsCV': {},
|
||||
'LassoLarsIC': {}}
|
||||
for estname in estimator_parameter_map:
|
||||
params = default_parameter.copy()
|
||||
params.update(estimator_parameter_map[estname])
|
||||
estimator = getattr(linear_model, estname)(positive=False, **params)
|
||||
estimator.fit(X, y)
|
||||
assert estimator.coef_.min() < 0
|
||||
estimator = getattr(linear_model, estname)(positive=True, **params)
|
||||
estimator.fit(X, y)
|
||||
assert min(estimator.coef_) >= 0
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd_positive():
|
||||
# Test that LassoLars and Lasso using coordinate descent give the
|
||||
# same results when using the positive option
|
||||
|
||||
    # This test is basically a copy of the one above, with the positive
    # option added. However, for the middle part (the comparison of
    # coefficient values over a range of alphas) an adaptation was needed.
    # See below.
|
||||
|
||||
# not normalized data
|
||||
X = 3 * diabetes.data
|
||||
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
|
||||
positive=True)
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8, positive=True)
|
||||
for c, a in zip(lasso_path.T, alphas):
|
||||
if a == 0:
|
||||
continue
|
||||
lasso_cd.alpha = a
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(c - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
    # The range of alphas chosen for the coefficient comparison here is
    # restricted compared with the test above without the positive option.
    # This is because the Lars-Lasso algorithm does not converge to the
    # least-squares solution for small alphas; see 'Least Angle Regression'
    # by Efron et al. 2004. The coefficients typically agree up to the
    # smallest alpha reached by the Lars-Lasso algorithm and start to
    # diverge thereafter. See
    # https://gist.github.com/michigraber/7e7d7c75eca694c7a6ff
|
||||
|
||||
for alpha in np.linspace(6e-1, 1 - 1e-2, 20):
|
||||
clf1 = linear_model.LassoLars(fit_intercept=False, alpha=alpha,
|
||||
normalize=False, positive=True).fit(X, y)
|
||||
clf2 = linear_model.Lasso(fit_intercept=False, alpha=alpha, tol=1e-8,
|
||||
normalize=False, positive=True).fit(X, y)
|
||||
err = linalg.norm(clf1.coef_ - clf2.coef_)
|
||||
assert err < 1e-3
|
||||
|
||||
# normalized data
|
||||
X = diabetes.data
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
|
||||
positive=True)
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, normalize=True,
|
||||
tol=1e-8, positive=True)
|
||||
for c, a in zip(lasso_path.T[:-1], alphas[:-1]): # don't include alpha=0
|
||||
lasso_cd.alpha = a
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(c - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
|
||||
def test_lasso_lars_vs_R_implementation():
|
||||
# Test that sklearn LassoLars implementation agrees with the LassoLars
|
||||
# implementation available in R (lars library) under the following
|
||||
# scenarios:
|
||||
# 1) fit_intercept=False and normalize=False
|
||||
# 2) fit_intercept=True and normalize=True
|
||||
|
||||
# Let's generate the data used in the bug report 7778
|
||||
y = np.array([-6.45006793, -3.51251449, -8.52445396, 6.12277822,
|
||||
-19.42109366])
|
||||
x = np.array([[0.47299829, 0, 0, 0, 0],
|
||||
[0.08239882, 0.85784863, 0, 0, 0],
|
||||
[0.30114139, -0.07501577, 0.80895216, 0, 0],
|
||||
[-0.01460346, -0.1015233, 0.0407278, 0.80338378, 0],
|
||||
[-0.69363927, 0.06754067, 0.18064514, -0.0803561,
|
||||
0.40427291]])
|
||||
|
||||
X = x.T
|
||||
|
||||
###########################################################################
|
||||
# Scenario 1: Let's compare R vs sklearn when fit_intercept=False and
|
||||
# normalize=False
|
||||
###########################################################################
|
||||
#
|
||||
# The R result was obtained using the following code:
|
||||
#
|
||||
# library(lars)
|
||||
# model_lasso_lars = lars(X, t(y), type="lasso", intercept=FALSE,
|
||||
# trace=TRUE, normalize=FALSE)
|
||||
# r = t(model_lasso_lars$beta)
|
||||
#
|
||||
|
||||
r = np.array([[0, 0, 0, 0, 0, -79.810362809499026, -83.528788732782829,
|
||||
-83.777653739190711, -83.784156932888934,
|
||||
-84.033390591756657],
|
||||
[0, 0, 0, 0, -0.476624256777266, 0, 0, 0, 0,
|
||||
0.025219751009936],
|
||||
[0, -3.577397088285891, -4.702795355871871,
|
||||
-7.016748621359461, -7.614898471899412, -0.336938391359179,
|
||||
0, 0, 0.001213370600853, 0.048162321585148],
|
||||
[0, 0, 0, 2.231558436628169, 2.723267514525966,
|
||||
2.811549786389614, 2.813766976061531, 2.817462468949557,
|
||||
2.817368178703816, 2.816221090636795],
|
||||
[0, 0, -1.218422599914637, -3.457726183014808,
|
||||
-4.021304522060710, -45.827461592423745,
|
||||
-47.776608869312305,
|
||||
-47.911561610746404, -47.914845922736234,
|
||||
-48.039562334265717]])
|
||||
|
||||
model_lasso_lars = linear_model.LassoLars(alpha=0, fit_intercept=False,
|
||||
normalize=False)
|
||||
model_lasso_lars.fit(X, y)
|
||||
skl_betas = model_lasso_lars.coef_path_
|
||||
|
||||
assert_array_almost_equal(r, skl_betas, decimal=12)
|
||||
###########################################################################
|
||||
|
||||
###########################################################################
|
||||
# Scenario 2: Let's compare R vs sklearn when fit_intercept=True and
|
||||
# normalize=True
|
||||
#
|
||||
    # Note: when normalize=True, R returns the coefficients in their original
    # units (i.e. rescaled back), whereas sklearn does not. We therefore have
    # to rescale sklearn's coefficients before comparing the results.
|
||||
###########################################################################
|
||||
#
|
||||
# The R result was obtained using the following code:
|
||||
#
|
||||
# library(lars)
|
||||
# model_lasso_lars2 = lars(X, t(y), type="lasso", intercept=TRUE,
|
||||
# trace=TRUE, normalize=TRUE)
|
||||
# r2 = t(model_lasso_lars2$beta)
|
||||
|
||||
r2 = np.array([[0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 8.371887668009453, 19.463768371044026],
|
||||
[0, 0, 0, 0, 9.901611055290553],
|
||||
[0, 7.495923132833733, 9.245133544334507,
|
||||
17.389369207545062, 26.971656815643499],
|
||||
[0, 0, -1.569380717440311, -5.924804108067312,
|
||||
-7.996385265061972]])
|
||||
|
||||
model_lasso_lars2 = linear_model.LassoLars(alpha=0, normalize=True)
|
||||
model_lasso_lars2.fit(X, y)
|
||||
skl_betas2 = model_lasso_lars2.coef_path_
|
||||
|
||||
# Let's rescale back the coefficients returned by sklearn before comparing
|
||||
# against the R result (read the note above)
|
||||
temp = X - np.mean(X, axis=0)
|
||||
normx = np.sqrt(np.sum(temp ** 2, axis=0))
|
||||
skl_betas2 /= normx[:, np.newaxis]
|
||||
|
||||
assert_array_almost_equal(r2, skl_betas2, decimal=12)
|
||||
###########################################################################
|
||||
|
||||
|
||||
@pytest.mark.parametrize('copy_X', [True, False])
|
||||
def test_lasso_lars_copyX_behaviour(copy_X):
|
||||
"""
|
||||
Test that user input regarding copy_X is not being overridden (it was until
|
||||
at least version 0.21)
|
||||
|
||||
"""
|
||||
lasso_lars = LassoLarsIC(copy_X=copy_X, precompute=False)
|
||||
rng = np.random.RandomState(0)
|
||||
X = rng.normal(0, 1, (100, 5))
|
||||
X_copy = X.copy()
|
||||
y = X[:, 2]
|
||||
lasso_lars.fit(X, y)
|
||||
assert copy_X == np.array_equal(X, X_copy)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('copy_X', [True, False])
|
||||
def test_lasso_lars_fit_copyX_behaviour(copy_X):
|
||||
"""
|
||||
Test that user input to .fit for copy_X overrides default __init__ value
|
||||
|
||||
"""
|
||||
lasso_lars = LassoLarsIC(precompute=False)
|
||||
rng = np.random.RandomState(0)
|
||||
X = rng.normal(0, 1, (100, 5))
|
||||
X_copy = X.copy()
|
||||
y = X[:, 2]
|
||||
lasso_lars.fit(X, y, copy_X=copy_X)
|
||||
assert copy_X == np.array_equal(X, X_copy)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('est', (LassoLars(alpha=1e-3), Lars()))
|
||||
def test_lars_with_jitter(est):
|
||||
# Test that a small amount of jitter helps stability,
|
||||
# using example provided in issue #2746
|
||||
|
||||
X = np.array([[0.0, 0.0, 0.0, -1.0, 0.0],
|
||||
[0.0, -1.0, 0.0, 0.0, 0.0]])
|
||||
y = [-2.5, -2.5]
|
||||
expected_coef = [0, 2.5, 0, 2.5, 0]
|
||||
|
||||
    # Set fit_intercept to False since the target is constant and we want to
    # check the value of coef. coef would be all zeros otherwise.
|
||||
est.set_params(fit_intercept=False)
|
||||
est_jitter = clone(est).set_params(jitter=10e-8, random_state=0)
|
||||
|
||||
est.fit(X, y)
|
||||
est_jitter.fit(X, y)
|
||||
|
||||
assert np.mean((est.coef_ - est_jitter.coef_)**2) > .1
|
||||
np.testing.assert_allclose(est_jitter.coef_, expected_coef, rtol=1e-3)
|
||||
|
||||
|
||||
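# Illustrative sketch (not sklearn internals): conceptually, the `jitter`
# parameter exercised in test_lars_with_jitter adds a small uniform
# perturbation to the targets to break the exact ties that make the
# degenerate X above unstable. The helper name below is hypothetical and only
# meant to make the idea concrete.
def _add_jitter_to_targets(y, jitter, random_state=0):
    rng_local = np.random.RandomState(random_state)
    return np.asarray(y, dtype=float) + rng_local.uniform(high=jitter,
                                                          size=len(y))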
def test_X_none_gram_not_none():
|
||||
with pytest.raises(ValueError,
|
||||
match="X cannot be None if Gram is not None"):
|
||||
lars_path(X=None, y=[1], Gram='not None')
|
||||
|
||||
|
||||
def test_copy_X_with_auto_gram():
|
||||
# Non-regression test for #17789, `copy_X=True` and Gram='auto' does not
|
||||
# overwrite X
|
||||
rng = np.random.RandomState(42)
|
||||
X = rng.rand(6, 6)
|
||||
y = rng.rand(6)
|
||||
|
||||
X_before = X.copy()
|
||||
linear_model.lars_path(X, y, Gram='auto', copy_X=True, method='lasso')
|
||||
# X did not change
|
||||
assert_allclose(X, X_before)
|
1829
venv/Lib/site-packages/sklearn/linear_model/tests/test_logistic.py
Normal file
File diff suppressed because it is too large
232
venv/Lib/site-packages/sklearn/linear_model/tests/test_omp.py
Normal file
@@ -0,0 +1,232 @@
# Author: Vlad Niculae
|
||||
# License: BSD 3 clause
|
||||
|
||||
import numpy as np
|
||||
|
||||
from sklearn.utils._testing import assert_raises
|
||||
from sklearn.utils._testing import assert_array_equal
|
||||
from sklearn.utils._testing import assert_array_almost_equal
|
||||
from sklearn.utils._testing import assert_warns
|
||||
from sklearn.utils._testing import ignore_warnings
|
||||
|
||||
|
||||
from sklearn.linear_model import (orthogonal_mp, orthogonal_mp_gram,
|
||||
OrthogonalMatchingPursuit,
|
||||
OrthogonalMatchingPursuitCV,
|
||||
LinearRegression)
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.datasets import make_sparse_coded_signal
|
||||
|
||||
n_samples, n_features, n_nonzero_coefs, n_targets = 25, 35, 5, 3
|
||||
y, X, gamma = make_sparse_coded_signal(n_samples=n_targets,
|
||||
n_components=n_features,
|
||||
n_features=n_samples,
|
||||
n_nonzero_coefs=n_nonzero_coefs,
|
||||
random_state=0)
|
||||
# Make X not of norm 1 for testing
|
||||
X *= 10
|
||||
y *= 10
|
||||
G, Xy = np.dot(X.T, X), np.dot(X.T, y)
|
||||
# this makes X (n_samples, n_features)
|
||||
# and y (n_samples, 3)
|
||||
|
||||
|
||||
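# Illustrative sketch (not the library implementation): the greedy idea that
# orthogonal matching pursuit follows - repeatedly pick the atom most
# correlated with the current residual, then refit the active set by least
# squares. The real solver additionally handles normalization, Gram matrices
# and early stopping; this helper exists only to make the tests below easier
# to follow.
def _omp_reference(X_atoms, target, n_nonzero):
    residual = target.copy()
    active = []
    coef = np.zeros(X_atoms.shape[1])
    for _ in range(n_nonzero):
        # atom whose correlation with the residual is largest in magnitude
        correlations = np.dot(X_atoms.T, residual)
        active.append(int(np.argmax(np.abs(correlations))))
        # least-squares refit on the active set, then update the residual
        coef_active, _, _, _ = np.linalg.lstsq(X_atoms[:, active], target,
                                               rcond=None)
        residual = target - np.dot(X_atoms[:, active], coef_active)
    coef[active] = coef_active
    return coef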
def test_correct_shapes():
|
||||
assert (orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5).shape ==
|
||||
(n_features,))
|
||||
assert (orthogonal_mp(X, y, n_nonzero_coefs=5).shape ==
|
||||
(n_features, 3))
|
||||
|
||||
|
||||
def test_correct_shapes_gram():
|
||||
assert (orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5).shape ==
|
||||
(n_features,))
|
||||
assert (orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5).shape ==
|
||||
(n_features, 3))
|
||||
|
||||
|
||||
def test_n_nonzero_coefs():
|
||||
assert np.count_nonzero(orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5)) <= 5
|
||||
assert np.count_nonzero(orthogonal_mp(X, y[:, 0],
|
||||
n_nonzero_coefs=5,
|
||||
precompute=True)) <= 5
|
||||
|
||||
|
||||
def test_tol():
|
||||
tol = 0.5
|
||||
gamma = orthogonal_mp(X, y[:, 0], tol=tol)
|
||||
gamma_gram = orthogonal_mp(X, y[:, 0], tol=tol, precompute=True)
|
||||
assert np.sum((y[:, 0] - np.dot(X, gamma)) ** 2) <= tol
|
||||
assert np.sum((y[:, 0] - np.dot(X, gamma_gram)) ** 2) <= tol
|
||||
|
||||
|
||||
def test_with_without_gram():
|
||||
assert_array_almost_equal(
|
||||
orthogonal_mp(X, y, n_nonzero_coefs=5),
|
||||
orthogonal_mp(X, y, n_nonzero_coefs=5, precompute=True))
|
||||
|
||||
|
||||
def test_with_without_gram_tol():
|
||||
assert_array_almost_equal(
|
||||
orthogonal_mp(X, y, tol=1.),
|
||||
orthogonal_mp(X, y, tol=1., precompute=True))
|
||||
|
||||
|
||||
def test_unreachable_accuracy():
|
||||
assert_array_almost_equal(
|
||||
orthogonal_mp(X, y, tol=0),
|
||||
orthogonal_mp(X, y, n_nonzero_coefs=n_features))
|
||||
|
||||
assert_array_almost_equal(
|
||||
assert_warns(RuntimeWarning, orthogonal_mp, X, y, tol=0,
|
||||
precompute=True),
|
||||
orthogonal_mp(X, y, precompute=True,
|
||||
n_nonzero_coefs=n_features))
|
||||
|
||||
|
||||
def test_bad_input():
|
||||
assert_raises(ValueError, orthogonal_mp, X, y, tol=-1)
|
||||
assert_raises(ValueError, orthogonal_mp, X, y, n_nonzero_coefs=-1)
|
||||
assert_raises(ValueError, orthogonal_mp, X, y,
|
||||
n_nonzero_coefs=n_features + 1)
|
||||
assert_raises(ValueError, orthogonal_mp_gram, G, Xy, tol=-1)
|
||||
assert_raises(ValueError, orthogonal_mp_gram, G, Xy, n_nonzero_coefs=-1)
|
||||
assert_raises(ValueError, orthogonal_mp_gram, G, Xy,
|
||||
n_nonzero_coefs=n_features + 1)
|
||||
|
||||
|
||||
def test_perfect_signal_recovery():
|
||||
idx, = gamma[:, 0].nonzero()
|
||||
gamma_rec = orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5)
|
||||
gamma_gram = orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5)
|
||||
assert_array_equal(idx, np.flatnonzero(gamma_rec))
|
||||
assert_array_equal(idx, np.flatnonzero(gamma_gram))
|
||||
assert_array_almost_equal(gamma[:, 0], gamma_rec, decimal=2)
|
||||
assert_array_almost_equal(gamma[:, 0], gamma_gram, decimal=2)
|
||||
|
||||
|
||||
def test_orthogonal_mp_gram_readonly():
|
||||
# Non-regression test for:
|
||||
# https://github.com/scikit-learn/scikit-learn/issues/5956
|
||||
idx, = gamma[:, 0].nonzero()
|
||||
G_readonly = G.copy()
|
||||
G_readonly.setflags(write=False)
|
||||
Xy_readonly = Xy.copy()
|
||||
Xy_readonly.setflags(write=False)
|
||||
gamma_gram = orthogonal_mp_gram(G_readonly, Xy_readonly[:, 0],
|
||||
n_nonzero_coefs=5,
|
||||
copy_Gram=False, copy_Xy=False)
|
||||
assert_array_equal(idx, np.flatnonzero(gamma_gram))
|
||||
assert_array_almost_equal(gamma[:, 0], gamma_gram, decimal=2)
|
||||
|
||||
|
||||
def test_estimator():
|
||||
omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
|
||||
omp.fit(X, y[:, 0])
|
||||
assert omp.coef_.shape == (n_features,)
|
||||
assert omp.intercept_.shape == ()
|
||||
assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs
|
||||
|
||||
omp.fit(X, y)
|
||||
assert omp.coef_.shape == (n_targets, n_features)
|
||||
assert omp.intercept_.shape == (n_targets,)
|
||||
assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs
|
||||
|
||||
coef_normalized = omp.coef_[0].copy()
|
||||
omp.set_params(fit_intercept=True, normalize=False)
|
||||
omp.fit(X, y[:, 0])
|
||||
assert_array_almost_equal(coef_normalized, omp.coef_)
|
||||
|
||||
omp.set_params(fit_intercept=False, normalize=False)
|
||||
omp.fit(X, y[:, 0])
|
||||
assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs
|
||||
assert omp.coef_.shape == (n_features,)
|
||||
assert omp.intercept_ == 0
|
||||
|
||||
omp.fit(X, y)
|
||||
assert omp.coef_.shape == (n_targets, n_features)
|
||||
assert omp.intercept_ == 0
|
||||
assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs
|
||||
|
||||
|
||||
def test_identical_regressors():
|
||||
newX = X.copy()
|
||||
newX[:, 1] = newX[:, 0]
|
||||
gamma = np.zeros(n_features)
|
||||
gamma[0] = gamma[1] = 1.
|
||||
newy = np.dot(newX, gamma)
|
||||
assert_warns(RuntimeWarning, orthogonal_mp, newX, newy, 2)
|
||||
|
||||
|
||||
def test_swapped_regressors():
|
||||
gamma = np.zeros(n_features)
|
||||
# X[:, 21] should be selected first, then X[:, 0] selected second,
|
||||
# which will take X[:, 21]'s place in case the algorithm does
|
||||
# column swapping for optimization (which is the case at the moment)
|
||||
gamma[21] = 1.0
|
||||
gamma[0] = 0.5
|
||||
new_y = np.dot(X, gamma)
|
||||
new_Xy = np.dot(X.T, new_y)
|
||||
gamma_hat = orthogonal_mp(X, new_y, n_nonzero_coefs=2)
|
||||
gamma_hat_gram = orthogonal_mp_gram(G, new_Xy, n_nonzero_coefs=2)
|
||||
assert_array_equal(np.flatnonzero(gamma_hat), [0, 21])
|
||||
assert_array_equal(np.flatnonzero(gamma_hat_gram), [0, 21])
|
||||
|
||||
|
||||
def test_no_atoms():
|
||||
y_empty = np.zeros_like(y)
|
||||
Xy_empty = np.dot(X.T, y_empty)
|
||||
gamma_empty = ignore_warnings(orthogonal_mp)(X, y_empty,
|
||||
n_nonzero_coefs=1)
|
||||
gamma_empty_gram = ignore_warnings(orthogonal_mp)(G, Xy_empty,
|
||||
n_nonzero_coefs=1)
|
||||
assert np.all(gamma_empty == 0)
|
||||
assert np.all(gamma_empty_gram == 0)
|
||||
|
||||
|
||||
def test_omp_path():
|
||||
path = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=True)
|
||||
last = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=False)
|
||||
assert path.shape == (n_features, n_targets, 5)
|
||||
assert_array_almost_equal(path[:, :, -1], last)
|
||||
path = orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5, return_path=True)
|
||||
last = orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5, return_path=False)
|
||||
assert path.shape == (n_features, n_targets, 5)
|
||||
assert_array_almost_equal(path[:, :, -1], last)
|
||||
|
||||
|
||||
def test_omp_return_path_prop_with_gram():
|
||||
path = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=True,
|
||||
precompute=True)
|
||||
last = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=False,
|
||||
precompute=True)
|
||||
assert path.shape == (n_features, n_targets, 5)
|
||||
assert_array_almost_equal(path[:, :, -1], last)
|
||||
|
||||
|
||||
def test_omp_cv():
|
||||
y_ = y[:, 0]
|
||||
gamma_ = gamma[:, 0]
|
||||
ompcv = OrthogonalMatchingPursuitCV(normalize=True, fit_intercept=False,
|
||||
max_iter=10)
|
||||
ompcv.fit(X, y_)
|
||||
assert ompcv.n_nonzero_coefs_ == n_nonzero_coefs
|
||||
assert_array_almost_equal(ompcv.coef_, gamma_)
|
||||
omp = OrthogonalMatchingPursuit(normalize=True, fit_intercept=False,
|
||||
n_nonzero_coefs=ompcv.n_nonzero_coefs_)
|
||||
omp.fit(X, y_)
|
||||
assert_array_almost_equal(ompcv.coef_, omp.coef_)
|
||||
|
||||
|
||||
def test_omp_reaches_least_squares():
|
||||
# Use small simple data; it's a sanity check but OMP can stop early
|
||||
rng = check_random_state(0)
|
||||
n_samples, n_features = (10, 8)
|
||||
n_targets = 3
|
||||
X = rng.randn(n_samples, n_features)
|
||||
Y = rng.randn(n_samples, n_targets)
|
||||
omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_features)
|
||||
lstsq = LinearRegression()
|
||||
omp.fit(X, Y)
|
||||
lstsq.fit(X, Y)
|
||||
assert_array_almost_equal(omp.coef_, lstsq.coef_)
@@ -0,0 +1,281 @@
import numpy as np
|
||||
import scipy.sparse as sp
|
||||
|
||||
import pytest
|
||||
|
||||
from sklearn.utils._testing import assert_array_almost_equal
|
||||
from sklearn.utils._testing import assert_array_equal
|
||||
from sklearn.utils._testing import assert_almost_equal
|
||||
from sklearn.utils._testing import assert_raises
|
||||
|
||||
from sklearn.base import ClassifierMixin
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.datasets import load_iris
|
||||
from sklearn.linear_model import PassiveAggressiveClassifier
|
||||
from sklearn.linear_model import PassiveAggressiveRegressor
|
||||
|
||||
iris = load_iris()
|
||||
random_state = check_random_state(12)
|
||||
indices = np.arange(iris.data.shape[0])
|
||||
random_state.shuffle(indices)
|
||||
X = iris.data[indices]
|
||||
y = iris.target[indices]
|
||||
X_csr = sp.csr_matrix(X)
|
||||
|
||||
|
||||
class MyPassiveAggressive(ClassifierMixin):
|
||||
|
||||
def __init__(self, C=1.0, epsilon=0.01, loss="hinge",
|
||||
fit_intercept=True, n_iter=1, random_state=None):
|
||||
self.C = C
|
||||
self.epsilon = epsilon
|
||||
self.loss = loss
|
||||
self.fit_intercept = fit_intercept
|
||||
self.n_iter = n_iter
|
||||
|
||||
def fit(self, X, y):
|
||||
n_samples, n_features = X.shape
|
||||
self.w = np.zeros(n_features, dtype=np.float64)
|
||||
self.b = 0.0
|
||||
|
||||
for t in range(self.n_iter):
|
||||
for i in range(n_samples):
|
||||
p = self.project(X[i])
|
||||
if self.loss in ("hinge", "squared_hinge"):
|
||||
loss = max(1 - y[i] * p, 0)
|
||||
else:
|
||||
loss = max(np.abs(p - y[i]) - self.epsilon, 0)
|
||||
|
||||
sqnorm = np.dot(X[i], X[i])
|
||||
|
||||
if self.loss in ("hinge", "epsilon_insensitive"):
|
||||
step = min(self.C, loss / sqnorm)
|
||||
elif self.loss in ("squared_hinge",
|
||||
"squared_epsilon_insensitive"):
|
||||
step = loss / (sqnorm + 1.0 / (2 * self.C))
|
||||
|
||||
if self.loss in ("hinge", "squared_hinge"):
|
||||
step *= y[i]
|
||||
else:
|
||||
step *= np.sign(y[i] - p)
|
||||
|
||||
self.w += step * X[i]
|
||||
if self.fit_intercept:
|
||||
self.b += step
|
||||
|
||||
def project(self, X):
|
||||
return np.dot(X, self.w) + self.b
|
||||
|
||||
|
||||
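# Illustrative sketch (not part of the original file): a single PA-I update
# in closed form, following Crammer et al. (2006), which is what the
# reference implementation above loops over for the "hinge" loss:
#   tau = min(C, hinge_loss / ||x||^2),  w <- w + tau * y * x
def _pa1_step(w, x, y_i, C=1.0):
    loss = max(1 - y_i * np.dot(w, x), 0)
    tau = min(C, loss / np.dot(x, x))
    return w + tau * y_i * x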
def test_classifier_accuracy():
|
||||
for data in (X, X_csr):
|
||||
for fit_intercept in (True, False):
|
||||
for average in (False, True):
|
||||
clf = PassiveAggressiveClassifier(
|
||||
C=1.0, max_iter=30, fit_intercept=fit_intercept,
|
||||
random_state=1, average=average, tol=None)
|
||||
clf.fit(data, y)
|
||||
score = clf.score(data, y)
|
||||
assert score > 0.79
|
||||
if average:
|
||||
assert hasattr(clf, '_average_coef')
|
||||
assert hasattr(clf, '_average_intercept')
|
||||
assert hasattr(clf, '_standard_intercept')
|
||||
assert hasattr(clf, '_standard_coef')
|
||||
|
||||
|
||||
def test_classifier_partial_fit():
|
||||
classes = np.unique(y)
|
||||
for data in (X, X_csr):
|
||||
for average in (False, True):
|
||||
clf = PassiveAggressiveClassifier(random_state=0,
|
||||
average=average,
|
||||
max_iter=5)
|
||||
for t in range(30):
|
||||
clf.partial_fit(data, y, classes)
|
||||
score = clf.score(data, y)
|
||||
assert score > 0.79
|
||||
if average:
|
||||
assert hasattr(clf, '_average_coef')
|
||||
assert hasattr(clf, '_average_intercept')
|
||||
assert hasattr(clf, '_standard_intercept')
|
||||
assert hasattr(clf, '_standard_coef')
|
||||
|
||||
|
||||
def test_classifier_refit():
|
||||
# Classifier can be retrained on different labels and features.
|
||||
clf = PassiveAggressiveClassifier(max_iter=5).fit(X, y)
|
||||
assert_array_equal(clf.classes_, np.unique(y))
|
||||
|
||||
clf.fit(X[:, :-1], iris.target_names[y])
|
||||
assert_array_equal(clf.classes_, iris.target_names)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('loss', ("hinge", "squared_hinge"))
|
||||
def test_classifier_correctness(loss):
|
||||
y_bin = y.copy()
|
||||
y_bin[y != 1] = -1
|
||||
|
||||
clf1 = MyPassiveAggressive(loss=loss, n_iter=2)
|
||||
clf1.fit(X, y_bin)
|
||||
|
||||
for data in (X, X_csr):
|
||||
clf2 = PassiveAggressiveClassifier(loss=loss, max_iter=2,
|
||||
shuffle=False, tol=None)
|
||||
clf2.fit(data, y_bin)
|
||||
|
||||
assert_array_almost_equal(clf1.w, clf2.coef_.ravel(), decimal=2)
|
||||
|
||||
|
||||
def test_classifier_undefined_methods():
|
||||
clf = PassiveAggressiveClassifier(max_iter=100)
|
||||
for meth in ("predict_proba", "predict_log_proba", "transform"):
|
||||
assert_raises(AttributeError, lambda x: getattr(clf, x), meth)
|
||||
|
||||
|
||||
def test_class_weights():
|
||||
# Test class weights.
|
||||
X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
|
||||
[1.0, 1.0], [1.0, 0.0]])
|
||||
y2 = [1, 1, 1, -1, -1]
|
||||
|
||||
clf = PassiveAggressiveClassifier(C=0.1, max_iter=100, class_weight=None,
|
||||
random_state=100)
|
||||
clf.fit(X2, y2)
|
||||
assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([1]))
|
||||
|
||||
    # we give a small weight to class 1
|
||||
clf = PassiveAggressiveClassifier(C=0.1, max_iter=100,
|
||||
class_weight={1: 0.001},
|
||||
random_state=100)
|
||||
clf.fit(X2, y2)
|
||||
|
||||
    # now the hyperplane should rotate clockwise and
    # the prediction on this point should shift
|
||||
assert_array_equal(clf.predict([[0.2, -1.0]]), np.array([-1]))
|
||||
|
||||
|
||||
def test_partial_fit_weight_class_balanced():
|
||||
# partial_fit with class_weight='balanced' not supported
|
||||
clf = PassiveAggressiveClassifier(class_weight="balanced", max_iter=100)
|
||||
assert_raises(ValueError, clf.partial_fit, X, y, classes=np.unique(y))
|
||||
|
||||
|
||||
def test_equal_class_weight():
|
||||
X2 = [[1, 0], [1, 0], [0, 1], [0, 1]]
|
||||
y2 = [0, 0, 1, 1]
|
||||
clf = PassiveAggressiveClassifier(
|
||||
C=0.1, max_iter=1000, tol=None, class_weight=None)
|
||||
clf.fit(X2, y2)
|
||||
|
||||
# Already balanced, so "balanced" weights should have no effect
|
||||
clf_balanced = PassiveAggressiveClassifier(
|
||||
C=0.1, max_iter=1000, tol=None, class_weight="balanced")
|
||||
clf_balanced.fit(X2, y2)
|
||||
|
||||
clf_weighted = PassiveAggressiveClassifier(
|
||||
C=0.1, max_iter=1000, tol=None, class_weight={0: 0.5, 1: 0.5})
|
||||
clf_weighted.fit(X2, y2)
|
||||
|
||||
# should be similar up to some epsilon due to learning rate schedule
|
||||
assert_almost_equal(clf.coef_, clf_weighted.coef_, decimal=2)
|
||||
assert_almost_equal(clf.coef_, clf_balanced.coef_, decimal=2)
|
||||
|
||||
|
||||
def test_wrong_class_weight_label():
|
||||
# ValueError due to wrong class_weight label.
|
||||
X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
|
||||
[1.0, 1.0], [1.0, 0.0]])
|
||||
y2 = [1, 1, 1, -1, -1]
|
||||
|
||||
clf = PassiveAggressiveClassifier(class_weight={0: 0.5}, max_iter=100)
|
||||
assert_raises(ValueError, clf.fit, X2, y2)
|
||||
|
||||
|
||||
def test_wrong_class_weight_format():
|
||||
# ValueError due to wrong class_weight argument type.
|
||||
X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0],
|
||||
[1.0, 1.0], [1.0, 0.0]])
|
||||
y2 = [1, 1, 1, -1, -1]
|
||||
|
||||
clf = PassiveAggressiveClassifier(class_weight=[0.5], max_iter=100)
|
||||
assert_raises(ValueError, clf.fit, X2, y2)
|
||||
|
||||
clf = PassiveAggressiveClassifier(class_weight="the larch", max_iter=100)
|
||||
assert_raises(ValueError, clf.fit, X2, y2)
|
||||
|
||||
|
||||
def test_regressor_mse():
|
||||
y_bin = y.copy()
|
||||
y_bin[y != 1] = -1
|
||||
|
||||
for data in (X, X_csr):
|
||||
for fit_intercept in (True, False):
|
||||
for average in (False, True):
|
||||
reg = PassiveAggressiveRegressor(
|
||||
C=1.0, fit_intercept=fit_intercept,
|
||||
random_state=0, average=average, max_iter=5)
|
||||
reg.fit(data, y_bin)
|
||||
pred = reg.predict(data)
|
||||
assert np.mean((pred - y_bin) ** 2) < 1.7
|
||||
if average:
|
||||
assert hasattr(reg, '_average_coef')
|
||||
assert hasattr(reg, '_average_intercept')
|
||||
assert hasattr(reg, '_standard_intercept')
|
||||
assert hasattr(reg, '_standard_coef')
|
||||
|
||||
|
||||
def test_regressor_partial_fit():
|
||||
y_bin = y.copy()
|
||||
y_bin[y != 1] = -1
|
||||
|
||||
for data in (X, X_csr):
|
||||
for average in (False, True):
|
||||
reg = PassiveAggressiveRegressor(random_state=0,
|
||||
average=average, max_iter=100)
|
||||
for t in range(50):
|
||||
reg.partial_fit(data, y_bin)
|
||||
pred = reg.predict(data)
|
||||
assert np.mean((pred - y_bin) ** 2) < 1.7
|
||||
if average:
|
||||
assert hasattr(reg, '_average_coef')
|
||||
assert hasattr(reg, '_average_intercept')
|
||||
assert hasattr(reg, '_standard_intercept')
|
||||
assert hasattr(reg, '_standard_coef')
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'loss',
|
||||
("epsilon_insensitive", "squared_epsilon_insensitive"))
|
||||
def test_regressor_correctness(loss):
|
||||
y_bin = y.copy()
|
||||
y_bin[y != 1] = -1
|
||||
|
||||
reg1 = MyPassiveAggressive(loss=loss, n_iter=2)
|
||||
reg1.fit(X, y_bin)
|
||||
|
||||
for data in (X, X_csr):
|
||||
reg2 = PassiveAggressiveRegressor(tol=None, loss=loss, max_iter=2,
|
||||
shuffle=False)
|
||||
reg2.fit(data, y_bin)
|
||||
|
||||
assert_array_almost_equal(reg1.w, reg2.coef_.ravel(), decimal=2)
|
||||
|
||||
|
||||
def test_regressor_undefined_methods():
|
||||
reg = PassiveAggressiveRegressor(max_iter=100)
|
||||
for meth in ("transform",):
|
||||
assert_raises(AttributeError, lambda x: getattr(reg, x), meth)
|
||||
|
||||
# TODO: remove in 0.25
|
||||
@pytest.mark.parametrize('klass', [PassiveAggressiveClassifier,
|
||||
PassiveAggressiveRegressor])
|
||||
def test_passive_aggressive_deprecated_attr(klass):
|
||||
est = klass(average=True)
|
||||
est.fit(X, y)
|
||||
|
||||
msg = "Attribute {} was deprecated"
|
||||
for att in ['average_coef_', 'average_intercept_',
|
||||
'standard_coef_', 'standard_intercept_']:
|
||||
with pytest.warns(FutureWarning, match=msg.format(att)):
|
||||
getattr(est, att)
@@ -0,0 +1,69 @@
import numpy as np
import scipy.sparse as sp

from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_raises

from sklearn.utils import check_random_state
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron

iris = load_iris()
random_state = check_random_state(12)
indices = np.arange(iris.data.shape[0])
random_state.shuffle(indices)
X = iris.data[indices]
y = iris.target[indices]
X_csr = sp.csr_matrix(X)
X_csr.sort_indices()


class MyPerceptron:

    def __init__(self, n_iter=1):
        self.n_iter = n_iter

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features, dtype=np.float64)
        self.b = 0.0

        for t in range(self.n_iter):
            for i in range(n_samples):
                if self.predict(X[i])[0] != y[i]:
                    self.w += y[i] * X[i]
                    self.b += y[i]

    def project(self, X):
        return np.dot(X, self.w) + self.b

    def predict(self, X):
        X = np.atleast_2d(X)
        return np.sign(self.project(X))


def test_perceptron_accuracy():
    for data in (X, X_csr):
        clf = Perceptron(max_iter=100, tol=None, shuffle=False)
        clf.fit(data, y)
        score = clf.score(data, y)
        assert score > 0.7


def test_perceptron_correctness():
    y_bin = y.copy()
    y_bin[y != 1] = -1

    clf1 = MyPerceptron(n_iter=2)
    clf1.fit(X, y_bin)

    clf2 = Perceptron(max_iter=2, shuffle=False, tol=None)
    clf2.fit(X, y_bin)

    assert_array_almost_equal(clf1.w, clf2.coef_.ravel())


def test_undefined_methods():
    clf = Perceptron(max_iter=100)
    for meth in ("predict_proba", "predict_log_proba"):
        assert_raises(AttributeError, lambda x: getattr(clf, x), meth)
515
venv/Lib/site-packages/sklearn/linear_model/tests/test_ransac.py
Normal file
@@ -0,0 +1,515 @@
import numpy as np
|
||||
from scipy import sparse
|
||||
|
||||
from numpy.testing import assert_array_almost_equal
|
||||
from numpy.testing import assert_array_equal
|
||||
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.utils._testing import assert_warns
|
||||
from sklearn.utils._testing import assert_raises_regexp
|
||||
from sklearn.utils._testing import assert_raises
|
||||
from sklearn.utils._testing import assert_allclose
|
||||
from sklearn.datasets import make_regression
|
||||
from sklearn.linear_model import LinearRegression, RANSACRegressor
|
||||
from sklearn.linear_model import OrthogonalMatchingPursuit
|
||||
from sklearn.linear_model._ransac import _dynamic_max_trials
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
|
||||
|
||||
# Generate coordinates of line
|
||||
X = np.arange(-200, 200)
|
||||
y = 0.2 * X + 20
|
||||
data = np.column_stack([X, y])
|
||||
|
||||
# Add some faulty data
|
||||
rng = np.random.RandomState(1000)
|
||||
outliers = np.unique(rng.randint(len(X), size=200))
|
||||
data[outliers, :] += 50 + rng.rand(len(outliers), 2) * 10
|
||||
|
||||
X = data[:, 0][:, np.newaxis]
|
||||
y = data[:, 1]
|
||||
|
||||
|
||||
def test_ransac_inliers_outliers():
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
|
||||
# Estimate parameters of corrupted data
|
||||
ransac_estimator.fit(X, y)
|
||||
|
||||
# Ground truth / reference inlier mask
|
||||
ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
|
||||
).astype(np.bool_)
|
||||
ref_inlier_mask[outliers] = False
|
||||
|
||||
assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
|
||||
def test_ransac_is_data_valid():
|
||||
def is_data_valid(X, y):
|
||||
assert X.shape[0] == 2
|
||||
assert y.shape[0] == 2
|
||||
return False
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
X = rng.rand(10, 2)
|
||||
y = rng.rand(10, 1)
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5,
|
||||
is_data_valid=is_data_valid,
|
||||
random_state=0)
|
||||
|
||||
assert_raises(ValueError, ransac_estimator.fit, X, y)
|
||||
|
||||
|
||||
def test_ransac_is_model_valid():
|
||||
def is_model_valid(estimator, X, y):
|
||||
assert X.shape[0] == 2
|
||||
assert y.shape[0] == 2
|
||||
return False
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5,
|
||||
is_model_valid=is_model_valid,
|
||||
random_state=0)
|
||||
|
||||
assert_raises(ValueError, ransac_estimator.fit, X, y)
|
||||
|
||||
|
||||
def test_ransac_max_trials():
|
||||
base_estimator = LinearRegression()
|
||||
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, max_trials=0,
|
||||
random_state=0)
|
||||
assert_raises(ValueError, ransac_estimator.fit, X, y)
|
||||
|
||||
    # There is a 1e-9 chance it will take this many trials. No good reason;
    # 1e-2 isn't enough, it can still happen.
    # 2 is what RANSAC defines as min_samples = X.shape[1] + 1
|
||||
max_trials = _dynamic_max_trials(
|
||||
len(X) - len(outliers), X.shape[0], 2, 1 - 1e-9)
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2)
|
||||
for i in range(50):
|
||||
ransac_estimator.set_params(min_samples=2, random_state=i)
|
||||
ransac_estimator.fit(X, y)
|
||||
assert ransac_estimator.n_trials_ < max_trials + 1
|
||||
|
||||
def test_ransac_stop_n_inliers():
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, stop_n_inliers=2,
|
||||
random_state=0)
|
||||
ransac_estimator.fit(X, y)
|
||||
|
||||
assert ransac_estimator.n_trials_ == 1
|
||||
|
||||
|
||||
def test_ransac_stop_score():
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, stop_score=0,
|
||||
random_state=0)
|
||||
ransac_estimator.fit(X, y)
|
||||
|
||||
assert ransac_estimator.n_trials_ == 1
|
||||
|
||||
|
||||
def test_ransac_score():
|
||||
X = np.arange(100)[:, None]
|
||||
y = np.zeros((100, ))
|
||||
y[0] = 1
|
||||
y[1] = 100
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=0.5, random_state=0)
|
||||
ransac_estimator.fit(X, y)
|
||||
|
||||
assert ransac_estimator.score(X[2:], y[2:]) == 1
|
||||
assert ransac_estimator.score(X[:2], y[:2]) < 1
|
||||
|
||||
|
||||
def test_ransac_predict():
|
||||
X = np.arange(100)[:, None]
|
||||
y = np.zeros((100, ))
|
||||
y[0] = 1
|
||||
y[1] = 100
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=0.5, random_state=0)
|
||||
ransac_estimator.fit(X, y)
|
||||
|
||||
assert_array_equal(ransac_estimator.predict(X), np.zeros(100))
|
||||
|
||||
|
||||
def test_ransac_resid_thresh_no_inliers():
|
||||
# When residual_threshold=0.0 there are no inliers and a
|
||||
# ValueError with a message should be raised
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=0.0, random_state=0,
|
||||
max_trials=5)
|
||||
|
||||
msg = ("RANSAC could not find a valid consensus set")
|
||||
assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
|
||||
assert ransac_estimator.n_skips_no_inliers_ == 5
|
||||
assert ransac_estimator.n_skips_invalid_data_ == 0
|
||||
assert ransac_estimator.n_skips_invalid_model_ == 0
|
||||
|
||||
|
||||
def test_ransac_no_valid_data():
|
||||
def is_data_valid(X, y):
|
||||
return False
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator,
|
||||
is_data_valid=is_data_valid,
|
||||
max_trials=5)
|
||||
|
||||
msg = ("RANSAC could not find a valid consensus set")
|
||||
assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
|
||||
assert ransac_estimator.n_skips_no_inliers_ == 0
|
||||
assert ransac_estimator.n_skips_invalid_data_ == 5
|
||||
assert ransac_estimator.n_skips_invalid_model_ == 0
|
||||
|
||||
|
||||
def test_ransac_no_valid_model():
|
||||
def is_model_valid(estimator, X, y):
|
||||
return False
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator,
|
||||
is_model_valid=is_model_valid,
|
||||
max_trials=5)
|
||||
|
||||
msg = ("RANSAC could not find a valid consensus set")
|
||||
assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
|
||||
assert ransac_estimator.n_skips_no_inliers_ == 0
|
||||
assert ransac_estimator.n_skips_invalid_data_ == 0
|
||||
assert ransac_estimator.n_skips_invalid_model_ == 5
|
||||
|
||||
|
||||
def test_ransac_exceed_max_skips():
|
||||
def is_data_valid(X, y):
|
||||
return False
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator,
|
||||
is_data_valid=is_data_valid,
|
||||
max_trials=5,
|
||||
max_skips=3)
|
||||
|
||||
msg = ("RANSAC skipped more iterations than `max_skips`")
|
||||
assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y)
|
||||
assert ransac_estimator.n_skips_no_inliers_ == 0
|
||||
assert ransac_estimator.n_skips_invalid_data_ == 4
|
||||
assert ransac_estimator.n_skips_invalid_model_ == 0
|
||||
|
||||
|
||||
def test_ransac_warn_exceed_max_skips():
|
||||
global cause_skip
|
||||
cause_skip = False
|
||||
|
||||
def is_data_valid(X, y):
|
||||
global cause_skip
|
||||
if not cause_skip:
|
||||
cause_skip = True
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator,
|
||||
is_data_valid=is_data_valid,
|
||||
max_skips=3,
|
||||
max_trials=5)
|
||||
|
||||
assert_warns(ConvergenceWarning, ransac_estimator.fit, X, y)
|
||||
assert ransac_estimator.n_skips_no_inliers_ == 0
|
||||
assert ransac_estimator.n_skips_invalid_data_ == 4
|
||||
assert ransac_estimator.n_skips_invalid_model_ == 0
|
||||
|
||||
|
||||
def test_ransac_sparse_coo():
|
||||
X_sparse = sparse.coo_matrix(X)
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator.fit(X_sparse, y)
|
||||
|
||||
ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
|
||||
).astype(np.bool_)
|
||||
ref_inlier_mask[outliers] = False
|
||||
|
||||
assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
|
||||
def test_ransac_sparse_csr():
|
||||
X_sparse = sparse.csr_matrix(X)
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator.fit(X_sparse, y)
|
||||
|
||||
ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
|
||||
).astype(np.bool_)
|
||||
ref_inlier_mask[outliers] = False
|
||||
|
||||
assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
|
||||
def test_ransac_sparse_csc():
|
||||
X_sparse = sparse.csc_matrix(X)
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator.fit(X_sparse, y)
|
||||
|
||||
ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
|
||||
).astype(np.bool_)
|
||||
ref_inlier_mask[outliers] = False
|
||||
|
||||
assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
|
||||
def test_ransac_none_estimator():
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_none_estimator = RANSACRegressor(None, min_samples=2,
|
||||
residual_threshold=5,
|
||||
random_state=0)
|
||||
|
||||
ransac_estimator.fit(X, y)
|
||||
ransac_none_estimator.fit(X, y)
|
||||
|
||||
assert_array_almost_equal(ransac_estimator.predict(X),
|
||||
ransac_none_estimator.predict(X))
|
||||
|
||||
|
||||
def test_ransac_min_n_samples():
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator1 = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator2 = RANSACRegressor(base_estimator,
|
||||
min_samples=2. / X.shape[0],
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator3 = RANSACRegressor(base_estimator, min_samples=-1,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator4 = RANSACRegressor(base_estimator, min_samples=5.2,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator5 = RANSACRegressor(base_estimator, min_samples=2.0,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator6 = RANSACRegressor(base_estimator,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator7 = RANSACRegressor(base_estimator,
|
||||
min_samples=X.shape[0] + 1,
|
||||
residual_threshold=5, random_state=0)
|
||||
|
||||
ransac_estimator1.fit(X, y)
|
||||
ransac_estimator2.fit(X, y)
|
||||
ransac_estimator5.fit(X, y)
|
||||
ransac_estimator6.fit(X, y)
|
||||
|
||||
assert_array_almost_equal(ransac_estimator1.predict(X),
|
||||
ransac_estimator2.predict(X))
|
||||
assert_array_almost_equal(ransac_estimator1.predict(X),
|
||||
ransac_estimator5.predict(X))
|
||||
assert_array_almost_equal(ransac_estimator1.predict(X),
|
||||
ransac_estimator6.predict(X))
|
||||
|
||||
assert_raises(ValueError, ransac_estimator3.fit, X, y)
|
||||
assert_raises(ValueError, ransac_estimator4.fit, X, y)
|
||||
assert_raises(ValueError, ransac_estimator7.fit, X, y)
|
||||
|
||||
|
||||
def test_ransac_multi_dimensional_targets():
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
|
||||
# 3-D target values
|
||||
yyy = np.column_stack([y, y, y])
|
||||
|
||||
# Estimate parameters of corrupted data
|
||||
ransac_estimator.fit(X, yyy)
|
||||
|
||||
# Ground truth / reference inlier mask
|
||||
ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
|
||||
).astype(np.bool_)
|
||||
ref_inlier_mask[outliers] = False
|
||||
|
||||
assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
|
||||
def test_ransac_residual_loss():
|
||||
loss_multi1 = lambda y_true, y_pred: np.sum(np.abs(y_true - y_pred), axis=1)
|
||||
loss_multi2 = lambda y_true, y_pred: np.sum((y_true - y_pred) ** 2, axis=1)
|
||||
|
||||
loss_mono = lambda y_true, y_pred : np.abs(y_true - y_pred)
|
||||
yyy = np.column_stack([y, y, y])
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator0 = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0)
|
||||
ransac_estimator1 = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0,
|
||||
loss=loss_multi1)
|
||||
ransac_estimator2 = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0,
|
||||
loss=loss_multi2)
|
||||
|
||||
# multi-dimensional
|
||||
ransac_estimator0.fit(X, yyy)
|
||||
ransac_estimator1.fit(X, yyy)
|
||||
ransac_estimator2.fit(X, yyy)
|
||||
assert_array_almost_equal(ransac_estimator0.predict(X),
|
||||
ransac_estimator1.predict(X))
|
||||
assert_array_almost_equal(ransac_estimator0.predict(X),
|
||||
ransac_estimator2.predict(X))
|
||||
|
||||
# one-dimensional
|
||||
ransac_estimator0.fit(X, y)
|
||||
ransac_estimator2.loss = loss_mono
|
||||
ransac_estimator2.fit(X, y)
|
||||
assert_array_almost_equal(ransac_estimator0.predict(X),
|
||||
ransac_estimator2.predict(X))
|
||||
ransac_estimator3 = RANSACRegressor(base_estimator, min_samples=2,
|
||||
residual_threshold=5, random_state=0,
|
||||
loss="squared_loss")
|
||||
ransac_estimator3.fit(X, y)
|
||||
assert_array_almost_equal(ransac_estimator0.predict(X),
|
||||
ransac_estimator2.predict(X))
|
||||
|
||||
|
||||
def test_ransac_default_residual_threshold():
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
random_state=0)
|
||||
|
||||
# Estimate parameters of corrupted data
|
||||
ransac_estimator.fit(X, y)
|
||||
|
||||
# Ground truth / reference inlier mask
|
||||
ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
|
||||
).astype(np.bool_)
|
||||
ref_inlier_mask[outliers] = False
|
||||
|
||||
assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
|
||||
def test_ransac_dynamic_max_trials():
|
||||
# Numbers hand-calculated and confirmed on page 119 (Table 4.3) in
|
||||
# Hartley, R.~I. and Zisserman, A., 2004,
|
||||
# Multiple View Geometry in Computer Vision, Second Edition,
|
||||
# Cambridge University Press, ISBN: 0521540518
|
||||
|
||||
# e = 0%, min_samples = X
|
||||
assert _dynamic_max_trials(100, 100, 2, 0.99) == 1
|
||||
|
||||
# e = 5%, min_samples = 2
|
||||
assert _dynamic_max_trials(95, 100, 2, 0.99) == 2
|
||||
# e = 10%, min_samples = 2
|
||||
assert _dynamic_max_trials(90, 100, 2, 0.99) == 3
|
||||
# e = 30%, min_samples = 2
|
||||
assert _dynamic_max_trials(70, 100, 2, 0.99) == 7
|
||||
# e = 50%, min_samples = 2
|
||||
assert _dynamic_max_trials(50, 100, 2, 0.99) == 17
|
||||
|
||||
# e = 5%, min_samples = 8
|
||||
assert _dynamic_max_trials(95, 100, 8, 0.99) == 5
|
||||
# e = 10%, min_samples = 8
|
||||
assert _dynamic_max_trials(90, 100, 8, 0.99) == 9
|
||||
# e = 30%, min_samples = 8
|
||||
assert _dynamic_max_trials(70, 100, 8, 0.99) == 78
|
||||
# e = 50%, min_samples = 8
|
||||
assert _dynamic_max_trials(50, 100, 8, 0.99) == 1177
|
||||
|
||||
# e = 0%, min_samples = 10
|
||||
assert _dynamic_max_trials(1, 100, 10, 0) == 0
|
||||
assert _dynamic_max_trials(1, 100, 10, 1) == float('inf')
|
||||
|
||||
base_estimator = LinearRegression()
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
stop_probability=-0.1)
|
||||
assert_raises(ValueError, ransac_estimator.fit, X, y)
|
||||
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
|
||||
stop_probability=1.1)
|
||||
assert_raises(ValueError, ransac_estimator.fit, X, y)
|
||||
|
||||
|
||||
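# Illustrative sketch (not sklearn's `_dynamic_max_trials`): the standard
# RANSAC stopping rule checked in the table above. With inlier ratio w,
# subset size m and target confidence p, the number of random subsets needed
# is N = log(1 - p) / log(1 - w**m) (Hartley & Zisserman, 2004). The real
# helper additionally guards the degenerate cases (w in {0, 1}, p in {0, 1}).
def _approx_max_trials(n_inliers, n_samples, min_samples, probability):
    inlier_ratio = n_inliers / float(n_samples)
    nom = np.log(1 - probability)
    denom = np.log(1 - inlier_ratio ** min_samples)
    return int(np.ceil(nom / denom))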
def test_ransac_fit_sample_weight():
|
||||
ransac_estimator = RANSACRegressor(random_state=0)
|
||||
n_samples = y.shape[0]
|
||||
weights = np.ones(n_samples)
|
||||
ransac_estimator.fit(X, y, weights)
|
||||
# sanity check
|
||||
assert ransac_estimator.inlier_mask_.shape[0] == n_samples
|
||||
|
||||
ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
|
||||
).astype(np.bool_)
|
||||
ref_inlier_mask[outliers] = False
|
||||
# check that mask is correct
|
||||
assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
# check that fit(X) = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where
|
||||
# X = X1 repeated n1 times, X2 repeated n2 times and so forth
|
||||
random_state = check_random_state(0)
|
||||
X_ = random_state.randint(0, 200, [10, 1])
|
||||
y_ = np.ndarray.flatten(0.2 * X_ + 2)
|
||||
sample_weight = random_state.randint(0, 10, 10)
|
||||
outlier_X = random_state.randint(0, 1000, [1, 1])
|
||||
outlier_weight = random_state.randint(0, 10, 1)
|
||||
outlier_y = random_state.randint(-1000, 0, 1)
|
||||
|
||||
X_flat = np.append(np.repeat(X_, sample_weight, axis=0),
|
||||
np.repeat(outlier_X, outlier_weight, axis=0), axis=0)
|
||||
y_flat = np.ndarray.flatten(np.append(np.repeat(y_, sample_weight, axis=0),
|
||||
np.repeat(outlier_y, outlier_weight, axis=0),
|
||||
axis=0))
|
||||
ransac_estimator.fit(X_flat, y_flat)
|
||||
ref_coef_ = ransac_estimator.estimator_.coef_
|
||||
|
||||
sample_weight = np.append(sample_weight, outlier_weight)
|
||||
X_ = np.append(X_, outlier_X, axis=0)
|
||||
y_ = np.append(y_, outlier_y)
|
||||
ransac_estimator.fit(X_, y_, sample_weight)
|
||||
|
||||
assert_allclose(ransac_estimator.estimator_.coef_, ref_coef_)
|
||||
|
||||
# check that if base_estimator.fit doesn't support
|
||||
# sample_weight, raises error
|
||||
base_estimator = OrthogonalMatchingPursuit()
|
||||
ransac_estimator = RANSACRegressor(base_estimator)
|
||||
assert_raises(ValueError, ransac_estimator.fit, X, y, weights)
|
||||
|
||||
|
||||
def test_ransac_final_model_fit_sample_weight():
|
||||
X, y = make_regression(n_samples=1000, random_state=10)
|
||||
rng = check_random_state(42)
|
||||
sample_weight = rng.randint(1, 4, size=y.shape[0])
|
||||
sample_weight = sample_weight / sample_weight.sum()
|
||||
ransac = RANSACRegressor(base_estimator=LinearRegression(), random_state=0)
|
||||
ransac.fit(X, y, sample_weight=sample_weight)
|
||||
|
||||
final_model = LinearRegression()
|
||||
mask_samples = ransac.inlier_mask_
|
||||
final_model.fit(
|
||||
X[mask_samples], y[mask_samples],
|
||||
sample_weight=sample_weight[mask_samples]
|
||||
)
|
||||
|
||||
assert_allclose(ransac.estimator_.coef_, final_model.coef_)
|
1326
venv/Lib/site-packages/sklearn/linear_model/tests/test_ridge.py
Normal file
File diff suppressed because it is too large
848
venv/Lib/site-packages/sklearn/linear_model/tests/test_sag.py
Normal file
@@ -0,0 +1,848 @@
# Authors: Danny Sullivan <dbsullivan23@gmail.com>
|
||||
# Tom Dupre la Tour <tom.dupre-la-tour@m4x.org>
|
||||
#
|
||||
# License: BSD 3 clause
|
||||
|
||||
import math
|
||||
import pytest
|
||||
import numpy as np
|
||||
import scipy.sparse as sp
|
||||
from scipy.special import logsumexp
|
||||
|
||||
from sklearn.linear_model._sag import get_auto_step_size
|
||||
from sklearn.linear_model._sag_fast import _multinomial_grad_loss_all_samples
|
||||
from sklearn.linear_model import LogisticRegression, Ridge
|
||||
from sklearn.linear_model._base import make_dataset
|
||||
from sklearn.linear_model._logistic import _multinomial_loss_grad
|
||||
|
||||
from sklearn.utils.extmath import row_norms
|
||||
from sklearn.utils._testing import assert_almost_equal
|
||||
from sklearn.utils._testing import assert_array_almost_equal
|
||||
from sklearn.utils._testing import assert_allclose
|
||||
from sklearn.utils._testing import assert_raise_message
|
||||
from sklearn.utils import compute_class_weight
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.preprocessing import LabelEncoder, LabelBinarizer
|
||||
from sklearn.datasets import make_blobs, load_iris, make_classification
|
||||
from sklearn.base import clone
|
||||
|
||||
iris = load_iris()
|
||||
|
||||
|
||||
# this is used for sag classification
|
||||
def log_dloss(p, y):
|
||||
z = p * y
|
||||
# approximately equal and saves the computation of the log
|
||||
if z > 18.0:
|
||||
return math.exp(-z) * -y
|
||||
if z < -18.0:
|
||||
return -y
|
||||
return -y / (math.exp(z) + 1.0)
|
||||
|
||||
|
||||
def log_loss(p, y):
|
||||
return np.mean(np.log(1. + np.exp(-y * p)))
|
||||
|
||||
|
||||
# this is used for sag regression
|
||||
def squared_dloss(p, y):
|
||||
return p - y
|
||||
|
||||
|
||||
def squared_loss(p, y):
|
||||
return np.mean(0.5 * (p - y) * (p - y))
|
||||
|
||||
|
||||
# function for measuring the penalized objective (loss + L2 penalty)
|
||||
def get_pobj(w, alpha, myX, myy, loss):
|
||||
w = w.ravel()
|
||||
pred = np.dot(myX, w)
|
||||
p = loss(pred, myy)
|
||||
p += alpha * w.dot(w) / 2.
|
||||
return p
|
||||
|
||||
|
||||
def sag(X, y, step_size, alpha, n_iter=1, dloss=None, sparse=False,
|
||||
sample_weight=None, fit_intercept=True, saga=False):
|
||||
n_samples, n_features = X.shape[0], X.shape[1]
|
||||
|
||||
weights = np.zeros(X.shape[1])
|
||||
sum_gradient = np.zeros(X.shape[1])
|
||||
gradient_memory = np.zeros((n_samples, n_features))
|
||||
|
||||
intercept = 0.0
|
||||
intercept_sum_gradient = 0.0
|
||||
intercept_gradient_memory = np.zeros(n_samples)
|
||||
|
||||
rng = np.random.RandomState(77)
|
||||
decay = 1.0
|
||||
seen = set()
|
||||
|
||||
# sparse data has a fixed decay of .01
|
||||
if sparse:
|
||||
decay = .01
|
||||
|
||||
for epoch in range(n_iter):
|
||||
for k in range(n_samples):
|
||||
idx = int(rng.rand(1) * n_samples)
|
||||
# idx = k
|
||||
entry = X[idx]
|
||||
seen.add(idx)
|
||||
p = np.dot(entry, weights) + intercept
|
||||
gradient = dloss(p, y[idx])
|
||||
if sample_weight is not None:
|
||||
gradient *= sample_weight[idx]
|
||||
update = entry * gradient + alpha * weights
|
||||
gradient_correction = update - gradient_memory[idx]
|
||||
sum_gradient += gradient_correction
|
||||
gradient_memory[idx] = update
|
||||
if saga:
|
||||
weights -= (gradient_correction *
|
||||
step_size * (1 - 1. / len(seen)))
|
||||
|
||||
if fit_intercept:
|
||||
gradient_correction = (gradient -
|
||||
intercept_gradient_memory[idx])
|
||||
intercept_gradient_memory[idx] = gradient
|
||||
intercept_sum_gradient += gradient_correction
|
||||
gradient_correction *= step_size * (1. - 1. / len(seen))
|
||||
if saga:
|
||||
intercept -= (step_size * intercept_sum_gradient /
|
||||
len(seen) * decay) + gradient_correction
|
||||
else:
|
||||
intercept -= (step_size * intercept_sum_gradient /
|
||||
len(seen) * decay)
|
||||
|
||||
weights -= step_size * sum_gradient / len(seen)
|
||||
|
||||
return weights, intercept
|
||||
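As implemented by the dense reference sag() above: each step draws a random index i, keeps a per-sample gradient memory g_i (here including the \alpha w term), and updates

    d \leftarrow d + \bigl(\nabla f_i(w) - g_i\bigr), \qquad
    g_i \leftarrow \nabla f_i(w), \qquad
    w \leftarrow w - \frac{\gamma}{m}\, d

where m is the number of distinct samples seen so far and \gamma the step size. With saga=True the code also applies the correction term immediately, w \leftarrow w - \gamma\,(1 - 1/m)\,\bigl(\nabla f_i(w) - g_i\bigr), and the intercept follows the same scheme scaled by the decay factor.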
|
||||
|
||||
def sag_sparse(X, y, step_size, alpha, n_iter=1,
|
||||
dloss=None, sample_weight=None, sparse=False,
|
||||
fit_intercept=True, saga=False, random_state=0):
|
||||
if step_size * alpha == 1.:
|
||||
raise ZeroDivisionError("Sparse sag does not handle the case "
|
||||
"step_size * alpha == 1")
|
||||
n_samples, n_features = X.shape[0], X.shape[1]
|
||||
|
||||
weights = np.zeros(n_features)
|
||||
sum_gradient = np.zeros(n_features)
|
||||
    last_updated = np.zeros(n_features, dtype=int)  # np.int is deprecated in recent NumPy
|
||||
gradient_memory = np.zeros(n_samples)
|
||||
rng = check_random_state(random_state)
|
||||
intercept = 0.0
|
||||
intercept_sum_gradient = 0.0
|
||||
wscale = 1.0
|
||||
decay = 1.0
|
||||
seen = set()
|
||||
|
||||
c_sum = np.zeros(n_iter * n_samples)
|
||||
|
||||
# sparse data has a fixed decay of .01
|
||||
if sparse:
|
||||
decay = .01
|
||||
|
||||
counter = 0
|
||||
for epoch in range(n_iter):
|
||||
for k in range(n_samples):
|
||||
# idx = k
|
||||
idx = int(rng.rand(1) * n_samples)
|
||||
entry = X[idx]
|
||||
seen.add(idx)
|
||||
|
||||
if counter >= 1:
|
||||
for j in range(n_features):
|
||||
if last_updated[j] == 0:
|
||||
weights[j] -= c_sum[counter - 1] * sum_gradient[j]
|
||||
else:
|
||||
weights[j] -= ((c_sum[counter - 1] -
|
||||
c_sum[last_updated[j] - 1]) *
|
||||
sum_gradient[j])
|
||||
last_updated[j] = counter
|
||||
|
||||
p = (wscale * np.dot(entry, weights)) + intercept
|
||||
gradient = dloss(p, y[idx])
|
||||
|
||||
if sample_weight is not None:
|
||||
gradient *= sample_weight[idx]
|
||||
|
||||
update = entry * gradient
|
||||
gradient_correction = update - (gradient_memory[idx] * entry)
|
||||
sum_gradient += gradient_correction
|
||||
if saga:
|
||||
for j in range(n_features):
|
||||
weights[j] -= (gradient_correction[j] * step_size *
|
||||
(1 - 1. / len(seen)) / wscale)
|
||||
|
||||
if fit_intercept:
|
||||
gradient_correction = gradient - gradient_memory[idx]
|
||||
intercept_sum_gradient += gradient_correction
|
||||
gradient_correction *= step_size * (1. - 1. / len(seen))
|
||||
if saga:
|
||||
intercept -= ((step_size * intercept_sum_gradient /
|
||||
len(seen) * decay) +
|
||||
gradient_correction)
|
||||
else:
|
||||
intercept -= (step_size * intercept_sum_gradient /
|
||||
len(seen) * decay)
|
||||
|
||||
gradient_memory[idx] = gradient
|
||||
|
||||
wscale *= (1.0 - alpha * step_size)
|
||||
if counter == 0:
|
||||
c_sum[0] = step_size / (wscale * len(seen))
|
||||
else:
|
||||
c_sum[counter] = (c_sum[counter - 1] +
|
||||
step_size / (wscale * len(seen)))
|
||||
|
||||
if counter >= 1 and wscale < 1e-9:
|
||||
for j in range(n_features):
|
||||
if last_updated[j] == 0:
|
||||
weights[j] -= c_sum[counter] * sum_gradient[j]
|
||||
else:
|
||||
weights[j] -= ((c_sum[counter] -
|
||||
c_sum[last_updated[j] - 1]) *
|
||||
sum_gradient[j])
|
||||
last_updated[j] = counter + 1
|
||||
c_sum[counter] = 0
|
||||
weights *= wscale
|
||||
wscale = 1.0
|
||||
|
||||
counter += 1
|
||||
|
||||
for j in range(n_features):
|
||||
if last_updated[j] == 0:
|
||||
weights[j] -= c_sum[counter - 1] * sum_gradient[j]
|
||||
else:
|
||||
weights[j] -= ((c_sum[counter - 1] -
|
||||
c_sum[last_updated[j] - 1]) *
|
||||
sum_gradient[j])
|
||||
weights *= wscale
|
||||
return weights, intercept
|
||||
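sag_sparse() produces the same iterates as sag() but applies them lazily, mirroring what the solver does on sparse input: the weights are stored as w = wscale \cdot v, the per-step shrinkage (1 - \alpha\gamma) only touches wscale, and the cumulative sums

    c_t = c_{t-1} + \frac{\gamma}{\text{wscale}_t \, m_t}

let a coordinate j last touched at step u_j catch up via v_j \leftarrow v_j - \bigl(c_{t-1} - c_{u_j - 1}\bigr)\, d_j just before it is read again, with a full flush of all coordinates whenever wscale drops below 1e-9.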
|
||||
|
||||
def get_step_size(X, alpha, fit_intercept, classification=True):
|
||||
if classification:
|
||||
return (4.0 / (np.max(np.sum(X * X, axis=1)) +
|
||||
fit_intercept + 4.0 * alpha))
|
||||
else:
|
||||
return 1.0 / (np.max(np.sum(X * X, axis=1)) + fit_intercept + alpha)
|
||||
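The constants returned by get_step_size mirror the non-SAGA branch of sklearn's get_auto_step_size (exercised in test_get_auto_step_size below):

    \gamma_{\text{log}} = \frac{4}{\max_i \lVert x_i \rVert^2 + \mathbb{1}[\text{fit\_intercept}] + 4\alpha}, \qquad
    \gamma_{\text{sq}} = \frac{1}{\max_i \lVert x_i \rVert^2 + \mathbb{1}[\text{fit\_intercept}] + \alpha}

For is_saga=True that test instead expects \gamma = 1 / \bigl(2L + \min(2 n \alpha, L)\bigr), with L the corresponding Lipschitz constant.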
|
||||
|
||||
def test_classifier_matching():
|
||||
n_samples = 20
|
||||
X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0,
|
||||
cluster_std=0.1)
|
||||
y[y == 0] = -1
|
||||
alpha = 1.1
|
||||
fit_intercept = True
|
||||
step_size = get_step_size(X, alpha, fit_intercept)
|
||||
for solver in ['sag', 'saga']:
|
||||
if solver == 'sag':
|
||||
n_iter = 80
|
||||
else:
|
||||
# SAGA variance w.r.t. stream order is higher
|
||||
n_iter = 300
|
||||
clf = LogisticRegression(solver=solver, fit_intercept=fit_intercept,
|
||||
tol=1e-11, C=1. / alpha / n_samples,
|
||||
max_iter=n_iter, random_state=10,
|
||||
multi_class='ovr')
|
||||
clf.fit(X, y)
|
||||
|
||||
weights, intercept = sag_sparse(X, y, step_size, alpha, n_iter=n_iter,
|
||||
dloss=log_dloss,
|
||||
fit_intercept=fit_intercept,
|
||||
saga=solver == 'saga')
|
||||
weights2, intercept2 = sag(X, y, step_size, alpha, n_iter=n_iter,
|
||||
dloss=log_dloss,
|
||||
fit_intercept=fit_intercept,
|
||||
saga=solver == 'saga')
|
||||
weights = np.atleast_2d(weights)
|
||||
intercept = np.atleast_1d(intercept)
|
||||
weights2 = np.atleast_2d(weights2)
|
||||
intercept2 = np.atleast_1d(intercept2)
|
||||
|
||||
assert_array_almost_equal(weights, clf.coef_, decimal=9)
|
||||
assert_array_almost_equal(intercept, clf.intercept_, decimal=9)
|
||||
assert_array_almost_equal(weights2, clf.coef_, decimal=9)
|
||||
assert_array_almost_equal(intercept2, clf.intercept_, decimal=9)
|
||||
|
||||
|
||||
def test_regressor_matching():
|
||||
n_samples = 10
|
||||
n_features = 5
|
||||
|
||||
rng = np.random.RandomState(10)
|
||||
X = rng.normal(size=(n_samples, n_features))
|
||||
true_w = rng.normal(size=n_features)
|
||||
y = X.dot(true_w)
|
||||
|
||||
alpha = 1.
|
||||
n_iter = 100
|
||||
fit_intercept = True
|
||||
|
||||
step_size = get_step_size(X, alpha, fit_intercept, classification=False)
|
||||
clf = Ridge(fit_intercept=fit_intercept, tol=.00000000001, solver='sag',
|
||||
alpha=alpha * n_samples, max_iter=n_iter)
|
||||
clf.fit(X, y)
|
||||
|
||||
weights1, intercept1 = sag_sparse(X, y, step_size, alpha, n_iter=n_iter,
|
||||
dloss=squared_dloss,
|
||||
fit_intercept=fit_intercept)
|
||||
weights2, intercept2 = sag(X, y, step_size, alpha, n_iter=n_iter,
|
||||
dloss=squared_dloss,
|
||||
fit_intercept=fit_intercept)
|
||||
|
||||
assert_allclose(weights1, clf.coef_)
|
||||
assert_allclose(intercept1, clf.intercept_)
|
||||
assert_allclose(weights2, clf.coef_)
|
||||
assert_allclose(intercept2, clf.intercept_)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
|
||||
def test_sag_pobj_matches_logistic_regression():
|
||||
"""tests if the sag pobj matches log reg"""
|
||||
n_samples = 100
|
||||
alpha = 1.0
|
||||
max_iter = 20
|
||||
X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0,
|
||||
cluster_std=0.1)
|
||||
|
||||
clf1 = LogisticRegression(solver='sag', fit_intercept=False, tol=.0000001,
|
||||
C=1. / alpha / n_samples, max_iter=max_iter,
|
||||
random_state=10, multi_class='ovr')
|
||||
clf2 = clone(clf1)
|
||||
clf3 = LogisticRegression(fit_intercept=False, tol=.0000001,
|
||||
C=1. / alpha / n_samples, max_iter=max_iter,
|
||||
random_state=10, multi_class='ovr')
|
||||
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
clf3.fit(X, y)
|
||||
|
||||
pobj1 = get_pobj(clf1.coef_, alpha, X, y, log_loss)
|
||||
pobj2 = get_pobj(clf2.coef_, alpha, X, y, log_loss)
|
||||
pobj3 = get_pobj(clf3.coef_, alpha, X, y, log_loss)
|
||||
|
||||
assert_array_almost_equal(pobj1, pobj2, decimal=4)
|
||||
assert_array_almost_equal(pobj2, pobj3, decimal=4)
|
||||
assert_array_almost_equal(pobj3, pobj1, decimal=4)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
|
||||
def test_sag_pobj_matches_ridge_regression():
|
||||
"""tests if the sag pobj matches ridge reg"""
|
||||
n_samples = 100
|
||||
n_features = 10
|
||||
alpha = 1.0
|
||||
n_iter = 100
|
||||
fit_intercept = False
|
||||
rng = np.random.RandomState(10)
|
||||
X = rng.normal(size=(n_samples, n_features))
|
||||
true_w = rng.normal(size=n_features)
|
||||
y = X.dot(true_w)
|
||||
|
||||
clf1 = Ridge(fit_intercept=fit_intercept, tol=.00000000001, solver='sag',
|
||||
alpha=alpha, max_iter=n_iter, random_state=42)
|
||||
clf2 = clone(clf1)
|
||||
clf3 = Ridge(fit_intercept=fit_intercept, tol=.00001, solver='lsqr',
|
||||
alpha=alpha, max_iter=n_iter, random_state=42)
|
||||
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
clf3.fit(X, y)
|
||||
|
||||
pobj1 = get_pobj(clf1.coef_, alpha, X, y, squared_loss)
|
||||
pobj2 = get_pobj(clf2.coef_, alpha, X, y, squared_loss)
|
||||
pobj3 = get_pobj(clf3.coef_, alpha, X, y, squared_loss)
|
||||
|
||||
assert_array_almost_equal(pobj1, pobj2, decimal=4)
|
||||
assert_array_almost_equal(pobj1, pobj3, decimal=4)
|
||||
assert_array_almost_equal(pobj3, pobj2, decimal=4)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
|
||||
def test_sag_regressor_computed_correctly():
|
||||
"""tests if the sag regressor is computed correctly"""
|
||||
alpha = .1
|
||||
n_features = 10
|
||||
n_samples = 40
|
||||
max_iter = 100
|
||||
tol = .000001
|
||||
fit_intercept = True
|
||||
rng = np.random.RandomState(0)
|
||||
X = rng.normal(size=(n_samples, n_features))
|
||||
w = rng.normal(size=n_features)
|
||||
y = np.dot(X, w) + 2.
|
||||
step_size = get_step_size(X, alpha, fit_intercept, classification=False)
|
||||
|
||||
clf1 = Ridge(fit_intercept=fit_intercept, tol=tol, solver='sag',
|
||||
alpha=alpha * n_samples, max_iter=max_iter,
|
||||
random_state=rng)
|
||||
clf2 = clone(clf1)
|
||||
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
|
||||
spweights1, spintercept1 = sag_sparse(X, y, step_size, alpha,
|
||||
n_iter=max_iter,
|
||||
dloss=squared_dloss,
|
||||
fit_intercept=fit_intercept,
|
||||
random_state=rng)
|
||||
|
||||
spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
|
||||
n_iter=max_iter,
|
||||
dloss=squared_dloss, sparse=True,
|
||||
fit_intercept=fit_intercept,
|
||||
random_state=rng)
|
||||
|
||||
assert_array_almost_equal(clf1.coef_.ravel(),
|
||||
spweights1.ravel(),
|
||||
decimal=3)
|
||||
assert_almost_equal(clf1.intercept_, spintercept1, decimal=1)
|
||||
|
||||
# TODO: uncomment when sparse Ridge with intercept will be fixed (#4710)
|
||||
# assert_array_almost_equal(clf2.coef_.ravel(),
|
||||
# spweights2.ravel(),
|
||||
# decimal=3)
|
||||
    # assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)
|
||||
|
||||
|
||||
def test_get_auto_step_size():
|
||||
X = np.array([[1, 2, 3], [2, 3, 4], [2, 3, 2]], dtype=np.float64)
|
||||
alpha = 1.2
|
||||
fit_intercept = False
|
||||
# sum the squares of the second sample because that's the largest
|
||||
max_squared_sum = 4 + 9 + 16
|
||||
max_squared_sum_ = row_norms(X, squared=True).max()
|
||||
n_samples = X.shape[0]
|
||||
assert_almost_equal(max_squared_sum, max_squared_sum_, decimal=4)
|
||||
|
||||
for saga in [True, False]:
|
||||
for fit_intercept in (True, False):
|
||||
if saga:
|
||||
L_sqr = (max_squared_sum + alpha + int(fit_intercept))
|
||||
L_log = (max_squared_sum + 4.0 * alpha +
|
||||
int(fit_intercept)) / 4.0
|
||||
mun_sqr = min(2 * n_samples * alpha, L_sqr)
|
||||
mun_log = min(2 * n_samples * alpha, L_log)
|
||||
step_size_sqr = 1 / (2 * L_sqr + mun_sqr)
|
||||
step_size_log = 1 / (2 * L_log + mun_log)
|
||||
else:
|
||||
step_size_sqr = 1.0 / (max_squared_sum +
|
||||
alpha + int(fit_intercept))
|
||||
step_size_log = 4.0 / (max_squared_sum + 4.0 * alpha +
|
||||
int(fit_intercept))
|
||||
|
||||
step_size_sqr_ = get_auto_step_size(max_squared_sum_, alpha,
|
||||
"squared",
|
||||
fit_intercept,
|
||||
n_samples=n_samples,
|
||||
is_saga=saga)
|
||||
step_size_log_ = get_auto_step_size(max_squared_sum_, alpha, "log",
|
||||
fit_intercept,
|
||||
n_samples=n_samples,
|
||||
is_saga=saga)
|
||||
|
||||
assert_almost_equal(step_size_sqr, step_size_sqr_, decimal=4)
|
||||
assert_almost_equal(step_size_log, step_size_log_, decimal=4)
|
||||
|
||||
msg = 'Unknown loss function for SAG solver, got wrong instead of'
|
||||
assert_raise_message(ValueError, msg, get_auto_step_size,
|
||||
max_squared_sum_, alpha, "wrong", fit_intercept)
|
||||
|
||||
|
||||
def test_sag_regressor():
|
||||
"""tests if the sag regressor performs well"""
|
||||
xmin, xmax = -5, 5
|
||||
n_samples = 20
|
||||
tol = .001
|
||||
max_iter = 50
|
||||
alpha = 0.1
|
||||
rng = np.random.RandomState(0)
|
||||
X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1)
|
||||
|
||||
# simple linear function without noise
|
||||
y = 0.5 * X.ravel()
|
||||
|
||||
clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
|
||||
alpha=alpha * n_samples, random_state=rng)
|
||||
clf2 = clone(clf1)
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
score1 = clf1.score(X, y)
|
||||
score2 = clf2.score(X, y)
|
||||
assert score1 > 0.99
|
||||
assert score2 > 0.99
|
||||
|
||||
# simple linear function with noise
|
||||
y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel()
|
||||
|
||||
clf1 = Ridge(tol=tol, solver='sag', max_iter=max_iter,
|
||||
alpha=alpha * n_samples)
|
||||
clf2 = clone(clf1)
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
score1 = clf1.score(X, y)
|
||||
    score2 = clf2.score(X, y)
|
||||
assert score1 > 0.5
|
||||
assert score2 > 0.5
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
|
||||
def test_sag_classifier_computed_correctly():
|
||||
"""tests if the binary classifier is computed correctly"""
|
||||
alpha = .1
|
||||
n_samples = 50
|
||||
n_iter = 50
|
||||
tol = .00001
|
||||
fit_intercept = True
|
||||
X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0,
|
||||
cluster_std=0.1)
|
||||
step_size = get_step_size(X, alpha, fit_intercept, classification=True)
|
||||
classes = np.unique(y)
|
||||
y_tmp = np.ones(n_samples)
|
||||
y_tmp[y != classes[1]] = -1
|
||||
y = y_tmp
|
||||
|
||||
clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
|
||||
max_iter=n_iter, tol=tol, random_state=77,
|
||||
fit_intercept=fit_intercept, multi_class='ovr')
|
||||
clf2 = clone(clf1)
|
||||
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
|
||||
spweights, spintercept = sag_sparse(X, y, step_size, alpha, n_iter=n_iter,
|
||||
dloss=log_dloss,
|
||||
fit_intercept=fit_intercept)
|
||||
spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
|
||||
n_iter=n_iter,
|
||||
dloss=log_dloss, sparse=True,
|
||||
fit_intercept=fit_intercept)
|
||||
|
||||
assert_array_almost_equal(clf1.coef_.ravel(),
|
||||
spweights.ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf1.intercept_, spintercept, decimal=1)
|
||||
|
||||
assert_array_almost_equal(clf2.coef_.ravel(),
|
||||
spweights2.ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
|
||||
def test_sag_multiclass_computed_correctly():
|
||||
"""tests if the multiclass classifier is computed correctly"""
|
||||
alpha = .1
|
||||
n_samples = 20
|
||||
tol = .00001
|
||||
max_iter = 40
|
||||
fit_intercept = True
|
||||
X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0,
|
||||
cluster_std=0.1)
|
||||
step_size = get_step_size(X, alpha, fit_intercept, classification=True)
|
||||
classes = np.unique(y)
|
||||
|
||||
clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
|
||||
max_iter=max_iter, tol=tol, random_state=77,
|
||||
fit_intercept=fit_intercept, multi_class='ovr')
|
||||
clf2 = clone(clf1)
|
||||
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
|
||||
coef1 = []
|
||||
intercept1 = []
|
||||
coef2 = []
|
||||
intercept2 = []
|
||||
for cl in classes:
|
||||
y_encoded = np.ones(n_samples)
|
||||
y_encoded[y != cl] = -1
|
||||
|
||||
spweights1, spintercept1 = sag_sparse(X, y_encoded, step_size, alpha,
|
||||
dloss=log_dloss, n_iter=max_iter,
|
||||
fit_intercept=fit_intercept)
|
||||
spweights2, spintercept2 = sag_sparse(X, y_encoded, step_size, alpha,
|
||||
dloss=log_dloss, n_iter=max_iter,
|
||||
sparse=True,
|
||||
fit_intercept=fit_intercept)
|
||||
coef1.append(spweights1)
|
||||
intercept1.append(spintercept1)
|
||||
|
||||
coef2.append(spweights2)
|
||||
intercept2.append(spintercept2)
|
||||
|
||||
coef1 = np.vstack(coef1)
|
||||
intercept1 = np.array(intercept1)
|
||||
coef2 = np.vstack(coef2)
|
||||
intercept2 = np.array(intercept2)
|
||||
|
||||
for i, cl in enumerate(classes):
|
||||
assert_array_almost_equal(clf1.coef_[i].ravel(),
|
||||
coef1[i].ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf1.intercept_[i], intercept1[i], decimal=1)
|
||||
|
||||
assert_array_almost_equal(clf2.coef_[i].ravel(),
|
||||
coef2[i].ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)
|
||||
|
||||
|
||||
def test_classifier_results():
|
||||
"""tests if classifier results match target"""
|
||||
alpha = .1
|
||||
n_features = 20
|
||||
n_samples = 10
|
||||
tol = .01
|
||||
max_iter = 200
|
||||
rng = np.random.RandomState(0)
|
||||
X = rng.normal(size=(n_samples, n_features))
|
||||
w = rng.normal(size=n_features)
|
||||
y = np.dot(X, w)
|
||||
y = np.sign(y)
|
||||
clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
|
||||
max_iter=max_iter, tol=tol, random_state=77)
|
||||
clf2 = clone(clf1)
|
||||
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
pred1 = clf1.predict(X)
|
||||
pred2 = clf2.predict(X)
|
||||
assert_almost_equal(pred1, y, decimal=12)
|
||||
assert_almost_equal(pred2, y, decimal=12)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
|
||||
def test_binary_classifier_class_weight():
|
||||
"""tests binary classifier with classweights for each class"""
|
||||
alpha = .1
|
||||
n_samples = 50
|
||||
n_iter = 20
|
||||
tol = .00001
|
||||
fit_intercept = True
|
||||
X, y = make_blobs(n_samples=n_samples, centers=2, random_state=10,
|
||||
cluster_std=0.1)
|
||||
step_size = get_step_size(X, alpha, fit_intercept, classification=True)
|
||||
classes = np.unique(y)
|
||||
y_tmp = np.ones(n_samples)
|
||||
y_tmp[y != classes[1]] = -1
|
||||
y = y_tmp
|
||||
|
||||
class_weight = {1: .45, -1: .55}
|
||||
clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
|
||||
max_iter=n_iter, tol=tol, random_state=77,
|
||||
fit_intercept=fit_intercept, multi_class='ovr',
|
||||
class_weight=class_weight)
|
||||
clf2 = clone(clf1)
|
||||
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
|
||||
le = LabelEncoder()
|
||||
class_weight_ = compute_class_weight(class_weight, classes=np.unique(y),
|
||||
y=y)
|
||||
sample_weight = class_weight_[le.fit_transform(y)]
|
||||
spweights, spintercept = sag_sparse(X, y, step_size, alpha, n_iter=n_iter,
|
||||
dloss=log_dloss,
|
||||
sample_weight=sample_weight,
|
||||
fit_intercept=fit_intercept)
|
||||
spweights2, spintercept2 = sag_sparse(X, y, step_size, alpha,
|
||||
n_iter=n_iter,
|
||||
dloss=log_dloss, sparse=True,
|
||||
sample_weight=sample_weight,
|
||||
fit_intercept=fit_intercept)
|
||||
|
||||
assert_array_almost_equal(clf1.coef_.ravel(),
|
||||
spweights.ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf1.intercept_, spintercept, decimal=1)
|
||||
|
||||
assert_array_almost_equal(clf2.coef_.ravel(),
|
||||
spweights2.ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings('ignore:The max_iter was reached')
|
||||
def test_multiclass_classifier_class_weight():
|
||||
"""tests multiclass with classweights for each class"""
|
||||
alpha = .1
|
||||
n_samples = 20
|
||||
tol = .00001
|
||||
max_iter = 50
|
||||
class_weight = {0: .45, 1: .55, 2: .75}
|
||||
fit_intercept = True
|
||||
X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0,
|
||||
cluster_std=0.1)
|
||||
step_size = get_step_size(X, alpha, fit_intercept, classification=True)
|
||||
classes = np.unique(y)
|
||||
|
||||
clf1 = LogisticRegression(solver='sag', C=1. / alpha / n_samples,
|
||||
max_iter=max_iter, tol=tol, random_state=77,
|
||||
fit_intercept=fit_intercept, multi_class='ovr',
|
||||
class_weight=class_weight)
|
||||
clf2 = clone(clf1)
|
||||
clf1.fit(X, y)
|
||||
clf2.fit(sp.csr_matrix(X), y)
|
||||
|
||||
le = LabelEncoder()
|
||||
class_weight_ = compute_class_weight(class_weight, classes=np.unique(y),
|
||||
y=y)
|
||||
sample_weight = class_weight_[le.fit_transform(y)]
|
||||
|
||||
coef1 = []
|
||||
intercept1 = []
|
||||
coef2 = []
|
||||
intercept2 = []
|
||||
for cl in classes:
|
||||
y_encoded = np.ones(n_samples)
|
||||
y_encoded[y != cl] = -1
|
||||
|
||||
spweights1, spintercept1 = sag_sparse(X, y_encoded, step_size, alpha,
|
||||
n_iter=max_iter, dloss=log_dloss,
|
||||
sample_weight=sample_weight)
|
||||
spweights2, spintercept2 = sag_sparse(X, y_encoded, step_size, alpha,
|
||||
n_iter=max_iter, dloss=log_dloss,
|
||||
sample_weight=sample_weight,
|
||||
sparse=True)
|
||||
coef1.append(spweights1)
|
||||
intercept1.append(spintercept1)
|
||||
coef2.append(spweights2)
|
||||
intercept2.append(spintercept2)
|
||||
|
||||
coef1 = np.vstack(coef1)
|
||||
intercept1 = np.array(intercept1)
|
||||
coef2 = np.vstack(coef2)
|
||||
intercept2 = np.array(intercept2)
|
||||
|
||||
for i, cl in enumerate(classes):
|
||||
assert_array_almost_equal(clf1.coef_[i].ravel(),
|
||||
coef1[i].ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf1.intercept_[i], intercept1[i], decimal=1)
|
||||
|
||||
assert_array_almost_equal(clf2.coef_[i].ravel(),
|
||||
coef2[i].ravel(),
|
||||
decimal=2)
|
||||
assert_almost_equal(clf2.intercept_[i], intercept2[i], decimal=1)
|
||||
|
||||
|
||||
def test_classifier_single_class():
|
||||
"""tests if ValueError is thrown with only one class"""
|
||||
X = [[1, 2], [3, 4]]
|
||||
y = [1, 1]
|
||||
|
||||
assert_raise_message(ValueError,
|
||||
"This solver needs samples of at least 2 classes "
|
||||
"in the data",
|
||||
LogisticRegression(solver='sag').fit,
|
||||
X, y)
|
||||
|
||||
|
||||
def test_step_size_alpha_error():
|
||||
X = [[0, 0], [0, 0]]
|
||||
y = [1, -1]
|
||||
fit_intercept = False
|
||||
alpha = 1.
|
||||
msg = ("Current sag implementation does not handle the case"
|
||||
" step_size * alpha_scaled == 1")
|
||||
|
||||
clf1 = LogisticRegression(solver='sag', C=1. / alpha,
|
||||
fit_intercept=fit_intercept)
|
||||
assert_raise_message(ZeroDivisionError, msg, clf1.fit, X, y)
|
||||
|
||||
clf2 = Ridge(fit_intercept=fit_intercept, solver='sag', alpha=alpha)
|
||||
assert_raise_message(ZeroDivisionError, msg, clf2.fit, X, y)
|
||||
|
||||
|
||||
def test_multinomial_loss():
|
||||
# test if the multinomial loss and gradient computations are consistent
|
||||
X, y = iris.data, iris.target.astype(np.float64)
|
||||
n_samples, n_features = X.shape
|
||||
n_classes = len(np.unique(y))
|
||||
|
||||
rng = check_random_state(42)
|
||||
weights = rng.randn(n_features, n_classes)
|
||||
intercept = rng.randn(n_classes)
|
||||
sample_weights = rng.randn(n_samples)
|
||||
np.abs(sample_weights, sample_weights)
|
||||
|
||||
# compute loss and gradient like in multinomial SAG
|
||||
dataset, _ = make_dataset(X, y, sample_weights, random_state=42)
|
||||
loss_1, grad_1 = _multinomial_grad_loss_all_samples(dataset, weights,
|
||||
intercept, n_samples,
|
||||
n_features, n_classes)
|
||||
# compute loss and gradient like in multinomial LogisticRegression
|
||||
lbin = LabelBinarizer()
|
||||
Y_bin = lbin.fit_transform(y)
|
||||
weights_intercept = np.vstack((weights, intercept)).T.ravel()
|
||||
loss_2, grad_2, _ = _multinomial_loss_grad(weights_intercept, X, Y_bin,
|
||||
0.0, sample_weights)
|
||||
grad_2 = grad_2.reshape(n_classes, -1)
|
||||
grad_2 = grad_2[:, :-1].T
|
||||
|
||||
# comparison
|
||||
assert_array_almost_equal(grad_1, grad_2)
|
||||
assert_almost_equal(loss_1, loss_2)
|
||||
|
||||
|
||||
def test_multinomial_loss_ground_truth():
|
||||
# n_samples, n_features, n_classes = 4, 2, 3
|
||||
n_classes = 3
|
||||
X = np.array([[1.1, 2.2], [2.2, -4.4], [3.3, -2.2], [1.1, 1.1]])
|
||||
y = np.array([0, 1, 2, 0])
|
||||
lbin = LabelBinarizer()
|
||||
Y_bin = lbin.fit_transform(y)
|
||||
|
||||
weights = np.array([[0.1, 0.2, 0.3], [1.1, 1.2, -1.3]])
|
||||
intercept = np.array([1., 0, -.2])
|
||||
sample_weights = np.array([0.8, 1, 1, 0.8])
|
||||
|
||||
prediction = np.dot(X, weights) + intercept
|
||||
logsumexp_prediction = logsumexp(prediction, axis=1)
|
||||
p = prediction - logsumexp_prediction[:, np.newaxis]
|
||||
loss_1 = -(sample_weights[:, np.newaxis] * p * Y_bin).sum()
|
||||
diff = sample_weights[:, np.newaxis] * (np.exp(p) - Y_bin)
|
||||
grad_1 = np.dot(X.T, diff)
|
||||
|
||||
weights_intercept = np.vstack((weights, intercept)).T.ravel()
|
||||
loss_2, grad_2, _ = _multinomial_loss_grad(weights_intercept, X, Y_bin,
|
||||
0.0, sample_weights)
|
||||
grad_2 = grad_2.reshape(n_classes, -1)
|
||||
grad_2 = grad_2[:, :-1].T
|
||||
|
||||
assert_almost_equal(loss_1, loss_2)
|
||||
assert_array_almost_equal(grad_1, grad_2)
|
||||
|
||||
# ground truth
|
||||
loss_gt = 11.680360354325961
|
||||
grad_gt = np.array([[-0.557487, -1.619151, +2.176638],
|
||||
[-0.903942, +5.258745, -4.354803]])
|
||||
assert_almost_equal(loss_1, loss_gt)
|
||||
assert_array_almost_equal(grad_1, grad_gt)
|
||||
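The hand computation above spells out the weighted multinomial loss and gradient that _multinomial_loss_grad is expected to return: with logits P = XW + b, sample weights s and one-hot labels Y,

    L = -\sum_i s_i \sum_c Y_{ic} \Bigl( P_{ic} - \log \sum_k e^{P_{ik}} \Bigr), \qquad
    \nabla_W L = X^\top \operatorname{diag}(s)\, \bigl(\operatorname{softmax}(P) - Y\bigr)

which is exactly what loss_1 and grad_1 compute via logsumexp.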
|
||||
|
||||
@pytest.mark.parametrize("solver", ["sag", "saga"])
|
||||
def test_sag_classifier_raises_error(solver):
|
||||
# Following #13316, the error handling behavior changed in cython sag. This
|
||||
# is simply a non-regression test to make sure numerical errors are
|
||||
# properly raised.
|
||||
|
||||
# Train a classifier on a simple problem
|
||||
rng = np.random.RandomState(42)
|
||||
X, y = make_classification(random_state=rng)
|
||||
clf = LogisticRegression(solver=solver, random_state=rng, warm_start=True)
|
||||
clf.fit(X, y)
|
||||
|
||||
# Trigger a numerical error by:
|
||||
# - corrupting the fitted coefficients of the classifier
|
||||
# - fit it again starting from its current state thanks to warm_start
|
||||
clf.coef_[:] = np.nan
|
||||
|
||||
with pytest.raises(ValueError, match="Floating-point under-/overflow"):
|
||||
clf.fit(X, y)
|
1621
venv/Lib/site-packages/sklearn/linear_model/tests/test_sgd.py
Normal file
1621
venv/Lib/site-packages/sklearn/linear_model/tests/test_sgd.py
Normal file
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,300 @@
|
|||
import numpy as np
|
||||
import scipy.sparse as sp
|
||||
|
||||
from sklearn.utils._testing import assert_array_almost_equal
|
||||
from sklearn.utils._testing import assert_almost_equal
|
||||
|
||||
from sklearn.utils._testing import ignore_warnings
|
||||
from sklearn.utils._testing import assert_warns
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
|
||||
from sklearn.linear_model import Lasso, ElasticNet, LassoCV, ElasticNetCV
|
||||
|
||||
|
||||
def test_sparse_coef():
|
||||
# Check that the sparse_coef property works
|
||||
clf = ElasticNet()
|
||||
clf.coef_ = [1, 2, 3]
|
||||
|
||||
assert sp.isspmatrix(clf.sparse_coef_)
|
||||
assert clf.sparse_coef_.toarray().tolist()[0] == clf.coef_
|
||||
|
||||
|
||||
def test_normalize_option():
|
||||
# Check that the normalize option in enet works
|
||||
X = sp.csc_matrix([[-1], [0], [1]])
|
||||
y = [-1, 0, 1]
|
||||
clf_dense = ElasticNet(normalize=True)
|
||||
clf_sparse = ElasticNet(normalize=True)
|
||||
clf_dense.fit(X, y)
|
||||
X = sp.csc_matrix(X)
|
||||
clf_sparse.fit(X, y)
|
||||
assert_almost_equal(clf_dense.dual_gap_, 0)
|
||||
assert_array_almost_equal(clf_dense.coef_, clf_sparse.coef_)
|
||||
|
||||
|
||||
def test_lasso_zero():
|
||||
# Check that the sparse lasso can handle zero data without crashing
|
||||
X = sp.csc_matrix((3, 1))
|
||||
y = [0, 0, 0]
|
||||
T = np.array([[1], [2], [3]])
|
||||
clf = Lasso().fit(X, y)
|
||||
pred = clf.predict(T)
|
||||
assert_array_almost_equal(clf.coef_, [0])
|
||||
assert_array_almost_equal(pred, [0, 0, 0])
|
||||
assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
|
||||
def test_enet_toy_list_input():
|
||||
# Test ElasticNet for various values of alpha and l1_ratio with list X
|
||||
|
||||
X = np.array([[-1], [0], [1]])
|
||||
X = sp.csc_matrix(X)
|
||||
Y = [-1, 0, 1] # just a straight line
|
||||
T = np.array([[2], [3], [4]]) # test sample
|
||||
|
||||
# this should be the same as unregularized least squares
|
||||
clf = ElasticNet(alpha=0, l1_ratio=1.0)
|
||||
# catch warning about alpha=0.
|
||||
# this is discouraged but should work.
|
||||
ignore_warnings(clf.fit)(X, Y)
|
||||
pred = clf.predict(T)
|
||||
assert_array_almost_equal(clf.coef_, [1])
|
||||
assert_array_almost_equal(pred, [2, 3, 4])
|
||||
assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
clf = ElasticNet(alpha=0.5, l1_ratio=0.3, max_iter=1000)
|
||||
clf.fit(X, Y)
|
||||
pred = clf.predict(T)
|
||||
assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
|
||||
assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
|
||||
assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
clf = ElasticNet(alpha=0.5, l1_ratio=0.5)
|
||||
clf.fit(X, Y)
|
||||
pred = clf.predict(T)
|
||||
assert_array_almost_equal(clf.coef_, [0.45454], 3)
|
||||
assert_array_almost_equal(pred, [0.9090, 1.3636, 1.8181], 3)
|
||||
assert_almost_equal(clf.dual_gap_, 0)
|
||||
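For reference, the alpha / l1_ratio pairs used in these toy fits parameterize the objective documented for scikit-learn's ElasticNet,

    \frac{1}{2n} \lVert y - Xw \rVert_2^2 + \alpha \rho \lVert w \rVert_1 + \frac{\alpha (1 - \rho)}{2} \lVert w \rVert_2^2, \qquad \rho = \text{l1\_ratio}

so alpha=0 removes both penalty terms and reduces to ordinary least squares, which is why the alpha=0 fit above is compared to the unregularized solution (and its warning ignored).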
|
||||
|
||||
def test_enet_toy_explicit_sparse_input():
|
||||
# Test ElasticNet for various values of alpha and l1_ratio with sparse X
|
||||
f = ignore_warnings
|
||||
# training samples
|
||||
X = sp.lil_matrix((3, 1))
|
||||
X[0, 0] = -1
|
||||
# X[1, 0] = 0
|
||||
X[2, 0] = 1
|
||||
Y = [-1, 0, 1] # just a straight line (the identity function)
|
||||
|
||||
# test samples
|
||||
T = sp.lil_matrix((3, 1))
|
||||
T[0, 0] = 2
|
||||
T[1, 0] = 3
|
||||
T[2, 0] = 4
|
||||
|
||||
# this should be the same as lasso
|
||||
clf = ElasticNet(alpha=0, l1_ratio=1.0)
|
||||
f(clf.fit)(X, Y)
|
||||
pred = clf.predict(T)
|
||||
assert_array_almost_equal(clf.coef_, [1])
|
||||
assert_array_almost_equal(pred, [2, 3, 4])
|
||||
assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
clf = ElasticNet(alpha=0.5, l1_ratio=0.3, max_iter=1000)
|
||||
clf.fit(X, Y)
|
||||
pred = clf.predict(T)
|
||||
assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
|
||||
assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
|
||||
assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
clf = ElasticNet(alpha=0.5, l1_ratio=0.5)
|
||||
clf.fit(X, Y)
|
||||
pred = clf.predict(T)
|
||||
assert_array_almost_equal(clf.coef_, [0.45454], 3)
|
||||
assert_array_almost_equal(pred, [0.9090, 1.3636, 1.8181], 3)
|
||||
assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
|
||||
def make_sparse_data(n_samples=100, n_features=100, n_informative=10, seed=42,
|
||||
positive=False, n_targets=1):
|
||||
random_state = np.random.RandomState(seed)
|
||||
|
||||
# build an ill-posed linear regression problem with many noisy features and
|
||||
# comparatively few samples
|
||||
|
||||
# generate a ground truth model
|
||||
w = random_state.randn(n_features, n_targets)
|
||||
w[n_informative:] = 0.0 # only the top features are impacting the model
|
||||
if positive:
|
||||
w = np.abs(w)
|
||||
|
||||
X = random_state.randn(n_samples, n_features)
|
||||
rnd = random_state.uniform(size=(n_samples, n_features))
|
||||
X[rnd > 0.5] = 0.0 # 50% of zeros in input signal
|
||||
|
||||
# generate training ground truth labels
|
||||
y = np.dot(X, w)
|
||||
X = sp.csc_matrix(X)
|
||||
if n_targets == 1:
|
||||
y = np.ravel(y)
|
||||
return X, y
|
||||
|
||||
|
||||
def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
|
||||
n_samples, n_features, max_iter = 100, 100, 1000
|
||||
n_informative = 10
|
||||
|
||||
X, y = make_sparse_data(n_samples, n_features, n_informative,
|
||||
positive=positive)
|
||||
|
||||
X_train, X_test = X[n_samples // 2:], X[:n_samples // 2]
|
||||
y_train, y_test = y[n_samples // 2:], y[:n_samples // 2]
|
||||
|
||||
s_clf = ElasticNet(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept,
|
||||
max_iter=max_iter, tol=1e-7, positive=positive,
|
||||
warm_start=True)
|
||||
s_clf.fit(X_train, y_train)
|
||||
|
||||
assert_almost_equal(s_clf.dual_gap_, 0, 4)
|
||||
assert s_clf.score(X_test, y_test) > 0.85
|
||||
|
||||
# check the convergence is the same as the dense version
|
||||
d_clf = ElasticNet(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept,
|
||||
max_iter=max_iter, tol=1e-7, positive=positive,
|
||||
warm_start=True)
|
||||
d_clf.fit(X_train.toarray(), y_train)
|
||||
|
||||
assert_almost_equal(d_clf.dual_gap_, 0, 4)
|
||||
assert d_clf.score(X_test, y_test) > 0.85
|
||||
|
||||
assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
|
||||
assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)
|
||||
|
||||
# check that the coefs are sparse
|
||||
assert np.sum(s_clf.coef_ != 0.0) < 2 * n_informative
|
||||
|
||||
|
||||
def test_sparse_enet_not_as_toy_dataset():
|
||||
_test_sparse_enet_not_as_toy_dataset(alpha=0.1, fit_intercept=False,
|
||||
positive=False)
|
||||
_test_sparse_enet_not_as_toy_dataset(alpha=0.1, fit_intercept=True,
|
||||
positive=False)
|
||||
_test_sparse_enet_not_as_toy_dataset(alpha=1e-3, fit_intercept=False,
|
||||
positive=True)
|
||||
_test_sparse_enet_not_as_toy_dataset(alpha=1e-3, fit_intercept=True,
|
||||
positive=True)
|
||||
|
||||
|
||||
def test_sparse_lasso_not_as_toy_dataset():
|
||||
n_samples = 100
|
||||
max_iter = 1000
|
||||
n_informative = 10
|
||||
X, y = make_sparse_data(n_samples=n_samples, n_informative=n_informative)
|
||||
|
||||
X_train, X_test = X[n_samples // 2:], X[:n_samples // 2]
|
||||
y_train, y_test = y[n_samples // 2:], y[:n_samples // 2]
|
||||
|
||||
s_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7)
|
||||
s_clf.fit(X_train, y_train)
|
||||
assert_almost_equal(s_clf.dual_gap_, 0, 4)
|
||||
assert s_clf.score(X_test, y_test) > 0.85
|
||||
|
||||
# check the convergence is the same as the dense version
|
||||
d_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7)
|
||||
d_clf.fit(X_train.toarray(), y_train)
|
||||
assert_almost_equal(d_clf.dual_gap_, 0, 4)
|
||||
assert d_clf.score(X_test, y_test) > 0.85
|
||||
|
||||
# check that the coefs are sparse
|
||||
assert np.sum(s_clf.coef_ != 0.0) == n_informative
|
||||
|
||||
|
||||
def test_enet_multitarget():
|
||||
n_targets = 3
|
||||
X, y = make_sparse_data(n_targets=n_targets)
|
||||
|
||||
estimator = ElasticNet(alpha=0.01, precompute=None)
|
||||
# XXX: There is a bug when precompute is not None!
|
||||
estimator.fit(X, y)
|
||||
coef, intercept, dual_gap = (estimator.coef_,
|
||||
estimator.intercept_,
|
||||
estimator.dual_gap_)
|
||||
|
||||
for k in range(n_targets):
|
||||
estimator.fit(X, y[:, k])
|
||||
assert_array_almost_equal(coef[k, :], estimator.coef_)
|
||||
assert_array_almost_equal(intercept[k], estimator.intercept_)
|
||||
assert_array_almost_equal(dual_gap[k], estimator.dual_gap_)
|
||||
|
||||
|
||||
def test_path_parameters():
|
||||
X, y = make_sparse_data()
|
||||
max_iter = 50
|
||||
n_alphas = 10
|
||||
clf = ElasticNetCV(n_alphas=n_alphas, eps=1e-3, max_iter=max_iter,
|
||||
l1_ratio=0.5, fit_intercept=False)
|
||||
ignore_warnings(clf.fit)(X, y) # new params
|
||||
assert_almost_equal(0.5, clf.l1_ratio)
|
||||
assert n_alphas == clf.n_alphas
|
||||
assert n_alphas == len(clf.alphas_)
|
||||
sparse_mse_path = clf.mse_path_
|
||||
ignore_warnings(clf.fit)(X.toarray(), y) # compare with dense data
|
||||
assert_almost_equal(clf.mse_path_, sparse_mse_path)
|
||||
|
||||
|
||||
def test_same_output_sparse_dense_lasso_and_enet_cv():
|
||||
X, y = make_sparse_data(n_samples=40, n_features=10)
|
||||
for normalize in [True, False]:
|
||||
clfs = ElasticNetCV(max_iter=100, normalize=normalize)
|
||||
ignore_warnings(clfs.fit)(X, y)
|
||||
clfd = ElasticNetCV(max_iter=100, normalize=normalize)
|
||||
ignore_warnings(clfd.fit)(X.toarray(), y)
|
||||
assert_almost_equal(clfs.alpha_, clfd.alpha_, 7)
|
||||
assert_almost_equal(clfs.intercept_, clfd.intercept_, 7)
|
||||
assert_array_almost_equal(clfs.mse_path_, clfd.mse_path_)
|
||||
assert_array_almost_equal(clfs.alphas_, clfd.alphas_)
|
||||
|
||||
clfs = LassoCV(max_iter=100, cv=4, normalize=normalize)
|
||||
ignore_warnings(clfs.fit)(X, y)
|
||||
clfd = LassoCV(max_iter=100, cv=4, normalize=normalize)
|
||||
ignore_warnings(clfd.fit)(X.toarray(), y)
|
||||
assert_almost_equal(clfs.alpha_, clfd.alpha_, 7)
|
||||
assert_almost_equal(clfs.intercept_, clfd.intercept_, 7)
|
||||
assert_array_almost_equal(clfs.mse_path_, clfd.mse_path_)
|
||||
assert_array_almost_equal(clfs.alphas_, clfd.alphas_)
|
||||
|
||||
|
||||
def test_same_multiple_output_sparse_dense():
|
||||
for normalize in [True, False]:
|
||||
l = ElasticNet(normalize=normalize)
|
||||
X = [[0, 1, 2, 3, 4],
|
||||
[0, 2, 5, 8, 11],
|
||||
[9, 10, 11, 12, 13],
|
||||
[10, 11, 12, 13, 14]]
|
||||
y = [[1, 2, 3, 4, 5],
|
||||
[1, 3, 6, 9, 12],
|
||||
[10, 11, 12, 13, 14],
|
||||
[11, 12, 13, 14, 15]]
|
||||
ignore_warnings(l.fit)(X, y)
|
||||
sample = np.array([1, 2, 3, 4, 5]).reshape(1, -1)
|
||||
predict_dense = l.predict(sample)
|
||||
|
||||
l_sp = ElasticNet(normalize=normalize)
|
||||
X_sp = sp.coo_matrix(X)
|
||||
ignore_warnings(l_sp.fit)(X_sp, y)
|
||||
sample_sparse = sp.coo_matrix(sample)
|
||||
predict_sparse = l_sp.predict(sample_sparse)
|
||||
|
||||
assert_array_almost_equal(predict_sparse, predict_dense)
|
||||
|
||||
|
||||
def test_sparse_enet_coordinate_descent():
|
||||
"""Test that a warning is issued if model does not converge"""
|
||||
clf = Lasso(max_iter=2)
|
||||
n_samples = 5
|
||||
n_features = 2
|
||||
X = sp.csc_matrix((n_samples, n_features)) * 1e50
|
||||
y = np.ones(n_samples)
|
||||
assert_warns(ConvergenceWarning, clf.fit, X, y)
|
|
@ -0,0 +1,281 @@
|
|||
"""
|
||||
Testing for Theil-Sen module (sklearn.linear_model.theil_sen)
|
||||
"""
|
||||
|
||||
# Author: Florian Wilhelm <florian.wilhelm@gmail.com>
|
||||
# License: BSD 3 clause
|
||||
import os
|
||||
import sys
|
||||
from contextlib import contextmanager
|
||||
import numpy as np
|
||||
from numpy.testing import assert_array_equal, assert_array_less
|
||||
from numpy.testing import assert_array_almost_equal, assert_warns
|
||||
from scipy.linalg import norm
|
||||
from scipy.optimize import fmin_bfgs
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
from sklearn.linear_model import LinearRegression, TheilSenRegressor
|
||||
from sklearn.linear_model._theil_sen import _spatial_median, _breakdown_point
|
||||
from sklearn.linear_model._theil_sen import _modified_weiszfeld_step
|
||||
from sklearn.utils._testing import assert_almost_equal, assert_raises
|
||||
|
||||
|
||||
@contextmanager
|
||||
def no_stdout_stderr():
|
||||
old_stdout = sys.stdout
|
||||
old_stderr = sys.stderr
|
||||
with open(os.devnull, 'w') as devnull:
|
||||
sys.stdout = devnull
|
||||
sys.stderr = devnull
|
||||
yield
|
||||
devnull.flush()
|
||||
sys.stdout = old_stdout
|
||||
sys.stderr = old_stderr
|
||||
|
||||
|
||||
def gen_toy_problem_1d(intercept=True):
|
||||
random_state = np.random.RandomState(0)
|
||||
# Linear model y = 3*x + N(2, 0.1**2)
|
||||
w = 3.
|
||||
if intercept:
|
||||
c = 2.
|
||||
n_samples = 50
|
||||
else:
|
||||
c = 0.1
|
||||
n_samples = 100
|
||||
x = random_state.normal(size=n_samples)
|
||||
noise = 0.1 * random_state.normal(size=n_samples)
|
||||
y = w * x + c + noise
|
||||
# Add some outliers
|
||||
if intercept:
|
||||
x[42], y[42] = (-2, 4)
|
||||
x[43], y[43] = (-2.5, 8)
|
||||
x[33], y[33] = (2.5, 1)
|
||||
x[49], y[49] = (2.1, 2)
|
||||
else:
|
||||
x[42], y[42] = (-2, 4)
|
||||
x[43], y[43] = (-2.5, 8)
|
||||
x[53], y[53] = (2.5, 1)
|
||||
x[60], y[60] = (2.1, 2)
|
||||
x[72], y[72] = (1.8, -7)
|
||||
return x[:, np.newaxis], y, w, c
|
||||
|
||||
|
||||
def gen_toy_problem_2d():
|
||||
random_state = np.random.RandomState(0)
|
||||
n_samples = 100
|
||||
# Linear model y = 5*x_1 + 10*x_2 + N(1, 0.1**2)
|
||||
X = random_state.normal(size=(n_samples, 2))
|
||||
w = np.array([5., 10.])
|
||||
c = 1.
|
||||
noise = 0.1 * random_state.normal(size=n_samples)
|
||||
y = np.dot(X, w) + c + noise
|
||||
# Add some outliers
|
||||
n_outliers = n_samples // 10
|
||||
ix = random_state.randint(0, n_samples, size=n_outliers)
|
||||
y[ix] = 50 * random_state.normal(size=n_outliers)
|
||||
return X, y, w, c
|
||||
|
||||
|
||||
def gen_toy_problem_4d():
|
||||
random_state = np.random.RandomState(0)
|
||||
n_samples = 10000
|
||||
# Linear model y = 5*x_1 + 10*x_2 + 42*x_3 + 7*x_4 + N(1, 0.1**2)
|
||||
X = random_state.normal(size=(n_samples, 4))
|
||||
w = np.array([5., 10., 42., 7.])
|
||||
c = 1.
|
||||
noise = 0.1 * random_state.normal(size=n_samples)
|
||||
y = np.dot(X, w) + c + noise
|
||||
# Add some outliers
|
||||
n_outliers = n_samples // 10
|
||||
ix = random_state.randint(0, n_samples, size=n_outliers)
|
||||
y[ix] = 50 * random_state.normal(size=n_outliers)
|
||||
return X, y, w, c
|
||||
|
||||
|
||||
def test_modweiszfeld_step_1d():
|
||||
X = np.array([1., 2., 3.]).reshape(3, 1)
|
||||
# Check startvalue is element of X and solution
|
||||
median = 2.
|
||||
new_y = _modified_weiszfeld_step(X, median)
|
||||
assert_array_almost_equal(new_y, median)
|
||||
# Check startvalue is not the solution
|
||||
y = 2.5
|
||||
new_y = _modified_weiszfeld_step(X, y)
|
||||
assert_array_less(median, new_y)
|
||||
assert_array_less(new_y, y)
|
||||
# Check startvalue is not the solution but element of X
|
||||
y = 3.
|
||||
new_y = _modified_weiszfeld_step(X, y)
|
||||
assert_array_less(median, new_y)
|
||||
assert_array_less(new_y, y)
|
||||
# Check that a single vector is identity
|
||||
X = np.array([1., 2., 3.]).reshape(1, 3)
|
||||
y = X[0, ]
|
||||
new_y = _modified_weiszfeld_step(X, y)
|
||||
assert_array_equal(y, new_y)
|
||||
|
||||
|
||||
def test_modweiszfeld_step_2d():
|
||||
X = np.array([0., 0., 1., 1., 0., 1.]).reshape(3, 2)
|
||||
y = np.array([0.5, 0.5])
|
||||
# Check first two iterations
|
||||
new_y = _modified_weiszfeld_step(X, y)
|
||||
assert_array_almost_equal(new_y, np.array([1 / 3, 2 / 3]))
|
||||
new_y = _modified_weiszfeld_step(X, new_y)
|
||||
assert_array_almost_equal(new_y, np.array([0.2792408, 0.7207592]))
|
||||
# Check fix point
|
||||
y = np.array([0.21132505, 0.78867497])
|
||||
new_y = _modified_weiszfeld_step(X, y)
|
||||
assert_array_almost_equal(new_y, y)
|
||||
|
||||
|
||||
def test_spatial_median_1d():
|
||||
X = np.array([1., 2., 3.]).reshape(3, 1)
|
||||
true_median = 2.
|
||||
_, median = _spatial_median(X)
|
||||
assert_array_almost_equal(median, true_median)
|
||||
# Test larger problem and for exact solution in 1d case
|
||||
random_state = np.random.RandomState(0)
|
||||
X = random_state.randint(100, size=(1000, 1))
|
||||
true_median = np.median(X.ravel())
|
||||
_, median = _spatial_median(X)
|
||||
assert_array_equal(median, true_median)
|
||||
|
||||
|
||||
def test_spatial_median_2d():
|
||||
X = np.array([0., 0., 1., 1., 0., 1.]).reshape(3, 2)
|
||||
_, median = _spatial_median(X, max_iter=100, tol=1.e-6)
|
||||
|
||||
def cost_func(y):
|
||||
dists = np.array([norm(x - y) for x in X])
|
||||
return np.sum(dists)
|
||||
|
||||
# Check if median is solution of the Fermat-Weber location problem
|
||||
fermat_weber = fmin_bfgs(cost_func, median, disp=False)
|
||||
assert_array_almost_equal(median, fermat_weber)
|
||||
# Check when maximum iteration is exceeded a warning is emitted
|
||||
assert_warns(ConvergenceWarning, _spatial_median, X, max_iter=30, tol=0.)
|
||||
|
||||
|
||||
def test_theil_sen_1d():
|
||||
X, y, w, c = gen_toy_problem_1d()
|
||||
# Check that Least Squares fails
|
||||
lstq = LinearRegression().fit(X, y)
|
||||
assert np.abs(lstq.coef_ - w) > 0.9
|
||||
# Check that Theil-Sen works
|
||||
theil_sen = TheilSenRegressor(random_state=0).fit(X, y)
|
||||
assert_array_almost_equal(theil_sen.coef_, w, 1)
|
||||
assert_array_almost_equal(theil_sen.intercept_, c, 1)
|
||||
|
||||
|
||||
def test_theil_sen_1d_no_intercept():
|
||||
X, y, w, c = gen_toy_problem_1d(intercept=False)
|
||||
# Check that Least Squares fails
|
||||
lstq = LinearRegression(fit_intercept=False).fit(X, y)
|
||||
assert np.abs(lstq.coef_ - w - c) > 0.5
|
||||
# Check that Theil-Sen works
|
||||
theil_sen = TheilSenRegressor(fit_intercept=False,
|
||||
random_state=0).fit(X, y)
|
||||
assert_array_almost_equal(theil_sen.coef_, w + c, 1)
|
||||
assert_almost_equal(theil_sen.intercept_, 0.)
|
||||
|
||||
|
||||
def test_theil_sen_2d():
|
||||
X, y, w, c = gen_toy_problem_2d()
|
||||
# Check that Least Squares fails
|
||||
lstq = LinearRegression().fit(X, y)
|
||||
assert norm(lstq.coef_ - w) > 1.0
|
||||
# Check that Theil-Sen works
|
||||
theil_sen = TheilSenRegressor(max_subpopulation=1e3,
|
||||
random_state=0).fit(X, y)
|
||||
assert_array_almost_equal(theil_sen.coef_, w, 1)
|
||||
assert_array_almost_equal(theil_sen.intercept_, c, 1)
|
||||
|
||||
|
||||
def test_calc_breakdown_point():
|
||||
bp = _breakdown_point(1e10, 2)
|
||||
assert np.abs(bp - 1 + 1 / (np.sqrt(2))) < 1.e-6
|
||||
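The asserted value is the large-n limit of _breakdown_point for n_subsamples=2: the fraction of arbitrarily corrupted samples tolerated here approaches

    1 - \frac{1}{\sqrt{2}} \approx 0.2929

i.e. roughly 29% contamination. This is consistent with the asymptotic form 1 - 2^{-1/k} for subsample size k, though the test itself only checks the k = 2 case.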
|
||||
|
||||
def test_checksubparams_negative_subpopulation():
|
||||
X, y, w, c = gen_toy_problem_1d()
|
||||
theil_sen = TheilSenRegressor(max_subpopulation=-1, random_state=0)
|
||||
assert_raises(ValueError, theil_sen.fit, X, y)
|
||||
|
||||
|
||||
def test_checksubparams_too_few_subsamples():
|
||||
X, y, w, c = gen_toy_problem_1d()
|
||||
theil_sen = TheilSenRegressor(n_subsamples=1, random_state=0)
|
||||
assert_raises(ValueError, theil_sen.fit, X, y)
|
||||
|
||||
|
||||
def test_checksubparams_too_many_subsamples():
|
||||
X, y, w, c = gen_toy_problem_1d()
|
||||
theil_sen = TheilSenRegressor(n_subsamples=101, random_state=0)
|
||||
assert_raises(ValueError, theil_sen.fit, X, y)
|
||||
|
||||
|
||||
def test_checksubparams_n_subsamples_if_less_samples_than_features():
|
||||
random_state = np.random.RandomState(0)
|
||||
n_samples, n_features = 10, 20
|
||||
X = random_state.normal(size=(n_samples, n_features))
|
||||
y = random_state.normal(size=n_samples)
|
||||
theil_sen = TheilSenRegressor(n_subsamples=9, random_state=0)
|
||||
assert_raises(ValueError, theil_sen.fit, X, y)
|
||||
|
||||
|
||||
def test_subpopulation():
|
||||
X, y, w, c = gen_toy_problem_4d()
|
||||
theil_sen = TheilSenRegressor(max_subpopulation=250,
|
||||
random_state=0).fit(X, y)
|
||||
assert_array_almost_equal(theil_sen.coef_, w, 1)
|
||||
assert_array_almost_equal(theil_sen.intercept_, c, 1)
|
||||
|
||||
|
||||
def test_subsamples():
|
||||
X, y, w, c = gen_toy_problem_4d()
|
||||
theil_sen = TheilSenRegressor(n_subsamples=X.shape[0],
|
||||
random_state=0).fit(X, y)
|
||||
lstq = LinearRegression().fit(X, y)
|
||||
# Check for exact the same results as Least Squares
|
||||
assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 9)
|
||||
|
||||
|
||||
def test_verbosity():
|
||||
X, y, w, c = gen_toy_problem_1d()
|
||||
# Check that Theil-Sen can be verbose
|
||||
with no_stdout_stderr():
|
||||
TheilSenRegressor(verbose=True, random_state=0).fit(X, y)
|
||||
TheilSenRegressor(verbose=True,
|
||||
max_subpopulation=10,
|
||||
random_state=0).fit(X, y)
|
||||
|
||||
|
||||
def test_theil_sen_parallel():
|
||||
X, y, w, c = gen_toy_problem_2d()
|
||||
# Check that Least Squares fails
|
||||
lstq = LinearRegression().fit(X, y)
|
||||
assert norm(lstq.coef_ - w) > 1.0
|
||||
# Check that Theil-Sen works
|
||||
theil_sen = TheilSenRegressor(n_jobs=2,
|
||||
random_state=0,
|
||||
max_subpopulation=2e3).fit(X, y)
|
||||
assert_array_almost_equal(theil_sen.coef_, w, 1)
|
||||
assert_array_almost_equal(theil_sen.intercept_, c, 1)
|
||||
|
||||
|
||||
def test_less_samples_than_features():
|
||||
random_state = np.random.RandomState(0)
|
||||
n_samples, n_features = 10, 20
|
||||
X = random_state.normal(size=(n_samples, n_features))
|
||||
y = random_state.normal(size=n_samples)
|
||||
# Check that Theil-Sen falls back to Least Squares if fit_intercept=False
|
||||
theil_sen = TheilSenRegressor(fit_intercept=False,
|
||||
random_state=0).fit(X, y)
|
||||
lstq = LinearRegression(fit_intercept=False).fit(X, y)
|
||||
assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 12)
|
||||
# Check fit_intercept=True case. This will not be equal to the Least
|
||||
# Squares solution since the intercept is calculated differently.
|
||||
theil_sen = TheilSenRegressor(fit_intercept=True, random_state=0).fit(X, y)
|
||||
y_pred = theil_sen.predict(X)
|
||||
assert_array_almost_equal(y_pred, y, 12)
|