Uploaded Test files

Batuhan Berk Başoğlu 2020-11-12 11:05:57 -05:00
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions

View file

@@ -0,0 +1,10 @@
import pytest
import sklearn
@pytest.fixture
def print_changed_only_false():
sklearn.set_config(print_changed_only=False)
yield
sklearn.set_config(print_changed_only=True) # reset to default
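# Editorial sketch (hypothetical test, not part of the committed file):
# pytest injects the fixture above by argument name, so set_config wraps
# the test body and full reprs are shown while the test runs.
from sklearn.linear_model import LogisticRegression
def test_repr_shows_defaults_sketch(print_changed_only_false):
    # With print_changed_only=False, unchanged defaults appear in the repr.
    assert "penalty=" in repr(LogisticRegression())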

View file

@@ -0,0 +1,266 @@
import numpy as np
import pytest
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from sklearn.utils.class_weight import compute_class_weight
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_almost_equal
def test_compute_class_weight():
# Test (and demo) compute_class_weight.
y = np.asarray([2, 2, 2, 3, 3, 4])
classes = np.unique(y)
cw = compute_class_weight("balanced", classes=classes, y=y)
# total effect of samples is preserved
class_counts = np.bincount(y)[2:]
assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
assert cw[0] < cw[1] < cw[2]
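# Editorial sketch: the "balanced" heuristic assigns each class the weight
# n_samples / (n_classes * count(class)), which is why np.dot(cw,
# class_counts) recovers n_samples above. A self-contained check of the
# formula using only the helpers already imported in this file:
def test_balanced_formula_sketch():
    y = np.asarray([2, 2, 2, 3, 3, 4])
    counts = np.bincount(y)[2:]   # [3, 2, 1]
    cw = compute_class_weight("balanced", classes=np.unique(y), y=y)
    assert_array_almost_equal(cw, y.shape[0] / (3 * counts))  # [2/3, 1, 2]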
def test_compute_class_weight_not_present():
# Raise error when y does not contain all class labels
classes = np.arange(4)
y = np.asarray([0, 0, 0, 1, 1, 2])
with pytest.raises(ValueError):
compute_class_weight("balanced", classes=classes, y=y)
# Fix exception in error message formatting when missing label is a string
# https://github.com/scikit-learn/scikit-learn/issues/8312
with pytest.raises(ValueError,
match="Class label label_not_present not present"):
compute_class_weight({"label_not_present": 1.}, classes=classes, y=y)
# Raise error when y has items not in classes
classes = np.arange(2)
with pytest.raises(ValueError):
compute_class_weight("balanced", classes=classes, y=y)
with pytest.raises(ValueError):
compute_class_weight({0: 1., 1: 2.}, classes=classes, y=y)
def test_compute_class_weight_dict():
classes = np.arange(3)
class_weights = {0: 1.0, 1: 2.0, 2: 3.0}
y = np.asarray([0, 0, 1, 2])
cw = compute_class_weight(class_weights, classes=classes, y=y)
# When the user specifies class weights, compute_class_weights should just
# return them.
assert_array_almost_equal(np.asarray([1.0, 2.0, 3.0]), cw)
# When a class weight is specified that isn't in classes, a ValueError
# should get raised
msg = 'Class label 4 not present.'
class_weights = {0: 1.0, 1: 2.0, 2: 3.0, 4: 1.5}
with pytest.raises(ValueError, match=msg):
compute_class_weight(class_weights, classes=classes, y=y)
msg = 'Class label -1 not present.'
class_weights = {-1: 5.0, 0: 1.0, 1: 2.0, 2: 3.0}
with pytest.raises(ValueError, match=msg):
compute_class_weight(class_weights, classes=classes, y=y)
def test_compute_class_weight_invariance():
    # Test that results with class_weight="balanced" are invariant wrt
    # class imbalance if the number of samples is identical.
# The test uses a balanced two class dataset with 100 datapoints.
# It creates three versions, one where class 1 is duplicated
# resulting in 150 points of class 1 and 50 of class 0,
# one where there are 50 points in class 1 and 150 in class 0,
# and one where there are 100 points of each class (this one is balanced
# again).
# With balancing class weights, all three should give the same model.
X, y = make_blobs(centers=2, random_state=0)
# create dataset where class 1 is duplicated twice
X_1 = np.vstack([X] + [X[y == 1]] * 2)
y_1 = np.hstack([y] + [y[y == 1]] * 2)
# create dataset where class 0 is duplicated twice
X_0 = np.vstack([X] + [X[y == 0]] * 2)
y_0 = np.hstack([y] + [y[y == 0]] * 2)
# duplicate everything
X_ = np.vstack([X] * 2)
y_ = np.hstack([y] * 2)
# results should be identical
logreg1 = LogisticRegression(class_weight="balanced").fit(X_1, y_1)
logreg0 = LogisticRegression(class_weight="balanced").fit(X_0, y_0)
logreg = LogisticRegression(class_weight="balanced").fit(X_, y_)
assert_array_almost_equal(logreg1.coef_, logreg0.coef_)
assert_array_almost_equal(logreg.coef_, logreg0.coef_)
def test_compute_class_weight_balanced_negative():
# Test compute_class_weight when labels are negative
# Test with balanced class labels.
classes = np.array([-2, -1, 0])
y = np.asarray([-1, -1, 0, 0, -2, -2])
cw = compute_class_weight("balanced", classes=classes, y=y)
assert len(cw) == len(classes)
assert_array_almost_equal(cw, np.array([1., 1., 1.]))
# Test with unbalanced class labels.
y = np.asarray([-1, 0, 0, -2, -2, -2])
cw = compute_class_weight("balanced", classes=classes, y=y)
assert len(cw) == len(classes)
class_counts = np.bincount(y + 2)
assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
assert_array_almost_equal(cw, [2. / 3, 2., 1.])
def test_compute_class_weight_balanced_unordered():
# Test compute_class_weight when classes are unordered
classes = np.array([1, 0, 3])
y = np.asarray([1, 0, 0, 3, 3, 3])
cw = compute_class_weight("balanced", classes=classes, y=y)
class_counts = np.bincount(y)[classes]
assert_almost_equal(np.dot(cw, class_counts), y.shape[0])
assert_array_almost_equal(cw, [2., 1., 2. / 3])
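# Editorial note: the same formula applies with unordered classes.
# np.bincount(y) is [2, 1, 0, 3]; indexing it by classes = [1, 0, 3] gives
# counts [1, 2, 3], and 6 / (3 * [1, 2, 3]) reproduces [2., 1., 2/3].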
def test_compute_class_weight_default():
# Test for the case where no weight is given for a present class.
# Current behaviour is to assign the unweighted classes a weight of 1.
y = np.asarray([2, 2, 2, 3, 3, 4])
classes = np.unique(y)
classes_len = len(classes)
    # Test for unspecified weights
cw = compute_class_weight(None, classes=classes, y=y)
assert len(cw) == classes_len
assert_array_almost_equal(cw, np.ones(3))
# Tests for partly specified weights
cw = compute_class_weight({2: 1.5}, classes=classes, y=y)
assert len(cw) == classes_len
assert_array_almost_equal(cw, [1.5, 1., 1.])
cw = compute_class_weight({2: 1.5, 4: 0.5}, classes=classes, y=y)
assert len(cw) == classes_len
assert_array_almost_equal(cw, [1.5, 1., 0.5])
def test_compute_sample_weight():
# Test (and demo) compute_sample_weight.
# Test with balanced classes
y = np.asarray([1, 1, 1, 2, 2, 2])
sample_weight = compute_sample_weight("balanced", y)
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
# Test with user-defined weights
sample_weight = compute_sample_weight({1: 2, 2: 1}, y)
assert_array_almost_equal(sample_weight, [2., 2., 2., 1., 1., 1.])
# Test with column vector of balanced classes
y = np.asarray([[1], [1], [1], [2], [2], [2]])
sample_weight = compute_sample_weight("balanced", y)
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
# Test with unbalanced classes
y = np.asarray([1, 1, 1, 2, 2, 2, 3])
sample_weight = compute_sample_weight("balanced", y)
expected_balanced = np.array([0.7777, 0.7777, 0.7777, 0.7777, 0.7777,
0.7777, 2.3333])
assert_array_almost_equal(sample_weight, expected_balanced, decimal=4)
# Test with `None` weights
sample_weight = compute_sample_weight(None, y)
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 1.])
# Test with multi-output of balanced classes
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
sample_weight = compute_sample_weight("balanced", y)
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
# Test with multi-output with user-defined weights
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
sample_weight = compute_sample_weight([{1: 2, 2: 1}, {0: 1, 1: 2}], y)
assert_array_almost_equal(sample_weight, [2., 2., 2., 2., 2., 2.])
# Test with multi-output of unbalanced classes
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [3, -1]])
sample_weight = compute_sample_weight("balanced", y)
assert_array_almost_equal(sample_weight, expected_balanced ** 2, decimal=3)
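# Editorial note: the unbalanced expectation above follows from the same
# formula. With n_samples = 7, n_classes = 3 and class counts [3, 3, 1],
# the class weights are 7 / (3 * 3) ~= 0.7777 (classes 1 and 2) and
# 7 / (3 * 1) ~= 2.3333 (class 3); compute_sample_weight then maps each
# sample to the weight of its class.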
def test_compute_sample_weight_with_subsample():
# Test compute_sample_weight with subsamples specified.
# Test with balanced classes and all samples present
y = np.asarray([1, 1, 1, 2, 2, 2])
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
# Test with column vector of balanced classes and all samples present
y = np.asarray([[1], [1], [1], [2], [2], [2]])
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.])
# Test with a subsample
y = np.asarray([1, 1, 1, 2, 2, 2])
sample_weight = compute_sample_weight("balanced", y, indices=range(4))
assert_array_almost_equal(sample_weight, [2. / 3, 2. / 3,
2. / 3, 2., 2., 2.])
# Test with a bootstrap subsample
y = np.asarray([1, 1, 1, 2, 2, 2])
sample_weight = compute_sample_weight("balanced", y,
indices=[0, 1, 1, 2, 2, 3])
expected_balanced = np.asarray([0.6, 0.6, 0.6, 3., 3., 3.])
assert_array_almost_equal(sample_weight, expected_balanced)
# Test with a bootstrap subsample for multi-output
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
sample_weight = compute_sample_weight("balanced", y,
indices=[0, 1, 1, 2, 2, 3])
assert_array_almost_equal(sample_weight, expected_balanced ** 2)
# Test with a missing class
y = np.asarray([1, 1, 1, 2, 2, 2, 3])
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])
# Test with a missing class for multi-output
y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [2, 2]])
sample_weight = compute_sample_weight("balanced", y, indices=range(6))
assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])
def test_compute_sample_weight_errors():
# Test compute_sample_weight raises errors expected.
# Invalid preset string
y = np.asarray([1, 1, 1, 2, 2, 2])
y_ = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]])
with pytest.raises(ValueError):
compute_sample_weight("ni", y)
with pytest.raises(ValueError):
compute_sample_weight("ni", y, indices=range(4))
with pytest.raises(ValueError):
compute_sample_weight("ni", y_)
with pytest.raises(ValueError):
compute_sample_weight("ni", y_, indices=range(4))
# Not "balanced" for subsample
with pytest.raises(ValueError):
compute_sample_weight({1: 2, 2: 1}, y, indices=range(4))
# Not a list or preset for multi-output
with pytest.raises(ValueError):
compute_sample_weight({1: 2, 2: 1}, y_)
# Incorrect length list for multi-output
with pytest.raises(ValueError):
compute_sample_weight([{1: 2, 2: 1}], y_)
def test_compute_sample_weight_more_than_32():
# Non-regression smoke test for #12146
y = np.arange(50) # more than 32 distinct classes
indices = np.arange(50) # use subsampling
weight = compute_sample_weight('balanced', y, indices=indices)
assert_array_almost_equal(weight, np.ones(y.shape[0]))

View file

@@ -0,0 +1,229 @@
import pytest
import numpy as np
from sklearn.utils._testing import assert_allclose
from sklearn.utils._cython_blas import _dot_memview
from sklearn.utils._cython_blas import _asum_memview
from sklearn.utils._cython_blas import _axpy_memview
from sklearn.utils._cython_blas import _nrm2_memview
from sklearn.utils._cython_blas import _copy_memview
from sklearn.utils._cython_blas import _scal_memview
from sklearn.utils._cython_blas import _rotg_memview
from sklearn.utils._cython_blas import _rot_memview
from sklearn.utils._cython_blas import _gemv_memview
from sklearn.utils._cython_blas import _ger_memview
from sklearn.utils._cython_blas import _gemm_memview
from sklearn.utils._cython_blas import RowMajor, ColMajor
from sklearn.utils._cython_blas import Trans, NoTrans
def _numpy_to_cython(dtype):
cython = pytest.importorskip("cython")
if dtype == np.float32:
return cython.float
elif dtype == np.float64:
return cython.double
RTOL = {np.float32: 1e-6, np.float64: 1e-12}
ORDER = {RowMajor: 'C', ColMajor: 'F'}
def _no_op(x):
return x
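# Editorial sketch, a rough Python analogy (not the real Cython mechanism):
# indexing a fused-typed function such as _dot_memview[_numpy_to_cython(dtype)]
# selects the dtype-specific specialization, much like picking a kernel out
# of a mapping keyed by dtype.
def test_dtype_dispatch_sketch():
    impls = {np.float32: lambda a, b: np.float32(a @ b),
             np.float64: lambda a, b: np.float64(a @ b)}
    x = np.ones(3, dtype=np.float32)
    assert impls[x.dtype.type](x, x) == 3.0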
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_dot(dtype):
dot = _dot_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(10).astype(dtype, copy=False)
expected = x.dot(y)
actual = dot(x, y)
assert_allclose(actual, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_asum(dtype):
asum = _asum_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
expected = np.abs(x).sum()
actual = asum(x)
assert_allclose(actual, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_axpy(dtype):
axpy = _axpy_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(10).astype(dtype, copy=False)
alpha = 2.5
expected = alpha * x + y
axpy(alpha, x, y)
assert_allclose(y, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_nrm2(dtype):
nrm2 = _nrm2_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
expected = np.linalg.norm(x)
actual = nrm2(x)
assert_allclose(actual, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_copy(dtype):
copy = _copy_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = np.empty_like(x)
expected = x.copy()
copy(x, y)
assert_allclose(y, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_scal(dtype):
scal = _scal_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
alpha = 2.5
expected = alpha * x
scal(alpha, x)
assert_allclose(x, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_rotg(dtype):
rotg = _rotg_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
a = dtype(rng.randn())
b = dtype(rng.randn())
c, s = 0.0, 0.0
def expected_rotg(a, b):
roe = a if abs(a) > abs(b) else b
if a == 0 and b == 0:
c, s, r, z = (1, 0, 0, 0)
else:
r = np.sqrt(a**2 + b**2) * (1 if roe >= 0 else -1)
c, s = a/r, b/r
z = s if roe == a else (1 if c == 0 else 1 / c)
return r, z, c, s
expected = expected_rotg(a, b)
actual = rotg(a, b, c, s)
assert_allclose(actual, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_rot(dtype):
rot = _rot_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(10).astype(dtype, copy=False)
c = dtype(rng.randn())
s = dtype(rng.randn())
expected_x = c * x + s * y
expected_y = c * y - s * x
rot(x, y, c, s)
assert_allclose(x, expected_x)
assert_allclose(y, expected_y)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("opA, transA",
[(_no_op, NoTrans), (np.transpose, Trans)],
ids=["NoTrans", "Trans"])
@pytest.mark.parametrize("order", [RowMajor, ColMajor],
ids=["RowMajor", "ColMajor"])
def test_gemv(dtype, opA, transA, order):
gemv = _gemv_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
A = np.asarray(opA(rng.random_sample((20, 10)).astype(dtype, copy=False)),
order=ORDER[order])
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(20).astype(dtype, copy=False)
alpha, beta = 2.5, -0.5
expected = alpha * opA(A).dot(x) + beta * y
gemv(transA, alpha, A, x, beta, y)
assert_allclose(y, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("order", [RowMajor, ColMajor],
ids=["RowMajor", "ColMajor"])
def test_ger(dtype, order):
ger = _ger_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
x = rng.random_sample(10).astype(dtype, copy=False)
y = rng.random_sample(20).astype(dtype, copy=False)
A = np.asarray(rng.random_sample((10, 20)).astype(dtype, copy=False),
order=ORDER[order])
alpha = 2.5
expected = alpha * np.outer(x, y) + A
ger(alpha, x, y, A)
assert_allclose(A, expected, rtol=RTOL[dtype])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("opB, transB",
[(_no_op, NoTrans), (np.transpose, Trans)],
ids=["NoTrans", "Trans"])
@pytest.mark.parametrize("opA, transA",
[(_no_op, NoTrans), (np.transpose, Trans)],
ids=["NoTrans", "Trans"])
@pytest.mark.parametrize("order", [RowMajor, ColMajor],
ids=["RowMajor", "ColMajor"])
def test_gemm(dtype, opA, transA, opB, transB, order):
gemm = _gemm_memview[_numpy_to_cython(dtype)]
rng = np.random.RandomState(0)
A = np.asarray(opA(rng.random_sample((30, 10)).astype(dtype, copy=False)),
order=ORDER[order])
B = np.asarray(opB(rng.random_sample((10, 20)).astype(dtype, copy=False)),
order=ORDER[order])
C = np.asarray(rng.random_sample((30, 20)).astype(dtype, copy=False),
order=ORDER[order])
alpha, beta = 2.5, -0.5
expected = alpha * opA(A).dot(opB(B)) + beta * C
gemm(transA, transB, alpha, A, B, beta, C)
assert_allclose(C, expected, rtol=RTOL[dtype])

View file

@@ -0,0 +1,128 @@
import pytest
import types
import numpy as np
import warnings
from sklearn.dummy import DummyClassifier
from sklearn.utils import all_estimators
from sklearn.utils.estimator_checks import choose_check_classifiers_labels
from sklearn.utils.estimator_checks import NotAnArray
from sklearn.utils.estimator_checks import enforce_estimator_tags_y
from sklearn.utils.estimator_checks import is_public_parameter
from sklearn.utils.estimator_checks import pairwise_estimator_convert_X
from sklearn.utils.estimator_checks import set_checking_parameters
from sklearn.utils.optimize import newton_cg
from sklearn.utils.random import random_choice_csc
from sklearn.utils import safe_indexing
# This file tests the utils that are deprecated
# TODO: remove in 0.24
def test_choose_check_classifiers_labels_deprecated():
with pytest.warns(FutureWarning,
match="removed in version 0.24"):
choose_check_classifiers_labels(None, None, None)
# TODO: remove in 0.24
def test_enforce_estimator_tags_y():
with pytest.warns(FutureWarning,
match="removed in version 0.24"):
enforce_estimator_tags_y(DummyClassifier(), np.array([0, 1]))
# TODO: remove in 0.24
def test_notanarray():
with pytest.warns(FutureWarning,
match="removed in version 0.24"):
NotAnArray([1, 2])
# TODO: remove in 0.24
def test_is_public_parameter():
with pytest.warns(FutureWarning,
match="removed in version 0.24"):
is_public_parameter('hello')
# TODO: remove in 0.24
def test_pairwise_estimator_convert_X():
with pytest.warns(FutureWarning,
match="removed in version 0.24"):
pairwise_estimator_convert_X([[1, 2]], DummyClassifier())
# TODO: remove in 0.24
def test_set_checking_parameters():
with pytest.warns(FutureWarning,
match="removed in version 0.24"):
set_checking_parameters(DummyClassifier())
# TODO: remove in 0.24
def test_newton_cg():
rng = np.random.RandomState(0)
A = rng.normal(size=(10, 10))
x0 = np.ones(10)
def func(x):
Ax = A.dot(x)
return .5 * (Ax).dot(Ax)
def grad(x):
return A.T.dot(A.dot(x))
def grad_hess(x):
return grad(x), lambda x: A.T.dot(A.dot(x))
with pytest.warns(FutureWarning,
match="removed in version 0.24"):
newton_cg(grad_hess, func, grad, x0)
# TODO: remove in 0.24
def test_random_choice_csc():
with pytest.warns(FutureWarning,
match="removed in version 0.24"):
random_choice_csc(10, [[2]])
# TODO: remove in 0.24
def test_safe_indexing():
with pytest.warns(FutureWarning,
match="removed in version 0.24"):
safe_indexing([1, 2], 0)
# TODO: remove in 0.24
def test_partial_dependence_no_shadowing():
# Non-regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/15842
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning)
from sklearn.inspection.partial_dependence import partial_dependence as _ # noqa
# Calling all_estimators() also triggers a recursive import of all
# submodules, including deprecated ones.
all_estimators()
from sklearn.inspection import partial_dependence
assert isinstance(partial_dependence, types.FunctionType)
# TODO: remove in 0.24
def test_dict_learning_no_shadowing():
# Non-regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/15842
with warnings.catch_warnings():
warnings.simplefilter("ignore", category=FutureWarning)
from sklearn.decomposition.dict_learning import dict_learning as _ # noqa
# Calling all_estimators() also triggers a recursive import of all
# submodules, including deprecated ones.
all_estimators()
from sklearn.decomposition import dict_learning
assert isinstance(dict_learning, types.FunctionType)

View file

@@ -0,0 +1,59 @@
# Authors: Raghav RV <rvraghav93@gmail.com>
# License: BSD 3 clause
import pickle
from sklearn.utils.deprecation import _is_deprecated
from sklearn.utils.deprecation import deprecated
from sklearn.utils._testing import assert_warns_message
@deprecated('qwerty')
class MockClass1:
pass
class MockClass2:
@deprecated('mockclass2_method')
def method(self):
pass
class MockClass3:
@deprecated()
def __init__(self):
pass
class MockClass4:
pass
@deprecated()
def mock_function():
return 10
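# Editorial sketch of the mechanism under test (not sklearn's actual
# implementation): `deprecated` wraps the target and emits a FutureWarning
# each time it is called, while preserving the return value.
import functools
import warnings
def _toy_deprecated(extra=''):
    def wrap(fn):
        @functools.wraps(fn)
        def inner(*args, **kwargs):
            warnings.warn('deprecated ' + extra, FutureWarning)
            return fn(*args, **kwargs)
        return inner
    return wrap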
def test_deprecated():
assert_warns_message(FutureWarning, 'qwerty', MockClass1)
assert_warns_message(FutureWarning, 'mockclass2_method',
MockClass2().method)
assert_warns_message(FutureWarning, 'deprecated', MockClass3)
val = assert_warns_message(FutureWarning, 'deprecated',
mock_function)
assert val == 10
def test_is_deprecated():
# Test if _is_deprecated helper identifies wrapping via deprecated
# NOTE it works only for class methods and functions
assert _is_deprecated(MockClass1.__init__)
assert _is_deprecated(MockClass2().method)
assert _is_deprecated(MockClass3.__init__)
assert not _is_deprecated(MockClass4.__init__)
assert _is_deprecated(mock_function)
def test_pickle():
pickle.loads(pickle.dumps(mock_function))

View file

@@ -0,0 +1,640 @@
import unittest
import sys
import numpy as np
import scipy.sparse as sp
import joblib
from io import StringIO
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils import deprecated
from sklearn.utils._testing import (assert_raises_regex,
ignore_warnings,
assert_warns, assert_raises,
SkipTest)
from sklearn.utils.estimator_checks import check_estimator, _NotAnArray
from sklearn.utils.estimator_checks \
import check_class_weight_balanced_linear_classifier
from sklearn.utils.estimator_checks import set_random_state
from sklearn.utils.estimator_checks import _set_checking_parameters
from sklearn.utils.estimator_checks import check_estimators_unfitted
from sklearn.utils.estimator_checks import check_fit_score_takes_y
from sklearn.utils.estimator_checks import check_no_attributes_set_in_init
from sklearn.utils.estimator_checks import check_classifier_data_not_an_array
from sklearn.utils.estimator_checks import check_regressor_data_not_an_array
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.estimator_checks import check_outlier_corruption
from sklearn.utils.fixes import np_version, parse_version
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression, SGDClassifier
from sklearn.mixture import GaussianMixture
from sklearn.cluster import MiniBatchKMeans
from sklearn.decomposition import NMF
from sklearn.linear_model import MultiTaskElasticNet, LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsRegressor
from sklearn.utils.validation import check_array
from sklearn.utils import all_estimators
class CorrectNotFittedError(ValueError):
"""Exception class to raise if estimator is used before fitting.
Like NotFittedError, it inherits from ValueError, but not from
AttributeError. Used for testing only.
"""
class BaseBadClassifier(ClassifierMixin, BaseEstimator):
def fit(self, X, y):
return self
def predict(self, X):
return np.ones(X.shape[0])
class ChangesDict(BaseEstimator):
def __init__(self, key=0):
self.key = key
def fit(self, X, y=None):
X, y = self._validate_data(X, y)
return self
def predict(self, X):
X = check_array(X)
self.key = 1000
return np.ones(X.shape[0])
class SetsWrongAttribute(BaseEstimator):
def __init__(self, acceptable_key=0):
self.acceptable_key = acceptable_key
def fit(self, X, y=None):
self.wrong_attribute = 0
X, y = self._validate_data(X, y)
return self
class ChangesWrongAttribute(BaseEstimator):
def __init__(self, wrong_attribute=0):
self.wrong_attribute = wrong_attribute
def fit(self, X, y=None):
self.wrong_attribute = 1
X, y = self._validate_data(X, y)
return self
class ChangesUnderscoreAttribute(BaseEstimator):
def fit(self, X, y=None):
self._good_attribute = 1
X, y = self._validate_data(X, y)
return self
class RaisesErrorInSetParams(BaseEstimator):
def __init__(self, p=0):
self.p = p
def set_params(self, **kwargs):
if 'p' in kwargs:
p = kwargs.pop('p')
if p < 0:
raise ValueError("p can't be less than 0")
self.p = p
return super().set_params(**kwargs)
def fit(self, X, y=None):
X, y = self._validate_data(X, y)
return self
class ModifiesValueInsteadOfRaisingError(BaseEstimator):
def __init__(self, p=0):
self.p = p
def set_params(self, **kwargs):
if 'p' in kwargs:
p = kwargs.pop('p')
if p < 0:
p = 0
self.p = p
return super().set_params(**kwargs)
def fit(self, X, y=None):
X, y = self._validate_data(X, y)
return self
class ModifiesAnotherValue(BaseEstimator):
def __init__(self, a=0, b='method1'):
self.a = a
self.b = b
def set_params(self, **kwargs):
if 'a' in kwargs:
a = kwargs.pop('a')
self.a = a
if a is None:
kwargs.pop('b')
self.b = 'method2'
return super().set_params(**kwargs)
def fit(self, X, y=None):
X, y = self._validate_data(X, y)
return self
class NoCheckinPredict(BaseBadClassifier):
def fit(self, X, y):
X, y = self._validate_data(X, y)
return self
class NoSparseClassifier(BaseBadClassifier):
def fit(self, X, y):
X, y = self._validate_data(X, y, accept_sparse=['csr', 'csc'])
if sp.issparse(X):
raise ValueError("Nonsensical Error")
return self
def predict(self, X):
X = check_array(X)
return np.ones(X.shape[0])
class CorrectNotFittedErrorClassifier(BaseBadClassifier):
def fit(self, X, y):
X, y = self._validate_data(X, y)
self.coef_ = np.ones(X.shape[1])
return self
def predict(self, X):
check_is_fitted(self)
X = check_array(X)
return np.ones(X.shape[0])
class NoSampleWeightPandasSeriesType(BaseEstimator):
def fit(self, X, y, sample_weight=None):
# Convert data
X, y = self._validate_data(
X, y,
accept_sparse=("csr", "csc"),
multi_output=True,
y_numeric=True)
# Function is only called after we verify that pandas is installed
from pandas import Series
if isinstance(sample_weight, Series):
raise ValueError("Estimator does not accept 'sample_weight'"
"of type pandas.Series")
return self
def predict(self, X):
X = check_array(X)
return np.ones(X.shape[0])
class BadBalancedWeightsClassifier(BaseBadClassifier):
def __init__(self, class_weight=None):
self.class_weight = class_weight
def fit(self, X, y):
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import compute_class_weight
label_encoder = LabelEncoder().fit(y)
classes = label_encoder.classes_
class_weight = compute_class_weight(self.class_weight, classes=classes,
y=y)
# Intentionally modify the balanced class_weight
# to simulate a bug and raise an exception
if self.class_weight == "balanced":
class_weight += 1.
# Simply assigning coef_ to the class_weight
self.coef_ = class_weight
return self
class BadTransformerWithoutMixin(BaseEstimator):
def fit(self, X, y=None):
X = self._validate_data(X)
return self
def transform(self, X):
X = check_array(X)
return X
class NotInvariantPredict(BaseEstimator):
def fit(self, X, y):
# Convert data
X, y = self._validate_data(
X, y,
accept_sparse=("csr", "csc"),
multi_output=True,
y_numeric=True)
return self
def predict(self, X):
        # return ones if X has more than one sample, otherwise zeros
X = check_array(X)
if X.shape[0] > 1:
return np.ones(X.shape[0])
return np.zeros(X.shape[0])
class LargeSparseNotSupportedClassifier(BaseEstimator):
def fit(self, X, y):
X, y = self._validate_data(
X, y,
accept_sparse=("csr", "csc", "coo"),
accept_large_sparse=True,
multi_output=True,
y_numeric=True)
if sp.issparse(X):
if X.getformat() == "coo":
if X.row.dtype == "int64" or X.col.dtype == "int64":
raise ValueError(
"Estimator doesn't support 64-bit indices")
elif X.getformat() in ["csc", "csr"]:
assert "int64" not in (X.indices.dtype, X.indptr.dtype),\
"Estimator doesn't support 64-bit indices"
return self
class SparseTransformer(BaseEstimator):
def fit(self, X, y=None):
self.X_shape_ = self._validate_data(X).shape
return self
def fit_transform(self, X, y=None):
return self.fit(X, y).transform(X)
def transform(self, X):
X = check_array(X)
if X.shape[1] != self.X_shape_[1]:
raise ValueError('Bad number of features')
return sp.csr_matrix(X)
class EstimatorInconsistentForPandas(BaseEstimator):
def fit(self, X, y):
try:
from pandas import DataFrame
if isinstance(X, DataFrame):
self.value_ = X.iloc[0, 0]
else:
X = check_array(X)
self.value_ = X[1, 0]
return self
except ImportError:
X = check_array(X)
self.value_ = X[1, 0]
return self
def predict(self, X):
X = check_array(X)
return np.array([self.value_] * X.shape[0])
class UntaggedBinaryClassifier(SGDClassifier):
    # Toy classifier that only supports binary classification; will fail tests.
def fit(self, X, y, coef_init=None, intercept_init=None,
sample_weight=None):
super().fit(X, y, coef_init, intercept_init, sample_weight)
if len(self.classes_) > 2:
raise ValueError('Only 2 classes are supported')
return self
def partial_fit(self, X, y, classes=None, sample_weight=None):
super().partial_fit(X=X, y=y, classes=classes,
sample_weight=sample_weight)
if len(self.classes_) > 2:
raise ValueError('Only 2 classes are supported')
return self
class TaggedBinaryClassifier(UntaggedBinaryClassifier):
# Toy classifier that only supports binary classification.
def _more_tags(self):
return {'binary_only': True}
class RequiresPositiveYRegressor(LinearRegression):
def fit(self, X, y):
X, y = self._validate_data(X, y, multi_output=True)
if (y <= 0).any():
raise ValueError('negative y values not supported!')
return super().fit(X, y)
def _more_tags(self):
return {"requires_positive_y": True}
def test_not_an_array_array_function():
if np_version < parse_version('1.17'):
raise SkipTest("array_function protocol not supported in numpy <1.17")
not_array = _NotAnArray(np.ones(10))
msg = "Don't want to call array_function sum!"
assert_raises_regex(TypeError, msg, np.sum, not_array)
# always returns True
assert np.may_share_memory(not_array, None)
def test_check_fit_score_takes_y_works_on_deprecated_fit():
# Tests that check_fit_score_takes_y works on a class with
# a deprecated fit method
class TestEstimatorWithDeprecatedFitMethod(BaseEstimator):
@deprecated("Deprecated for the purpose of testing "
"check_fit_score_takes_y")
def fit(self, X, y):
return self
check_fit_score_takes_y("test", TestEstimatorWithDeprecatedFitMethod())
@ignore_warnings("Passing a class is depr", category=FutureWarning) # 0.24
def test_check_estimator():
# tests that the estimator actually fails on "bad" estimators.
# not a complete test of all checks, which are very extensive.
# check that we have a set_params and can clone
msg = "it does not implement a 'get_params' method"
assert_raises_regex(TypeError, msg, check_estimator, object)
msg = "object has no attribute '_get_tags'"
assert_raises_regex(AttributeError, msg, check_estimator, object())
# check that values returned by get_params match set_params
msg = "get_params result does not match what was passed to set_params"
assert_raises_regex(AssertionError, msg, check_estimator,
ModifiesValueInsteadOfRaisingError())
assert_warns(UserWarning, check_estimator, RaisesErrorInSetParams())
assert_raises_regex(AssertionError, msg, check_estimator,
ModifiesAnotherValue())
# check that we have a fit method
msg = "object has no attribute 'fit'"
assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator)
assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator())
# check that fit does input validation
msg = "ValueError not raised"
assert_raises_regex(AssertionError, msg, check_estimator,
BaseBadClassifier)
assert_raises_regex(AssertionError, msg, check_estimator,
BaseBadClassifier())
    # check that sample_weight in fit accepts pandas.Series type
try:
from pandas import Series # noqa
msg = ("Estimator NoSampleWeightPandasSeriesType raises error if "
"'sample_weight' parameter is of type pandas.Series")
assert_raises_regex(
ValueError, msg, check_estimator, NoSampleWeightPandasSeriesType)
except ImportError:
pass
# check that predict does input validation (doesn't accept dicts in input)
msg = "Estimator doesn't check for NaN and inf in predict"
assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict)
assert_raises_regex(AssertionError, msg, check_estimator,
NoCheckinPredict())
# check that estimator state does not change
# at transform/predict/predict_proba time
msg = 'Estimator changes __dict__ during predict'
assert_raises_regex(AssertionError, msg, check_estimator, ChangesDict)
    # check that `fit` only changes attributes that
    # are private (start with an _ or end with a _).
msg = ('Estimator ChangesWrongAttribute should not change or mutate '
'the parameter wrong_attribute from 0 to 1 during fit.')
assert_raises_regex(AssertionError, msg,
check_estimator, ChangesWrongAttribute)
check_estimator(ChangesUnderscoreAttribute)
# check that `fit` doesn't add any public attribute
msg = (r'Estimator adds public attribute\(s\) during the fit method.'
' Estimators are only allowed to add private attributes'
' either started with _ or ended'
' with _ but wrong_attribute added')
assert_raises_regex(AssertionError, msg,
check_estimator, SetsWrongAttribute)
# check for invariant method
name = NotInvariantPredict.__name__
method = 'predict'
msg = ("{method} of {name} is not invariant when applied "
"to a subset.").format(method=method, name=name)
assert_raises_regex(AssertionError, msg,
check_estimator, NotInvariantPredict)
# check for sparse matrix input handling
name = NoSparseClassifier.__name__
msg = "Estimator %s doesn't seem to fail gracefully on sparse data" % name
# the check for sparse input handling prints to the stdout,
# instead of raising an error, so as not to remove the original traceback.
# that means we need to jump through some hoops to catch it.
old_stdout = sys.stdout
string_buffer = StringIO()
sys.stdout = string_buffer
try:
check_estimator(NoSparseClassifier)
    except Exception:
pass
finally:
sys.stdout = old_stdout
assert msg in string_buffer.getvalue()
# Large indices test on bad estimator
msg = ('Estimator LargeSparseNotSupportedClassifier doesn\'t seem to '
r'support \S{3}_64 matrix, and is not failing gracefully.*')
assert_raises_regex(AssertionError, msg, check_estimator,
LargeSparseNotSupportedClassifier)
# does error on binary_only untagged estimator
msg = 'Only 2 classes are supported'
assert_raises_regex(ValueError, msg, check_estimator,
UntaggedBinaryClassifier)
# non-regression test for estimators transforming to sparse data
check_estimator(SparseTransformer())
# doesn't error on actual estimator
check_estimator(LogisticRegression)
check_estimator(LogisticRegression(C=0.01))
check_estimator(MultiTaskElasticNet)
check_estimator(MultiTaskElasticNet())
# doesn't error on binary_only tagged estimator
check_estimator(TaggedBinaryClassifier)
# Check regressor with requires_positive_y estimator tag
msg = 'negative y values not supported!'
assert_raises_regex(ValueError, msg, check_estimator,
RequiresPositiveYRegressor)
def test_check_outlier_corruption():
# should raise AssertionError
decision = np.array([0., 1., 1.5, 2.])
assert_raises(AssertionError, check_outlier_corruption, 1, 2, decision)
# should pass
decision = np.array([0., 1., 1., 2.])
check_outlier_corruption(1, 2, decision)
def test_check_estimator_transformer_no_mixin():
# check that TransformerMixin is not required for transformer tests to run
assert_raises_regex(AttributeError, '.*fit_transform.*',
check_estimator, BadTransformerWithoutMixin())
def test_check_estimator_clones():
# check that check_estimator doesn't modify the estimator it receives
from sklearn.datasets import load_iris
iris = load_iris()
for Estimator in [GaussianMixture, LinearRegression,
RandomForestClassifier, NMF, SGDClassifier,
MiniBatchKMeans]:
with ignore_warnings(category=FutureWarning):
# when 'est = SGDClassifier()'
est = Estimator()
_set_checking_parameters(est)
set_random_state(est)
# without fitting
old_hash = joblib.hash(est)
check_estimator(est)
assert old_hash == joblib.hash(est)
with ignore_warnings(category=FutureWarning):
# when 'est = SGDClassifier()'
est = Estimator()
_set_checking_parameters(est)
set_random_state(est)
# with fitting
est.fit(iris.data + 10, iris.target)
old_hash = joblib.hash(est)
check_estimator(est)
assert old_hash == joblib.hash(est)
def test_check_estimators_unfitted():
# check that a ValueError/AttributeError is raised when calling predict
# on an unfitted estimator
msg = "NotFittedError not raised by predict"
assert_raises_regex(AssertionError, msg, check_estimators_unfitted,
"estimator", NoSparseClassifier())
# check that CorrectNotFittedError inherit from either ValueError
# or AttributeError
check_estimators_unfitted("estimator", CorrectNotFittedErrorClassifier())
def test_check_no_attributes_set_in_init():
class NonConformantEstimatorPrivateSet(BaseEstimator):
def __init__(self):
self.you_should_not_set_this_ = None
class NonConformantEstimatorNoParamSet(BaseEstimator):
def __init__(self, you_should_set_this_=None):
pass
assert_raises_regex(AssertionError,
"Estimator estimator_name should not set any"
" attribute apart from parameters during init."
r" Found attributes \['you_should_not_set_this_'\].",
check_no_attributes_set_in_init,
'estimator_name',
NonConformantEstimatorPrivateSet())
assert_raises_regex(AssertionError,
"Estimator estimator_name should store all "
"parameters as an attribute during init. "
"Did not find attributes "
r"\['you_should_set_this_'\].",
check_no_attributes_set_in_init,
'estimator_name',
NonConformantEstimatorNoParamSet())
def test_check_estimator_pairwise():
# check that check_estimator() works on estimator with _pairwise
# kernel or metric
# test precomputed kernel
est = SVC(kernel='precomputed')
check_estimator(est)
# test precomputed metric
est = KNeighborsRegressor(metric='precomputed')
check_estimator(est)
def test_check_classifier_data_not_an_array():
assert_raises_regex(AssertionError,
'Not equal to tolerance',
check_classifier_data_not_an_array,
'estimator_name',
EstimatorInconsistentForPandas())
def test_check_regressor_data_not_an_array():
assert_raises_regex(AssertionError,
'Not equal to tolerance',
check_regressor_data_not_an_array,
'estimator_name',
EstimatorInconsistentForPandas())
@ignore_warnings("Passing a class is depr", category=FutureWarning) # 0.24
def test_check_estimator_required_parameters_skip():
    # TODO: remove whole test in 0.24 since passing classes to
    # check_estimator() isn't supported anymore
class MyEstimator(BaseEstimator):
_required_parameters = ["special_parameter"]
def __init__(self, special_parameter):
self.special_parameter = special_parameter
assert_raises_regex(SkipTest, r"Can't instantiate estimator MyEstimator "
r"which requires parameters "
r"\['special_parameter'\]",
check_estimator, MyEstimator)
def run_tests_without_pytest():
"""Runs the tests in this file without using pytest.
"""
main_module = sys.modules['__main__']
test_functions = [getattr(main_module, name) for name in dir(main_module)
if name.startswith('test_')]
test_cases = [unittest.FunctionTestCase(fn) for fn in test_functions]
suite = unittest.TestSuite()
suite.addTests(test_cases)
runner = unittest.TextTestRunner()
runner.run(suite)
def test_check_class_weight_balanced_linear_classifier():
# check that ill-computed balanced weights raises an exception
assert_raises_regex(AssertionError,
"Classifier estimator_name is not computing"
" class_weight=balanced properly.",
check_class_weight_balanced_linear_classifier,
'estimator_name',
BadBalancedWeightsClassifier)
def test_all_estimators_all_public():
    # all_estimators should not fail when pytest is not installed and should
    # return only public estimators
estimators = all_estimators()
    for name, _ in estimators:
        assert not name.startswith("_")
if __name__ == '__main__':
# This module is run as a script to check that we have no dependency on
# pytest for estimator checks.
run_tests_without_pytest()

View file

@@ -0,0 +1,267 @@
from contextlib import closing
from io import StringIO
import pytest
from sklearn import config_context
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.impute import SimpleImputer
from sklearn.decomposition import PCA
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import VotingClassifier
from sklearn.feature_selection import SelectPercentile
from sklearn.cluster import Birch
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import LinearSVC
from sklearn.svm import LinearSVR
from sklearn.tree import DecisionTreeClassifier
from sklearn.multiclass import OneVsOneClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import StackingRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RationalQuadratic
from sklearn.utils._estimator_html_repr import _write_label_html
from sklearn.utils._estimator_html_repr import _get_visual_block
from sklearn.utils._estimator_html_repr import estimator_html_repr
@pytest.mark.parametrize("checked", [True, False])
def test_write_label_html(checked):
# Test checking logic and labeling
name = "LogisticRegression"
tool_tip = "hello-world"
with closing(StringIO()) as out:
_write_label_html(out, name, tool_tip, checked=checked)
html_label = out.getvalue()
assert 'LogisticRegression</label>' in html_label
assert html_label.startswith('<div class="sk-label-container">')
assert '<pre>hello-world</pre>' in html_label
if checked:
assert 'checked>' in html_label
@pytest.mark.parametrize('est', ['passthrough', 'drop', None])
def test_get_visual_block_single_str_none(est):
    # Test estimators that are represented by strings
est_html_info = _get_visual_block(est)
assert est_html_info.kind == 'single'
assert est_html_info.estimators == est
assert est_html_info.names == str(est)
assert est_html_info.name_details == str(est)
def test_get_visual_block_single_estimator():
est = LogisticRegression(C=10.0)
est_html_info = _get_visual_block(est)
assert est_html_info.kind == 'single'
assert est_html_info.estimators == est
assert est_html_info.names == est.__class__.__name__
assert est_html_info.name_details == str(est)
def test_get_visual_block_pipeline():
pipe = Pipeline([
('imputer', SimpleImputer()),
('do_nothing', 'passthrough'),
('do_nothing_more', None),
('classifier', LogisticRegression())
])
est_html_info = _get_visual_block(pipe)
assert est_html_info.kind == 'serial'
assert est_html_info.estimators == tuple(step[1] for step in pipe.steps)
assert est_html_info.names == ['imputer: SimpleImputer',
'do_nothing: passthrough',
'do_nothing_more: passthrough',
'classifier: LogisticRegression']
assert est_html_info.name_details == [str(est) for _, est in pipe.steps]
def test_get_visual_block_feature_union():
f_union = FeatureUnion([
('pca', PCA()), ('svd', TruncatedSVD())
])
est_html_info = _get_visual_block(f_union)
assert est_html_info.kind == 'parallel'
assert est_html_info.names == ('pca', 'svd')
assert est_html_info.estimators == tuple(
trans[1] for trans in f_union.transformer_list)
assert est_html_info.name_details == (None, None)
def test_get_visual_block_voting():
clf = VotingClassifier([
('log_reg', LogisticRegression()),
('mlp', MLPClassifier())
])
est_html_info = _get_visual_block(clf)
assert est_html_info.kind == 'parallel'
assert est_html_info.estimators == tuple(trans[1]
for trans in clf.estimators)
assert est_html_info.names == ('log_reg', 'mlp')
assert est_html_info.name_details == (None, None)
def test_get_visual_block_column_transformer():
ct = ColumnTransformer([
('pca', PCA(), ['num1', 'num2']),
('svd', TruncatedSVD, [0, 3])
])
est_html_info = _get_visual_block(ct)
assert est_html_info.kind == 'parallel'
assert est_html_info.estimators == tuple(
trans[1] for trans in ct.transformers)
assert est_html_info.names == ('pca', 'svd')
assert est_html_info.name_details == (['num1', 'num2'], [0, 3])
def test_estimator_html_repr_pipeline():
num_trans = Pipeline(steps=[
('pass', 'passthrough'),
('imputer', SimpleImputer(strategy='median'))
])
cat_trans = Pipeline(steps=[
('imputer', SimpleImputer(strategy='constant',
missing_values='empty')),
('one-hot', OneHotEncoder(drop='first'))
])
preprocess = ColumnTransformer([
('num', num_trans, ['a', 'b', 'c', 'd', 'e']),
('cat', cat_trans, [0, 1, 2, 3])
])
feat_u = FeatureUnion([
('pca', PCA(n_components=1)),
('tsvd', Pipeline([('first', TruncatedSVD(n_components=3)),
('select', SelectPercentile())]))
])
clf = VotingClassifier([
('lr', LogisticRegression(solver='lbfgs', random_state=1)),
('mlp', MLPClassifier(alpha=0.001))
])
pipe = Pipeline([
('preprocessor', preprocess), ('feat_u', feat_u), ('classifier', clf)
])
html_output = estimator_html_repr(pipe)
# top level estimators show estimator with changes
assert str(pipe) in html_output
for _, est in pipe.steps:
assert (f"<div class=\"sk-toggleable__content\">"
f"<pre>{str(est)}") in html_output
# low level estimators do not show changes
with config_context(print_changed_only=True):
assert str(num_trans['pass']) in html_output
assert 'passthrough</label>' in html_output
assert str(num_trans['imputer']) in html_output
for _, _, cols in preprocess.transformers:
assert f"<pre>{cols}</pre>" in html_output
# feature union
for name, _ in feat_u.transformer_list:
assert f"<label>{name}</label>" in html_output
pca = feat_u.transformer_list[0][1]
assert f"<pre>{str(pca)}</pre>" in html_output
tsvd = feat_u.transformer_list[1][1]
first = tsvd['first']
select = tsvd['select']
assert f"<pre>{str(first)}</pre>" in html_output
assert f"<pre>{str(select)}</pre>" in html_output
    # voting classifier
for name, est in clf.estimators:
assert f"<label>{name}</label>" in html_output
assert f"<pre>{str(est)}</pre>" in html_output
@pytest.mark.parametrize("final_estimator", [None, LinearSVC()])
def test_stacking_classifier(final_estimator):
estimators = [('mlp', MLPClassifier(alpha=0.001)),
('tree', DecisionTreeClassifier())]
clf = StackingClassifier(
estimators=estimators, final_estimator=final_estimator)
html_output = estimator_html_repr(clf)
assert str(clf) in html_output
# If final_estimator's default changes from LogisticRegression
# this should be updated
if final_estimator is None:
assert "LogisticRegression(" in html_output
else:
assert final_estimator.__class__.__name__ in html_output
@pytest.mark.parametrize("final_estimator", [None, LinearSVR()])
def test_stacking_regressor(final_estimator):
reg = StackingRegressor(
estimators=[('svr', LinearSVR())], final_estimator=final_estimator)
html_output = estimator_html_repr(reg)
assert str(reg.estimators[0][0]) in html_output
assert "LinearSVR</label>" in html_output
if final_estimator is None:
assert "RidgeCV</label>" in html_output
else:
assert final_estimator.__class__.__name__ in html_output
def test_birch_duck_typing_meta():
# Test duck typing meta estimators with Birch
birch = Birch(n_clusters=AgglomerativeClustering(n_clusters=3))
html_output = estimator_html_repr(birch)
# inner estimators do not show changes
with config_context(print_changed_only=True):
assert f"<pre>{str(birch.n_clusters)}" in html_output
assert "AgglomerativeClustering</label>" in html_output
# outer estimator contains all changes
assert f"<pre>{str(birch)}" in html_output
def test_ovo_classifier_duck_typing_meta():
# Test duck typing metaestimators with OVO
ovo = OneVsOneClassifier(LinearSVC(penalty='l1'))
html_output = estimator_html_repr(ovo)
# inner estimators do not show changes
with config_context(print_changed_only=True):
assert f"<pre>{str(ovo.estimator)}" in html_output
assert "LinearSVC</label>" in html_output
    # outer estimator
assert f"<pre>{str(ovo)}" in html_output
def test_duck_typing_nested_estimator():
# Test duck typing metaestimators with GP
kernel = RationalQuadratic(length_scale=1.0, alpha=0.1)
gp = GaussianProcessRegressor(kernel=kernel)
html_output = estimator_html_repr(gp)
assert f"<pre>{str(kernel)}" in html_output
assert f"<pre>{str(gp)}" in html_output
@pytest.mark.parametrize('print_changed_only', [True, False])
def test_one_estimator_print_change_only(print_changed_only):
pca = PCA(n_components=10)
with config_context(print_changed_only=print_changed_only):
pca_repr = str(pca)
html_output = estimator_html_repr(pca)
assert pca_repr in html_output

View file

@@ -0,0 +1,722 @@
# Authors: Olivier Grisel <olivier.grisel@ensta.org>
# Mathieu Blondel <mathieu@mblondel.org>
# Denis Engemann <denis-alexander.engemann@inria.fr>
#
# License: BSD 3 clause
import numpy as np
from scipy import sparse
from scipy import linalg
from scipy import stats
from scipy.special import expit
import pytest
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_allclose
from sklearn.utils._testing import assert_allclose_dense_sparse
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_warns
from sklearn.utils._testing import assert_warns_message
from sklearn.utils._testing import skip_if_32bit
from sklearn.utils.extmath import density
from sklearn.utils.extmath import randomized_svd
from sklearn.utils.extmath import row_norms
from sklearn.utils.extmath import weighted_mode
from sklearn.utils.extmath import cartesian
from sklearn.utils.extmath import log_logistic
from sklearn.utils.extmath import svd_flip
from sklearn.utils.extmath import _incremental_mean_and_var
from sklearn.utils.extmath import _deterministic_vector_sign_flip
from sklearn.utils.extmath import softmax
from sklearn.utils.extmath import stable_cumsum
from sklearn.utils.extmath import safe_min
from sklearn.utils.extmath import safe_sparse_dot
from sklearn.datasets import make_low_rank_matrix
def test_density():
rng = np.random.RandomState(0)
X = rng.randint(10, size=(10, 5))
X[1, 2] = 0
X[5, 3] = 0
X_csr = sparse.csr_matrix(X)
X_csc = sparse.csc_matrix(X)
X_coo = sparse.coo_matrix(X)
X_lil = sparse.lil_matrix(X)
for X_ in (X_csr, X_csc, X_coo, X_lil):
assert density(X_) == density(X)
def test_uniform_weights():
# with uniform weights, results should be identical to stats.mode
rng = np.random.RandomState(0)
x = rng.randint(10, size=(10, 5))
weights = np.ones(x.shape)
for axis in (None, 0, 1):
mode, score = stats.mode(x, axis)
mode2, score2 = weighted_mode(x, weights, axis=axis)
assert_array_equal(mode, mode2)
assert_array_equal(score, score2)
def test_random_weights():
# set this up so that each row should have a weighted mode of 6,
# with a score that is easily reproduced
mode_result = 6
rng = np.random.RandomState(0)
x = rng.randint(mode_result, size=(100, 10))
w = rng.random_sample(x.shape)
x[:, :5] = mode_result
w[:, :5] += 1
mode, score = weighted_mode(x, w, axis=1)
assert_array_equal(mode, mode_result)
assert_array_almost_equal(score.ravel(), w[:, :5].sum(1))
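# Editorial sketch: weighted_mode generalizes the mode by summing weights
# per value instead of counting occurrences. A one-row version of that
# reduction in plain NumPy, mirroring the setup above:
def test_weighted_mode_reduction_sketch():
    row = np.array([6, 6, 6, 6, 6, 1, 2, 3, 4, 5])
    w = np.ones_like(row, dtype=float)
    w[:5] += 1.0                      # boost the weight of the 6s
    totals = np.zeros(row.max() + 1)
    np.add.at(totals, row, w)         # sum weights per value
    assert totals.argmax() == 6 and totals.max() == 10.0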
def check_randomized_svd_low_rank(dtype):
# Check that extmath.randomized_svd is consistent with linalg.svd
n_samples = 100
n_features = 500
rank = 5
k = 10
decimal = 5 if dtype == np.float32 else 7
dtype = np.dtype(dtype)
# generate a matrix X of approximate effective rank `rank` and no noise
# component (very structured signal):
X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
effective_rank=rank, tail_strength=0.0,
random_state=0).astype(dtype, copy=False)
assert X.shape == (n_samples, n_features)
# compute the singular values of X using the slow exact method
U, s, V = linalg.svd(X, full_matrices=False)
# Convert the singular values to the specific dtype
U = U.astype(dtype, copy=False)
s = s.astype(dtype, copy=False)
V = V.astype(dtype, copy=False)
for normalizer in ['auto', 'LU', 'QR']: # 'none' would not be stable
# compute the singular values of X using the fast approximate method
Ua, sa, Va = randomized_svd(
X, k, power_iteration_normalizer=normalizer, random_state=0)
# If the input dtype is float, then the output dtype is float of the
# same bit size (f32 is not upcast to f64)
# But if the input dtype is int, the output dtype is float64
if dtype.kind == 'f':
assert Ua.dtype == dtype
assert sa.dtype == dtype
assert Va.dtype == dtype
else:
assert Ua.dtype == np.float64
assert sa.dtype == np.float64
assert Va.dtype == np.float64
assert Ua.shape == (n_samples, k)
assert sa.shape == (k,)
assert Va.shape == (k, n_features)
# ensure that the singular values of both methods are equal up to the
# real rank of the matrix
assert_almost_equal(s[:k], sa, decimal=decimal)
# check the singular vectors too (while not checking the sign)
assert_almost_equal(np.dot(U[:, :k], V[:k, :]), np.dot(Ua, Va),
decimal=decimal)
# check the sparse matrix representation
X = sparse.csr_matrix(X)
# compute the singular values of X using the fast approximate method
Ua, sa, Va = \
randomized_svd(X, k, power_iteration_normalizer=normalizer,
random_state=0)
if dtype.kind == 'f':
assert Ua.dtype == dtype
assert sa.dtype == dtype
assert Va.dtype == dtype
else:
assert Ua.dtype.kind == 'f'
assert sa.dtype.kind == 'f'
assert Va.dtype.kind == 'f'
assert_almost_equal(s[:rank], sa[:rank], decimal=decimal)
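# Editorial sketch of the idea behind randomized_svd (no power iterations
# or normalizers, unlike the real implementation): project X onto a random
# low-dimensional subspace, orthonormalize the projection, then run an
# exact SVD on the small projected matrix.
def _tiny_randomized_svd(X, k, rng):
    Q, _ = linalg.qr(X @ rng.normal(size=(X.shape[1], k)), mode='economic')
    Ub, s, Vt = linalg.svd(Q.T @ X, full_matrices=False)
    return Q @ Ub, s, Vt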
@pytest.mark.parametrize('dtype',
(np.int32, np.int64, np.float32, np.float64))
def test_randomized_svd_low_rank_all_dtypes(dtype):
check_randomized_svd_low_rank(dtype)
@pytest.mark.parametrize('dtype',
(np.float32, np.float64))
def test_row_norms(dtype):
X = np.random.RandomState(42).randn(100, 100)
if dtype is np.float32:
precision = 4
else:
precision = 5
X = X.astype(dtype, copy=False)
sq_norm = (X ** 2).sum(axis=1)
assert_array_almost_equal(sq_norm, row_norms(X, squared=True),
precision)
assert_array_almost_equal(np.sqrt(sq_norm), row_norms(X), precision)
for csr_index_dtype in [np.int32, np.int64]:
Xcsr = sparse.csr_matrix(X, dtype=dtype)
# csr_matrix will use int32 indices by default,
# up-casting those to int64 when necessary
if csr_index_dtype is np.int64:
Xcsr.indptr = Xcsr.indptr.astype(csr_index_dtype, copy=False)
Xcsr.indices = Xcsr.indices.astype(csr_index_dtype, copy=False)
assert Xcsr.indices.dtype == csr_index_dtype
assert Xcsr.indptr.dtype == csr_index_dtype
assert_array_almost_equal(sq_norm, row_norms(Xcsr, squared=True),
precision)
assert_array_almost_equal(np.sqrt(sq_norm), row_norms(Xcsr),
precision)
def test_randomized_svd_low_rank_with_noise():
# Check that extmath.randomized_svd can handle noisy matrices
n_samples = 100
n_features = 500
rank = 5
k = 10
    # generate a matrix X with structured approximate rank `rank` and an
    # important noisy component
X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
effective_rank=rank, tail_strength=0.1,
random_state=0)
assert X.shape == (n_samples, n_features)
# compute the singular values of X using the slow exact method
_, s, _ = linalg.svd(X, full_matrices=False)
for normalizer in ['auto', 'none', 'LU', 'QR']:
# compute the singular values of X using the fast approximate
# method without the iterated power method
_, sa, _ = randomized_svd(X, k, n_iter=0,
power_iteration_normalizer=normalizer,
random_state=0)
# the approximation does not tolerate the noise:
assert np.abs(s[:k] - sa).max() > 0.01
# compute the singular values of X using the fast approximate
# method with iterated power method
_, sap, _ = randomized_svd(X, k,
power_iteration_normalizer=normalizer,
random_state=0)
        # the iterated power method helps to get rid of the noise:
assert_almost_equal(s[:k], sap, decimal=3)
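# Editorial note: each power iteration effectively replaces X by (X X^T) X,
# so after q iterations every singular value sigma_i is raised to
# sigma_i^(2q + 1). This sharpens the spectral gap, which is why `sap`
# (with power iterations) matches the exact values while `sa` (n_iter=0)
# does not.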
def test_randomized_svd_infinite_rank():
# Check that extmath.randomized_svd can handle noisy matrices
n_samples = 100
n_features = 500
rank = 5
k = 10
    # let us try again without a low-rank component: just regularly but slowly
# decreasing singular values: the rank of the data matrix is infinite
X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
effective_rank=rank, tail_strength=1.0,
random_state=0)
assert X.shape == (n_samples, n_features)
# compute the singular values of X using the slow exact method
_, s, _ = linalg.svd(X, full_matrices=False)
for normalizer in ['auto', 'none', 'LU', 'QR']:
# compute the singular values of X using the fast approximate method
# without the iterated power method
_, sa, _ = randomized_svd(X, k, n_iter=0,
power_iteration_normalizer=normalizer)
# the approximation does not tolerate the noise:
assert np.abs(s[:k] - sa).max() > 0.1
# compute the singular values of X using the fast approximate method
# with iterated power method
_, sap, _ = randomized_svd(X, k, n_iter=5,
power_iteration_normalizer=normalizer)
        # the iterated power method still manages to recover most of the
        # structure at the requested rank
assert_almost_equal(s[:k], sap, decimal=3)
def test_randomized_svd_transpose_consistency():
# Check that transposing the design matrix has limited impact
n_samples = 100
n_features = 500
rank = 4
k = 10
X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features,
effective_rank=rank, tail_strength=0.5,
random_state=0)
assert X.shape == (n_samples, n_features)
U1, s1, V1 = randomized_svd(X, k, n_iter=3, transpose=False,
random_state=0)
U2, s2, V2 = randomized_svd(X, k, n_iter=3, transpose=True,
random_state=0)
U3, s3, V3 = randomized_svd(X, k, n_iter=3, transpose='auto',
random_state=0)
U4, s4, V4 = linalg.svd(X, full_matrices=False)
assert_almost_equal(s1, s4[:k], decimal=3)
assert_almost_equal(s2, s4[:k], decimal=3)
assert_almost_equal(s3, s4[:k], decimal=3)
assert_almost_equal(np.dot(U1, V1), np.dot(U4[:, :k], V4[:k, :]),
decimal=2)
assert_almost_equal(np.dot(U2, V2), np.dot(U4[:, :k], V4[:k, :]),
decimal=2)
# in this case 'auto' is equivalent to transpose
assert_almost_equal(s2, s3)
def test_randomized_svd_power_iteration_normalizer():
    # randomized_svd with power_iteration_normalizer='none' diverges for
    # a large number of power iterations on this dataset
rng = np.random.RandomState(42)
X = make_low_rank_matrix(100, 500, effective_rank=50, random_state=rng)
X += 3 * rng.randint(0, 2, size=X.shape)
n_components = 50
# Check that it diverges with many (non-normalized) power iterations
U, s, V = randomized_svd(X, n_components, n_iter=2,
power_iteration_normalizer='none')
A = X - U.dot(np.diag(s).dot(V))
error_2 = linalg.norm(A, ord='fro')
U, s, V = randomized_svd(X, n_components, n_iter=20,
power_iteration_normalizer='none')
A = X - U.dot(np.diag(s).dot(V))
error_20 = linalg.norm(A, ord='fro')
assert np.abs(error_2 - error_20) > 100
for normalizer in ['LU', 'QR', 'auto']:
U, s, V = randomized_svd(X, n_components, n_iter=2,
power_iteration_normalizer=normalizer,
random_state=0)
A = X - U.dot(np.diag(s).dot(V))
error_2 = linalg.norm(A, ord='fro')
for i in [5, 10, 50]:
U, s, V = randomized_svd(X, n_components, n_iter=i,
power_iteration_normalizer=normalizer,
random_state=0)
A = X - U.dot(np.diag(s).dot(V))
error = linalg.norm(A, ord='fro')
assert 15 > np.abs(error_2 - error)
def test_randomized_svd_sparse_warnings():
    # randomized_svd throws a warning for lil and dok matrices
rng = np.random.RandomState(42)
X = make_low_rank_matrix(50, 20, effective_rank=10, random_state=rng)
n_components = 5
for cls in (sparse.lil_matrix, sparse.dok_matrix):
X = cls(X)
assert_warns_message(
sparse.SparseEfficiencyWarning,
"Calculating SVD of a {} is expensive. "
"csr_matrix is more efficient.".format(cls.__name__),
randomized_svd, X, n_components, n_iter=1,
power_iteration_normalizer='none')
def test_svd_flip():
# Check that svd_flip works in both situations, and reconstructs input.
rs = np.random.RandomState(1999)
n_samples = 20
n_features = 10
X = rs.randn(n_samples, n_features)
# Check matrix reconstruction
U, S, V = linalg.svd(X, full_matrices=False)
U1, V1 = svd_flip(U, V, u_based_decision=False)
assert_almost_equal(np.dot(U1 * S, V1), X, decimal=6)
# Check transposed matrix reconstruction
XT = X.T
U, S, V = linalg.svd(XT, full_matrices=False)
U2, V2 = svd_flip(U, V, u_based_decision=True)
assert_almost_equal(np.dot(U2 * S, V2), XT, decimal=6)
# Check that different flip methods are equivalent under reconstruction
U_flip1, V_flip1 = svd_flip(U, V, u_based_decision=True)
assert_almost_equal(np.dot(U_flip1 * S, V_flip1), XT, decimal=6)
U_flip2, V_flip2 = svd_flip(U, V, u_based_decision=False)
assert_almost_equal(np.dot(U_flip2 * S, V_flip2), XT, decimal=6)
def test_randomized_svd_sign_flip():
a = np.array([[2.0, 0.0], [0.0, 1.0]])
u1, s1, v1 = randomized_svd(a, 2, flip_sign=True, random_state=41)
for seed in range(10):
u2, s2, v2 = randomized_svd(a, 2, flip_sign=True, random_state=seed)
assert_almost_equal(u1, u2)
assert_almost_equal(v1, v2)
assert_almost_equal(np.dot(u2 * s2, v2), a)
assert_almost_equal(np.dot(u2.T, u2), np.eye(2))
assert_almost_equal(np.dot(v2.T, v2), np.eye(2))
def test_randomized_svd_sign_flip_with_transpose():
# Check if the randomized_svd sign flipping is always done based on u
# irrespective of transpose.
# See https://github.com/scikit-learn/scikit-learn/issues/5608
# for more details.
def max_loading_is_positive(u, v):
"""
returns bool tuple indicating if the values maximising np.abs
are positive across all rows for u and across all columns for v.
"""
u_based = (np.abs(u).max(axis=0) == u.max(axis=0)).all()
v_based = (np.abs(v).max(axis=1) == v.max(axis=1)).all()
return u_based, v_based
mat = np.arange(10 * 8).reshape(10, -1)
# Without transpose
u_flipped, _, v_flipped = randomized_svd(mat, 3, flip_sign=True)
u_based, v_based = max_loading_is_positive(u_flipped, v_flipped)
assert u_based
assert not v_based
# With transpose
u_flipped_with_transpose, _, v_flipped_with_transpose = randomized_svd(
mat, 3, flip_sign=True, transpose=True)
u_based, v_based = max_loading_is_positive(
u_flipped_with_transpose, v_flipped_with_transpose)
assert u_based
assert not v_based
def test_cartesian():
# Check if cartesian product delivers the right results
axes = (np.array([1, 2, 3]), np.array([4, 5]), np.array([6, 7]))
true_out = np.array([[1, 4, 6],
[1, 4, 7],
[1, 5, 6],
[1, 5, 7],
[2, 4, 6],
[2, 4, 7],
[2, 5, 6],
[2, 5, 7],
[3, 4, 6],
[3, 4, 7],
[3, 5, 6],
[3, 5, 7]])
out = cartesian(axes)
assert_array_equal(true_out, out)
# check single axis
x = np.arange(3)
assert_array_equal(x[:, np.newaxis], cartesian((x,)))
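
# The same cartesian product can be cross-checked with plain numpy, e.g.
# via meshgrid (illustrative sketch, not the actual implementation):
def _cartesian_sketch(arrays):
    grids = np.meshgrid(*arrays, indexing='ij')
    return np.stack([g.ravel() for g in grids], axis=-1)
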
def test_logistic_sigmoid():
# Check correctness and robustness of logistic sigmoid implementation
def naive_log_logistic(x):
return np.log(expit(x))
x = np.linspace(-2, 2, 50)
assert_array_almost_equal(log_logistic(x), naive_log_logistic(x))
extreme_x = np.array([-100., 100.])
assert_array_almost_equal(log_logistic(extreme_x), [-100, 0])
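
# The reason a dedicated log_logistic helper exists: np.log(expit(x))
# underflows to -inf for large negative x, whereas the identity
# log(sigmoid(x)) = -log(1 + exp(-x)) = -logaddexp(0, -x) stays finite,
# as the extreme_x check above relies on. A minimal stable sketch
# (assumed equivalent formulation, not the actual implementation):
def _log_logistic_sketch(x):
    return -np.logaddexp(0, -np.asarray(x))
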
def test_incremental_variance_update_formulas():
# Test Youngs and Cramer incremental variance formulas.
# Doggie data from https://www.mathsisfun.com/data/standard-deviation.html
A = np.array([[600, 470, 170, 430, 300],
[600, 470, 170, 430, 300],
[600, 470, 170, 430, 300],
[600, 470, 170, 430, 300]]).T
idx = 2
X1 = A[:idx, :]
X2 = A[idx:, :]
old_means = X1.mean(axis=0)
old_variances = X1.var(axis=0)
old_sample_count = np.full(X1.shape[1], X1.shape[0], dtype=np.int32)
final_means, final_variances, final_count = \
_incremental_mean_and_var(X2, old_means, old_variances,
old_sample_count)
assert_almost_equal(final_means, A.mean(axis=0), 6)
assert_almost_equal(final_variances, A.var(axis=0), 6)
assert_almost_equal(final_count, A.shape[0])
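
# The update being tested, in compact form: a sketch of the Chan et al.
# parallel mean/variance combination (assuming a scalar sample count; the
# real helper also tracks per-feature counts and handles NaNs):
def _incremental_mean_var_sketch(X_new, last_mean, last_var, last_n):
    n_new = X_new.shape[0]
    n_total = last_n + n_new
    delta = X_new.mean(axis=0) - last_mean
    total_mean = last_mean + delta * n_new / n_total
    # combine the two sums of squared deviations (the M2 terms)
    m2 = (last_var * last_n + X_new.var(axis=0) * n_new
          + delta ** 2 * last_n * n_new / n_total)
    return total_mean, m2 / n_total, n_total
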
def test_incremental_mean_and_variance_ignore_nan():
old_means = np.array([535., 535., 535., 535.])
old_variances = np.array([4225., 4225., 4225., 4225.])
old_sample_count = np.array([2, 2, 2, 2], dtype=np.int32)
X = np.array([[170, 170, 170, 170],
[430, 430, 430, 430],
[300, 300, 300, 300]])
X_nan = np.array([[170, np.nan, 170, 170],
[np.nan, 170, 430, 430],
[430, 430, np.nan, 300],
[300, 300, 300, np.nan]])
X_means, X_variances, X_count = _incremental_mean_and_var(
X, old_means, old_variances, old_sample_count)
X_nan_means, X_nan_variances, X_nan_count = _incremental_mean_and_var(
X_nan, old_means, old_variances, old_sample_count)
assert_allclose(X_nan_means, X_means)
assert_allclose(X_nan_variances, X_variances)
assert_allclose(X_nan_count, X_count)
@skip_if_32bit
def test_incremental_variance_numerical_stability():
# Test Youngs and Cramer incremental variance formulas.
def np_var(A):
return A.var(axis=0)
# Naive one pass variance computation - not numerically stable
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
def one_pass_var(X):
n = X.shape[0]
exp_x2 = (X ** 2).sum(axis=0) / n
expx_2 = (X.sum(axis=0) / n) ** 2
return exp_x2 - expx_2
# Two-pass algorithm, stable.
# We use it as a benchmark. It is not an online algorithm
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Two-pass_algorithm
def two_pass_var(X):
mean = X.mean(axis=0)
Y = X.copy()
return np.mean((Y - mean)**2, axis=0)
# Naive online implementation
# https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm
    # This works only for chunks of size 1
def naive_mean_variance_update(x, last_mean, last_variance,
last_sample_count):
updated_sample_count = (last_sample_count + 1)
samples_ratio = last_sample_count / float(updated_sample_count)
updated_mean = x / updated_sample_count + last_mean * samples_ratio
updated_variance = last_variance * samples_ratio + \
(x - last_mean) * (x - updated_mean) / updated_sample_count
return updated_mean, updated_variance, updated_sample_count
    # We want to show a case where one_pass_var has a large error while
    # _incremental_mean_and_var stays well below the tolerance.
tol = 200
n_features = 2
n_samples = 10000
x1 = np.array(1e8, dtype=np.float64)
x2 = np.log(1e-5, dtype=np.float64)
A0 = np.full((n_samples // 2, n_features), x1, dtype=np.float64)
A1 = np.full((n_samples // 2, n_features), x2, dtype=np.float64)
A = np.vstack((A0, A1))
# Naive one pass var: >tol (=1063)
assert np.abs(np_var(A) - one_pass_var(A)).max() > tol
# Starting point for online algorithms: after A0
# Naive implementation: >tol (436)
mean, var, n = A0[0, :], np.zeros(n_features), n_samples // 2
for i in range(A1.shape[0]):
mean, var, n = \
naive_mean_variance_update(A1[i, :], mean, var, n)
assert n == A.shape[0]
# the mean is also slightly unstable
assert np.abs(A.mean(axis=0) - mean).max() > 1e-6
assert np.abs(np_var(A) - var).max() > tol
# Robust implementation: <tol (177)
mean, var = A0[0, :], np.zeros(n_features)
n = np.full(n_features, n_samples // 2, dtype=np.int32)
for i in range(A1.shape[0]):
mean, var, n = \
_incremental_mean_and_var(A1[i, :].reshape((1, A1.shape[1])),
mean, var, n)
assert_array_equal(n, A.shape[0])
assert_array_almost_equal(A.mean(axis=0), mean)
assert tol > np.abs(np_var(A) - var).max()
def test_incremental_variance_ddof():
# Test that degrees of freedom parameter for calculations are correct.
rng = np.random.RandomState(1999)
X = rng.randn(50, 10)
n_samples, n_features = X.shape
for batch_size in [11, 20, 37]:
steps = np.arange(0, X.shape[0], batch_size)
if steps[-1] != X.shape[0]:
steps = np.hstack([steps, n_samples])
for i, j in zip(steps[:-1], steps[1:]):
batch = X[i:j, :]
if i == 0:
incremental_means = batch.mean(axis=0)
incremental_variances = batch.var(axis=0)
# Assign this twice so that the test logic is consistent
incremental_count = batch.shape[0]
sample_count = np.full(batch.shape[1], batch.shape[0],
dtype=np.int32)
else:
result = _incremental_mean_and_var(
batch, incremental_means, incremental_variances,
sample_count)
(incremental_means, incremental_variances,
incremental_count) = result
sample_count += batch.shape[0]
calculated_means = np.mean(X[:j], axis=0)
calculated_variances = np.var(X[:j], axis=0)
assert_almost_equal(incremental_means, calculated_means, 6)
assert_almost_equal(incremental_variances,
calculated_variances, 6)
assert_array_equal(incremental_count, sample_count)
def test_vector_sign_flip():
    # Check that the sign flip works and that the largest-magnitude entry
    # of each row gets a positive sign
data = np.random.RandomState(36).randn(5, 5)
max_abs_rows = np.argmax(np.abs(data), axis=1)
data_flipped = _deterministic_vector_sign_flip(data)
max_rows = np.argmax(data_flipped, axis=1)
assert_array_equal(max_abs_rows, max_rows)
signs = np.sign(data[range(data.shape[0]), max_abs_rows])
assert_array_equal(data, data_flipped * signs[:, np.newaxis])
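
# What the helper presumably does, sketched with plain numpy: scale each
# row by the sign of its largest-magnitude entry so that entry becomes
# positive (illustrative reimplementation only):
def _vector_sign_flip_sketch(u):
    max_abs_cols = np.argmax(np.abs(u), axis=1)
    signs = np.sign(u[np.arange(u.shape[0]), max_abs_cols])
    return u * signs[:, np.newaxis]
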
def test_softmax():
rng = np.random.RandomState(0)
X = rng.randn(3, 5)
exp_X = np.exp(X)
sum_exp_X = np.sum(exp_X, axis=1).reshape((-1, 1))
assert_array_almost_equal(softmax(X), exp_X / sum_exp_X)
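
# The naive exp/sum reference above overflows for large inputs; a
# numerically stable variant subtracts the row-wise maximum first, which
# leaves the result unchanged (illustrative sketch):
def _softmax_sketch(X):
    shifted = X - X.max(axis=1, keepdims=True)
    e = np.exp(shifted)
    return e / e.sum(axis=1, keepdims=True)
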
def test_stable_cumsum():
assert_array_equal(stable_cumsum([1, 2, 3]), np.cumsum([1, 2, 3]))
r = np.random.RandomState(0).rand(100000)
assert_warns(RuntimeWarning, stable_cumsum, r, rtol=0, atol=0)
# test axis parameter
A = np.random.RandomState(36).randint(1000, size=(5, 5, 5))
assert_array_equal(stable_cumsum(A, axis=0), np.cumsum(A, axis=0))
assert_array_equal(stable_cumsum(A, axis=1), np.cumsum(A, axis=1))
assert_array_equal(stable_cumsum(A, axis=2), np.cumsum(A, axis=2))
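
# A sketch of the behaviour exercised above: accumulate in float64 and warn
# when the final cumulative value drifts too far from an exact sum
# (simplified; the real helper also supports the axis argument):
def _stable_cumsum_sketch(arr, rtol=1e-05, atol=1e-08):
    import warnings
    out = np.cumsum(arr, dtype=np.float64)
    expected = np.sum(arr, dtype=np.float64)
    if not np.isclose(out[-1], expected, rtol=rtol, atol=atol):
        warnings.warn('cumsum was found to be unstable: its last element '
                      'does not correspond to sum', RuntimeWarning)
    return out
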
def test_safe_min():
msg = ("safe_min is deprecated in version 0.22 and will be removed "
"in version 0.24.")
with pytest.warns(FutureWarning, match=msg):
safe_min(np.ones(10))
@pytest.mark.parametrize("A_array_constr", [np.array, sparse.csr_matrix],
ids=["dense", "sparse"])
@pytest.mark.parametrize("B_array_constr", [np.array, sparse.csr_matrix],
ids=["dense", "sparse"])
def test_safe_sparse_dot_2d(A_array_constr, B_array_constr):
rng = np.random.RandomState(0)
A = rng.random_sample((30, 10))
B = rng.random_sample((10, 20))
expected = np.dot(A, B)
A = A_array_constr(A)
B = B_array_constr(B)
actual = safe_sparse_dot(A, B, dense_output=True)
assert_allclose(actual, expected)
def test_safe_sparse_dot_nd():
rng = np.random.RandomState(0)
# dense ND / sparse
A = rng.random_sample((2, 3, 4, 5, 6))
B = rng.random_sample((6, 7))
expected = np.dot(A, B)
B = sparse.csr_matrix(B)
actual = safe_sparse_dot(A, B)
assert_allclose(actual, expected)
# sparse / dense ND
A = rng.random_sample((2, 3))
B = rng.random_sample((4, 5, 3, 6))
expected = np.dot(A, B)
A = sparse.csr_matrix(A)
actual = safe_sparse_dot(A, B)
assert_allclose(actual, expected)
@pytest.mark.parametrize("A_array_constr", [np.array, sparse.csr_matrix],
ids=["dense", "sparse"])
def test_safe_sparse_dot_2d_1d(A_array_constr):
rng = np.random.RandomState(0)
B = rng.random_sample((10))
# 2D @ 1D
A = rng.random_sample((30, 10))
expected = np.dot(A, B)
A = A_array_constr(A)
actual = safe_sparse_dot(A, B)
assert_allclose(actual, expected)
# 1D @ 2D
A = rng.random_sample((10, 30))
expected = np.dot(B, A)
A = A_array_constr(A)
actual = safe_sparse_dot(B, A)
assert_allclose(actual, expected)
@pytest.mark.parametrize("dense_output", [True, False])
def test_safe_sparse_dot_dense_output(dense_output):
rng = np.random.RandomState(0)
A = sparse.random(30, 10, density=0.1, random_state=rng)
B = sparse.random(10, 20, density=0.1, random_state=rng)
expected = A.dot(B)
actual = safe_sparse_dot(A, B, dense_output=dense_output)
assert sparse.issparse(actual) == (not dense_output)
if dense_output:
expected = expected.toarray()
assert_allclose_dense_sparse(actual, expected)
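
# The contract exercised by these tests, sketched: dispatch to the sparse
# or dense product depending on the operands, densifying the result only
# on request (simplified; the real function also special-cases >2D arrays):
def _safe_sparse_dot_sketch(a, b, dense_output=False):
    if sparse.issparse(a) or sparse.issparse(b):
        ret = a @ b
        if dense_output and sparse.issparse(ret):
            ret = ret.toarray()
        return ret
    return np.dot(a, b)
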

View file

@@ -0,0 +1,31 @@
""" Test fast_dict.
"""
import numpy as np
from sklearn.utils._fast_dict import IntFloatDict, argmin
def test_int_float_dict():
rng = np.random.RandomState(0)
keys = np.unique(rng.randint(100, size=10).astype(np.intp))
values = rng.rand(len(keys))
d = IntFloatDict(keys, values)
for key, value in zip(keys, values):
assert d[key] == value
assert len(d) == len(keys)
d.append(120, 3.)
assert d[120] == 3.0
assert len(d) == len(keys) + 1
for i in range(2000):
d.append(i + 1000, 4.0)
assert d[1100] == 4.0
def test_int_float_dict_argmin():
# Test the argmin implementation on the IntFloatDict
keys = np.arange(100, dtype=np.intp)
values = np.arange(100, dtype=np.float64)
d = IntFloatDict(keys, values)
assert argmin(d) == (0, 0)

View file

@@ -0,0 +1,91 @@
# Authors: Gael Varoquaux <gael.varoquaux@normalesup.org>
# Justin Vincent
# Lars Buitinck
# License: BSD 3 clause
import math
import numpy as np
import pytest
import scipy.stats
from sklearn.utils._testing import assert_array_equal
from sklearn.utils.fixes import _joblib_parallel_args
from sklearn.utils.fixes import _object_dtype_isnan
from sklearn.utils.fixes import loguniform
from sklearn.utils.fixes import MaskedArray
@pytest.mark.parametrize('joblib_version', ('0.11', '0.12.0'))
def test_joblib_parallel_args(monkeypatch, joblib_version):
import joblib
monkeypatch.setattr(joblib, '__version__', joblib_version)
if joblib_version == '0.12.0':
# arguments are simply passed through
assert _joblib_parallel_args(prefer='threads') == {'prefer': 'threads'}
assert _joblib_parallel_args(prefer='processes', require=None) == {
'prefer': 'processes', 'require': None}
assert _joblib_parallel_args(non_existing=1) == {'non_existing': 1}
elif joblib_version == '0.11':
# arguments are mapped to the corresponding backend
assert _joblib_parallel_args(prefer='threads') == {
'backend': 'threading'}
assert _joblib_parallel_args(prefer='processes') == {
'backend': 'multiprocessing'}
with pytest.raises(ValueError):
_joblib_parallel_args(prefer='invalid')
assert _joblib_parallel_args(
prefer='processes', require='sharedmem') == {
'backend': 'threading'}
with pytest.raises(ValueError):
_joblib_parallel_args(require='invalid')
with pytest.raises(NotImplementedError):
_joblib_parallel_args(verbose=True)
else:
raise ValueError
@pytest.mark.parametrize("dtype, val", ([object, 1],
[object, "a"],
[float, 1]))
def test_object_dtype_isnan(dtype, val):
X = np.array([[val, np.nan],
[np.nan, val]], dtype=dtype)
expected_mask = np.array([[False, True],
[True, False]])
mask = _object_dtype_isnan(X)
assert_array_equal(mask, expected_mask)
@pytest.mark.parametrize("low,high,base",
[(-1, 0, 10), (0, 2, np.exp(1)), (-1, 1, 2)])
def test_loguniform(low, high, base):
rv = loguniform(base ** low, base ** high)
assert isinstance(rv, scipy.stats._distn_infrastructure.rv_frozen)
rvs = rv.rvs(size=2000, random_state=0)
# Test the basics; right bounds, right size
assert (base ** low <= rvs).all() and (rvs <= base ** high).all()
assert len(rvs) == 2000
# Test that it's actually (fairly) uniform
log_rvs = np.array([math.log(x, base) for x in rvs])
counts, _ = np.histogram(log_rvs)
assert counts.mean() == 200
assert np.abs(counts - counts.mean()).max() <= 40
# Test that random_state works
assert (
loguniform(base ** low, base ** high).rvs(random_state=0)
== loguniform(base ** low, base ** high).rvs(random_state=0)
)
def test_masked_array_deprecated(): # TODO: remove in 0.25
with pytest.warns(FutureWarning, match='is deprecated'):
MaskedArray()

View file

@@ -0,0 +1,77 @@
from sklearn.utils.metaestimators import if_delegate_has_method
class Prefix:
def func(self):
pass
class MockMetaEstimator:
"""This is a mock meta estimator"""
a_prefix = Prefix()
@if_delegate_has_method(delegate="a_prefix")
def func(self):
"""This is a mock delegated function"""
pass
def test_delegated_docstring():
assert "This is a mock delegated function" \
in str(MockMetaEstimator.__dict__['func'].__doc__)
assert "This is a mock delegated function" \
in str(MockMetaEstimator.func.__doc__)
assert "This is a mock delegated function" \
in str(MockMetaEstimator().func.__doc__)
class MetaEst:
"""A mock meta estimator"""
def __init__(self, sub_est, better_sub_est=None):
self.sub_est = sub_est
self.better_sub_est = better_sub_est
@if_delegate_has_method(delegate='sub_est')
def predict(self):
pass
class MetaEstTestTuple(MetaEst):
"""A mock meta estimator to test passing a tuple of delegates"""
@if_delegate_has_method(delegate=('sub_est', 'better_sub_est'))
def predict(self):
pass
class MetaEstTestList(MetaEst):
"""A mock meta estimator to test passing a list of delegates"""
@if_delegate_has_method(delegate=['sub_est', 'better_sub_est'])
def predict(self):
pass
class HasPredict:
"""A mock sub-estimator with predict method"""
def predict(self):
pass
class HasNoPredict:
"""A mock sub-estimator with no predict method"""
pass
def test_if_delegate_has_method():
assert hasattr(MetaEst(HasPredict()), 'predict')
assert not hasattr(MetaEst(HasNoPredict()), 'predict')
assert not hasattr(MetaEstTestTuple(HasNoPredict(), HasNoPredict()),
'predict')
assert hasattr(MetaEstTestTuple(HasPredict(), HasNoPredict()), 'predict')
assert not hasattr(MetaEstTestTuple(HasNoPredict(), HasPredict()),
'predict')
assert not hasattr(MetaEstTestList(HasNoPredict(), HasPredict()),
'predict')
assert hasattr(MetaEstTestList(HasPredict(), HasPredict()), 'predict')
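
# The mechanism under test, in miniature: a descriptor that looks up the
# method on the delegate at attribute-access time, so a missing method
# raises AttributeError and hasattr() returns False on the meta-estimator
# (illustrative sketch, not the actual sklearn implementation):
class _if_delegate_has_method_sketch:
    def __init__(self, delegate_name, method_name):
        self.delegate_name = delegate_name
        self.method_name = method_name

    def __get__(self, obj, objtype=None):
        if obj is not None:
            delegate = getattr(obj, self.delegate_name)
            # raises AttributeError when the delegate lacks the method
            getattr(delegate, self.method_name)
        # a real implementation would return a bound wrapper here
        return lambda *args, **kwargs: None
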

View file

@@ -0,0 +1,439 @@
import numpy as np
import scipy.sparse as sp
from itertools import product
import pytest
from scipy.sparse import issparse
from scipy.sparse import csc_matrix
from scipy.sparse import csr_matrix
from scipy.sparse import coo_matrix
from scipy.sparse import dok_matrix
from scipy.sparse import lil_matrix
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_allclose
from sklearn.utils.estimator_checks import _NotAnArray
from sklearn.utils.fixes import parse_version
from sklearn.utils.multiclass import unique_labels
from sklearn.utils.multiclass import is_multilabel
from sklearn.utils.multiclass import type_of_target
from sklearn.utils.multiclass import class_distribution
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.multiclass import _ovr_decision_function
from sklearn.utils.metaestimators import _safe_split
from sklearn.model_selection import ShuffleSplit
from sklearn.svm import SVC
from sklearn import datasets
EXAMPLES = {
'multilabel-indicator': [
        # valid whether the data is sparse or dense; sparse examples are
        # identified by their CSR format when the testing takes place
csr_matrix(np.random.RandomState(42).randint(2, size=(10, 10))),
[[0, 1], [1, 0]],
[[0, 1]],
csr_matrix(np.array([[0, 1], [1, 0]])),
csr_matrix(np.array([[0, 1], [1, 0]], dtype=np.bool)),
csr_matrix(np.array([[0, 1], [1, 0]], dtype=np.int8)),
csr_matrix(np.array([[0, 1], [1, 0]], dtype=np.uint8)),
csr_matrix(np.array([[0, 1], [1, 0]], dtype=np.float)),
csr_matrix(np.array([[0, 1], [1, 0]], dtype=np.float32)),
csr_matrix(np.array([[0, 0], [0, 0]])),
csr_matrix(np.array([[0, 1]])),
# Only valid when data is dense
[[-1, 1], [1, -1]],
np.array([[-1, 1], [1, -1]]),
np.array([[-3, 3], [3, -3]]),
_NotAnArray(np.array([[-3, 3], [3, -3]])),
],
'multiclass': [
[1, 0, 2, 2, 1, 4, 2, 4, 4, 4],
np.array([1, 0, 2]),
np.array([1, 0, 2], dtype=np.int8),
np.array([1, 0, 2], dtype=np.uint8),
np.array([1, 0, 2], dtype=np.float),
np.array([1, 0, 2], dtype=np.float32),
np.array([[1], [0], [2]]),
_NotAnArray(np.array([1, 0, 2])),
[0, 1, 2],
['a', 'b', 'c'],
np.array(['a', 'b', 'c']),
np.array(['a', 'b', 'c'], dtype=object),
np.array(['a', 'b', 'c'], dtype=object),
],
'multiclass-multioutput': [
[[1, 0, 2, 2], [1, 4, 2, 4]],
[['a', 'b'], ['c', 'd']],
np.array([[1, 0, 2, 2], [1, 4, 2, 4]]),
np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.int8),
np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.uint8),
np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.float),
np.array([[1, 0, 2, 2], [1, 4, 2, 4]], dtype=np.float32),
np.array([['a', 'b'], ['c', 'd']]),
np.array([['a', 'b'], ['c', 'd']]),
np.array([['a', 'b'], ['c', 'd']], dtype=object),
np.array([[1, 0, 2]]),
_NotAnArray(np.array([[1, 0, 2]])),
],
'binary': [
[0, 1],
[1, 1],
[],
[0],
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1]),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.bool),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.int8),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.uint8),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.float),
np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1], dtype=np.float32),
np.array([[0], [1]]),
_NotAnArray(np.array([[0], [1]])),
[1, -1],
[3, 5],
['a'],
['a', 'b'],
['abc', 'def'],
np.array(['abc', 'def']),
['a', 'b'],
np.array(['abc', 'def'], dtype=object),
],
'continuous': [
[1e-5],
[0, .5],
np.array([[0], [.5]]),
np.array([[0], [.5]], dtype=np.float32),
],
'continuous-multioutput': [
np.array([[0, .5], [.5, 0]]),
np.array([[0, .5], [.5, 0]], dtype=np.float32),
np.array([[0, .5]]),
],
'unknown': [
[[]],
[()],
# sequence of sequences that weren't supported even before deprecation
np.array([np.array([]), np.array([1, 2, 3])], dtype=object),
[np.array([]), np.array([1, 2, 3])],
[{1, 2, 3}, {1, 2}],
[frozenset([1, 2, 3]), frozenset([1, 2])],
# and also confusable as sequences of sequences
[{0: 'a', 1: 'b'}, {0: 'a'}],
# empty second dimension
np.array([[], []]),
# 3d
np.array([[[0, 1], [2, 3]], [[4, 5], [6, 7]]]),
]
}
NON_ARRAY_LIKE_EXAMPLES = [
{1, 2, 3},
{0: 'a', 1: 'b'},
{0: [5], 1: [5]},
'abc',
frozenset([1, 2, 3]),
None,
]
MULTILABEL_SEQUENCES = [
[[1], [2], [0, 1]],
[(), (2), (0, 1)],
np.array([[], [1, 2]], dtype='object'),
_NotAnArray(np.array([[], [1, 2]], dtype='object'))
]
def test_unique_labels():
# Empty iterable
with pytest.raises(ValueError):
unique_labels()
# Multiclass problem
assert_array_equal(unique_labels(range(10)), np.arange(10))
assert_array_equal(unique_labels(np.arange(10)), np.arange(10))
assert_array_equal(unique_labels([4, 0, 2]), np.array([0, 2, 4]))
# Multilabel indicator
assert_array_equal(unique_labels(np.array([[0, 0, 1],
[1, 0, 1],
[0, 0, 0]])),
np.arange(3))
assert_array_equal(unique_labels(np.array([[0, 0, 1],
[0, 0, 0]])),
np.arange(3))
# Several arrays passed
assert_array_equal(unique_labels([4, 0, 2], range(5)),
np.arange(5))
assert_array_equal(unique_labels((0, 1, 2), (0,), (2, 1)),
np.arange(3))
# Border line case with binary indicator matrix
with pytest.raises(ValueError):
unique_labels([4, 0, 2], np.ones((5, 5)))
with pytest.raises(ValueError):
unique_labels(np.ones((5, 4)), np.ones((5, 5)))
assert_array_equal(unique_labels(np.ones((4, 5)), np.ones((5, 5))),
np.arange(5))
def test_unique_labels_non_specific():
# Test unique_labels with a variety of collected examples
    # Smoke test for all supported formats
for format in ["binary", "multiclass", "multilabel-indicator"]:
for y in EXAMPLES[format]:
unique_labels(y)
    # We don't support those formats at the moment
for example in NON_ARRAY_LIKE_EXAMPLES:
with pytest.raises(ValueError):
unique_labels(example)
for y_type in ["unknown", "continuous", 'continuous-multioutput',
'multiclass-multioutput']:
for example in EXAMPLES[y_type]:
with pytest.raises(ValueError):
unique_labels(example)
def test_unique_labels_mixed_types():
# Mix with binary or multiclass and multilabel
mix_clf_format = product(EXAMPLES["multilabel-indicator"],
EXAMPLES["multiclass"] +
EXAMPLES["binary"])
for y_multilabel, y_multiclass in mix_clf_format:
with pytest.raises(ValueError):
unique_labels(y_multiclass, y_multilabel)
with pytest.raises(ValueError):
unique_labels(y_multilabel, y_multiclass)
with pytest.raises(ValueError):
unique_labels([[1, 2]], [["a", "d"]])
with pytest.raises(ValueError):
unique_labels(["1", 2])
with pytest.raises(ValueError):
unique_labels([["1", 2], [1, 3]])
with pytest.raises(ValueError):
unique_labels([["1", "2"], [2, 3]])
def test_is_multilabel():
for group, group_examples in EXAMPLES.items():
if group in ['multilabel-indicator']:
dense_exp = True
else:
dense_exp = False
for example in group_examples:
# Only mark explicitly defined sparse examples as valid sparse
# multilabel-indicators
if group == 'multilabel-indicator' and issparse(example):
sparse_exp = True
else:
sparse_exp = False
if (issparse(example) or
(hasattr(example, '__array__') and
np.asarray(example).ndim == 2 and
np.asarray(example).dtype.kind in 'biuf' and
np.asarray(example).shape[1] > 0)):
examples_sparse = [sparse_matrix(example)
for sparse_matrix in [coo_matrix,
csc_matrix,
csr_matrix,
dok_matrix,
lil_matrix]]
for exmpl_sparse in examples_sparse:
assert sparse_exp == is_multilabel(exmpl_sparse), (
'is_multilabel(%r) should be %s'
% (exmpl_sparse, sparse_exp))
# Densify sparse examples before testing
if issparse(example):
example = example.toarray()
assert dense_exp == is_multilabel(example), (
'is_multilabel(%r) should be %s'
% (example, dense_exp))
def test_check_classification_targets():
for y_type in EXAMPLES.keys():
if y_type in ["unknown", "continuous", 'continuous-multioutput']:
for example in EXAMPLES[y_type]:
msg = 'Unknown label type: '
with pytest.raises(ValueError, match=msg):
check_classification_targets(example)
else:
for example in EXAMPLES[y_type]:
check_classification_targets(example)
# @ignore_warnings
def test_type_of_target():
for group, group_examples in EXAMPLES.items():
for example in group_examples:
assert type_of_target(example) == group, (
'type_of_target(%r) should be %r, got %r'
% (example, group, type_of_target(example)))
for example in NON_ARRAY_LIKE_EXAMPLES:
msg_regex = r'Expected array-like \(array or non-string sequence\).*'
with pytest.raises(ValueError, match=msg_regex):
type_of_target(example)
for example in MULTILABEL_SEQUENCES:
msg = ('You appear to be using a legacy multi-label data '
'representation. Sequence of sequences are no longer supported;'
' use a binary array or sparse matrix instead.')
with pytest.raises(ValueError, match=msg):
type_of_target(example)
def test_type_of_target_pandas_sparse():
pd = pytest.importorskip("pandas")
if parse_version(pd.__version__) >= parse_version('0.25'):
pd_sparse_array = pd.arrays.SparseArray
else:
pd_sparse_array = pd.SparseArray
y = pd_sparse_array([1, np.nan, np.nan, 1, np.nan])
msg = "y cannot be class 'SparseSeries' or 'SparseArray'"
with pytest.raises(ValueError, match=msg):
type_of_target(y)
def test_class_distribution():
y = np.array([[1, 0, 0, 1],
[2, 2, 0, 1],
[1, 3, 0, 1],
[4, 2, 0, 1],
[2, 0, 0, 1],
[1, 3, 0, 1]])
# Define the sparse matrix with a mix of implicit and explicit zeros
data = np.array([1, 2, 1, 4, 2, 1, 0, 2, 3, 2, 3, 1, 1, 1, 1, 1, 1])
indices = np.array([0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 5, 0, 1, 2, 3, 4, 5])
indptr = np.array([0, 6, 11, 11, 17])
y_sp = sp.csc_matrix((data, indices, indptr), shape=(6, 4))
classes, n_classes, class_prior = class_distribution(y)
classes_sp, n_classes_sp, class_prior_sp = class_distribution(y_sp)
classes_expected = [[1, 2, 4],
[0, 2, 3],
[0],
[1]]
n_classes_expected = [3, 3, 1, 1]
class_prior_expected = [[3/6, 2/6, 1/6],
[1/3, 1/3, 1/3],
[1.0],
[1.0]]
for k in range(y.shape[1]):
assert_array_almost_equal(classes[k], classes_expected[k])
assert_array_almost_equal(n_classes[k], n_classes_expected[k])
assert_array_almost_equal(class_prior[k], class_prior_expected[k])
assert_array_almost_equal(classes_sp[k], classes_expected[k])
assert_array_almost_equal(n_classes_sp[k], n_classes_expected[k])
assert_array_almost_equal(class_prior_sp[k], class_prior_expected[k])
# Test again with explicit sample weights
(classes,
n_classes,
class_prior) = class_distribution(y, [1.0, 2.0, 1.0, 2.0, 1.0, 2.0])
(classes_sp,
n_classes_sp,
class_prior_sp) = class_distribution(y, [1.0, 2.0, 1.0, 2.0, 1.0, 2.0])
class_prior_expected = [[4/9, 3/9, 2/9],
[2/9, 4/9, 3/9],
[1.0],
[1.0]]
for k in range(y.shape[1]):
assert_array_almost_equal(classes[k], classes_expected[k])
assert_array_almost_equal(n_classes[k], n_classes_expected[k])
assert_array_almost_equal(class_prior[k], class_prior_expected[k])
assert_array_almost_equal(classes_sp[k], classes_expected[k])
assert_array_almost_equal(n_classes_sp[k], n_classes_expected[k])
assert_array_almost_equal(class_prior_sp[k], class_prior_expected[k])
def test_safe_split_with_precomputed_kernel():
clf = SVC()
clfp = SVC(kernel="precomputed")
iris = datasets.load_iris()
X, y = iris.data, iris.target
K = np.dot(X, X.T)
cv = ShuffleSplit(test_size=0.25, random_state=0)
train, test = list(cv.split(X))[0]
X_train, y_train = _safe_split(clf, X, y, train)
K_train, y_train2 = _safe_split(clfp, K, y, train)
assert_array_almost_equal(K_train, np.dot(X_train, X_train.T))
assert_array_almost_equal(y_train, y_train2)
X_test, y_test = _safe_split(clf, X, y, test, train)
K_test, y_test2 = _safe_split(clfp, K, y, test, train)
assert_array_almost_equal(K_test, np.dot(X_test, X_train.T))
assert_array_almost_equal(y_test, y_test2)
def test_ovr_decision_function():
# test properties for ovr decision function
predictions = np.array([[0, 1, 1],
[0, 1, 0],
[0, 1, 1],
[0, 1, 1]])
confidences = np.array([[-1e16, 0, -1e16],
[1., 2., -3.],
[-5., 2., 5.],
[-0.5, 0.2, 0.5]])
n_classes = 3
dec_values = _ovr_decision_function(predictions, confidences, n_classes)
# check that the decision values are within 0.5 range of the votes
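    # (pairwise columns are ordered (0 vs 1), (0 vs 2), (1 vs 2); a
    # prediction of 0 votes for the first class of the pair and 1 for the
    # second, so the first sample [0, 1, 1] yields one vote for class 0
    # and two for class 2)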
votes = np.array([[1, 0, 2],
[1, 1, 1],
[1, 0, 2],
[1, 0, 2]])
assert_allclose(votes, dec_values, atol=0.5)
    # check that the predictions are what we expect: the class with the
    # highest vote, or with the highest confidence in case of a tie.
    # For the second sample there is a tie (it should be won by class 1).
expected_prediction = np.array([2, 1, 2, 2])
assert_array_equal(np.argmax(dec_values, axis=1), expected_prediction)
    # the third and fourth samples have the same votes, but the third sample
    # has higher confidence; this should be reflected in the decision values
assert (dec_values[2, 2] > dec_values[3, 2])
# assert subset invariance.
dec_values_one = [_ovr_decision_function(np.array([predictions[i]]),
np.array([confidences[i]]),
n_classes)[0] for i in range(4)]
assert_allclose(dec_values, dec_values_one, atol=1e-6)

View file

@@ -0,0 +1,78 @@
# Author: Olivier Grisel <olivier.grisel@ensta.org>
#
# License: BSD 3 clause
import numpy as np
from sklearn.utils.murmurhash import murmurhash3_32
from numpy.testing import assert_array_almost_equal
from numpy.testing import assert_array_equal
def test_mmhash3_int():
assert murmurhash3_32(3) == 847579505
assert murmurhash3_32(3, seed=0) == 847579505
assert murmurhash3_32(3, seed=42) == -1823081949
assert murmurhash3_32(3, positive=False) == 847579505
assert murmurhash3_32(3, seed=0, positive=False) == 847579505
assert murmurhash3_32(3, seed=42, positive=False) == -1823081949
assert murmurhash3_32(3, positive=True) == 847579505
assert murmurhash3_32(3, seed=0, positive=True) == 847579505
assert murmurhash3_32(3, seed=42, positive=True) == 2471885347
def test_mmhash3_int_array():
rng = np.random.RandomState(42)
keys = rng.randint(-5342534, 345345, size=3 * 2 * 1).astype(np.int32)
keys = keys.reshape((3, 2, 1))
for seed in [0, 42]:
expected = np.array([murmurhash3_32(int(k), seed)
for k in keys.flat])
expected = expected.reshape(keys.shape)
assert_array_equal(murmurhash3_32(keys, seed), expected)
for seed in [0, 42]:
expected = np.array([murmurhash3_32(k, seed, positive=True)
for k in keys.flat])
expected = expected.reshape(keys.shape)
assert_array_equal(murmurhash3_32(keys, seed, positive=True),
expected)
def test_mmhash3_bytes():
assert murmurhash3_32(b'foo', 0) == -156908512
assert murmurhash3_32(b'foo', 42) == -1322301282
assert murmurhash3_32(b'foo', 0, positive=True) == 4138058784
assert murmurhash3_32(b'foo', 42, positive=True) == 2972666014
def test_mmhash3_unicode():
assert murmurhash3_32('foo', 0) == -156908512
assert murmurhash3_32('foo', 42) == -1322301282
assert murmurhash3_32('foo', 0, positive=True) == 4138058784
assert murmurhash3_32('foo', 42, positive=True) == 2972666014
def test_no_collision_on_byte_range():
previous_hashes = set()
    for i in range(100):
        h = murmurhash3_32(' ' * i, 0)
        assert h not in previous_hashes, \
            "Found collision on growing string of spaces"
        previous_hashes.add(h)
def test_uniform_distribution():
n_bins, n_samples = 10, 100000
bins = np.zeros(n_bins, dtype=np.float64)
for i in range(n_samples):
bins[murmurhash3_32(i, positive=True) % n_bins] += 1
means = bins / n_samples
expected = np.full(n_bins, 1. / n_bins)
assert_array_almost_equal(means / expected, np.ones(n_bins), 2)
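
# A typical application of the positive variant checked above: stable,
# implementation-independent bucketing of arbitrary keys (illustrative
# sketch; n_buckets is an arbitrary example value):
def _hash_bucket_sketch(key, n_buckets=1024, seed=0):
    return murmurhash3_32(key, seed=seed, positive=True) % n_buckets
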

View file

@@ -0,0 +1,32 @@
import numpy as np
from sklearn.utils.optimize import _newton_cg
from scipy.optimize import fmin_ncg
from sklearn.utils._testing import assert_array_almost_equal
def test_newton_cg():
# Test that newton_cg gives same result as scipy's fmin_ncg
rng = np.random.RandomState(0)
A = rng.normal(size=(10, 10))
x0 = np.ones(10)
def func(x):
Ax = A.dot(x)
return .5 * (Ax).dot(Ax)
def grad(x):
return A.T.dot(A.dot(x))
    def hess(x, p):
        # the Hessian of 0.5 * ||Ax||^2 is A.T @ A; return its product with p
        return A.T.dot(A.dot(p))
def grad_hess(x):
return grad(x), lambda x: A.T.dot(A.dot(x))
assert_array_almost_equal(
_newton_cg(grad_hess, func, grad, x0, tol=1e-10)[0],
fmin_ncg(f=func, x0=x0, fprime=grad, fhess_p=hess)
)

View file

@@ -0,0 +1,576 @@
import re
from pprint import PrettyPrinter
import numpy as np
from sklearn.utils._pprint import _EstimatorPrettyPrinter
from sklearn.linear_model import LogisticRegressionCV
from sklearn.pipeline import make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_selection import SelectKBest, chi2
from sklearn import set_config, config_context
# Ignore flake8 (lots of line too long issues)
# flake8: noqa
# Constructors excerpted to test pprinting
class LogisticRegression(BaseEstimator):
def __init__(self, penalty='l2', dual=False, tol=1e-4, C=1.0,
fit_intercept=True, intercept_scaling=1, class_weight=None,
random_state=None, solver='warn', max_iter=100,
multi_class='warn', verbose=0, warm_start=False, n_jobs=None,
l1_ratio=None):
self.penalty = penalty
self.dual = dual
self.tol = tol
self.C = C
self.fit_intercept = fit_intercept
self.intercept_scaling = intercept_scaling
self.class_weight = class_weight
self.random_state = random_state
self.solver = solver
self.max_iter = max_iter
self.multi_class = multi_class
self.verbose = verbose
self.warm_start = warm_start
self.n_jobs = n_jobs
self.l1_ratio = l1_ratio
def fit(self, X, y):
return self
class StandardScaler(TransformerMixin, BaseEstimator):
def __init__(self, copy=True, with_mean=True, with_std=True):
self.with_mean = with_mean
self.with_std = with_std
self.copy = copy
def transform(self, X, copy=None):
return self
class RFE(BaseEstimator):
def __init__(self, estimator, n_features_to_select=None, step=1,
verbose=0):
self.estimator = estimator
self.n_features_to_select = n_features_to_select
self.step = step
self.verbose = verbose
class GridSearchCV(BaseEstimator):
def __init__(self, estimator, param_grid, scoring=None,
n_jobs=None, iid='warn', refit=True, cv='warn', verbose=0,
pre_dispatch='2*n_jobs', error_score='raise-deprecating',
return_train_score=False):
self.estimator = estimator
self.param_grid = param_grid
self.scoring = scoring
self.n_jobs = n_jobs
self.iid = iid
self.refit = refit
self.cv = cv
self.verbose = verbose
self.pre_dispatch = pre_dispatch
self.error_score = error_score
self.return_train_score = return_train_score
class CountVectorizer(BaseEstimator):
def __init__(self, input='content', encoding='utf-8',
decode_error='strict', strip_accents=None,
lowercase=True, preprocessor=None, tokenizer=None,
stop_words=None, token_pattern=r"(?u)\b\w\w+\b",
ngram_range=(1, 1), analyzer='word',
max_df=1.0, min_df=1, max_features=None,
vocabulary=None, binary=False, dtype=np.int64):
self.input = input
self.encoding = encoding
self.decode_error = decode_error
self.strip_accents = strip_accents
self.preprocessor = preprocessor
self.tokenizer = tokenizer
self.analyzer = analyzer
self.lowercase = lowercase
self.token_pattern = token_pattern
self.stop_words = stop_words
self.max_df = max_df
self.min_df = min_df
self.max_features = max_features
self.ngram_range = ngram_range
self.vocabulary = vocabulary
self.binary = binary
self.dtype = dtype
class Pipeline(BaseEstimator):
def __init__(self, steps, memory=None):
self.steps = steps
self.memory = memory
class SVC(BaseEstimator):
def __init__(self, C=1.0, kernel='rbf', degree=3, gamma='auto_deprecated',
coef0=0.0, shrinking=True, probability=False,
tol=1e-3, cache_size=200, class_weight=None,
verbose=False, max_iter=-1, decision_function_shape='ovr',
random_state=None):
self.kernel = kernel
self.degree = degree
self.gamma = gamma
self.coef0 = coef0
self.tol = tol
self.C = C
self.shrinking = shrinking
self.probability = probability
self.cache_size = cache_size
self.class_weight = class_weight
self.verbose = verbose
self.max_iter = max_iter
self.decision_function_shape = decision_function_shape
self.random_state = random_state
class PCA(BaseEstimator):
def __init__(self, n_components=None, copy=True, whiten=False,
svd_solver='auto', tol=0.0, iterated_power='auto',
random_state=None):
self.n_components = n_components
self.copy = copy
self.whiten = whiten
self.svd_solver = svd_solver
self.tol = tol
self.iterated_power = iterated_power
self.random_state = random_state
class NMF(BaseEstimator):
def __init__(self, n_components=None, init=None, solver='cd',
beta_loss='frobenius', tol=1e-4, max_iter=200,
random_state=None, alpha=0., l1_ratio=0., verbose=0,
shuffle=False):
self.n_components = n_components
self.init = init
self.solver = solver
self.beta_loss = beta_loss
self.tol = tol
self.max_iter = max_iter
self.random_state = random_state
self.alpha = alpha
self.l1_ratio = l1_ratio
self.verbose = verbose
self.shuffle = shuffle
class SimpleImputer(BaseEstimator):
def __init__(self, missing_values=np.nan, strategy="mean",
fill_value=None, verbose=0, copy=True):
self.missing_values = missing_values
self.strategy = strategy
self.fill_value = fill_value
self.verbose = verbose
self.copy = copy
def test_basic(print_changed_only_false):
# Basic pprint test
lr = LogisticRegression()
expected = """
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, l1_ratio=None, max_iter=100,
multi_class='warn', n_jobs=None, penalty='l2',
random_state=None, solver='warn', tol=0.0001, verbose=0,
warm_start=False)"""
expected = expected[1:] # remove first \n
assert lr.__repr__() == expected
def test_changed_only():
# Make sure the changed_only param is correctly used when True (default)
lr = LogisticRegression(C=99)
expected = """LogisticRegression(C=99)"""
assert lr.__repr__() == expected
# Check with a repr that doesn't fit on a single line
lr = LogisticRegression(C=99, class_weight=.4, fit_intercept=False,
tol=1234, verbose=True)
expected = """
LogisticRegression(C=99, class_weight=0.4, fit_intercept=False, tol=1234,
verbose=True)"""
expected = expected[1:] # remove first \n
assert lr.__repr__() == expected
imputer = SimpleImputer(missing_values=0)
expected = """SimpleImputer(missing_values=0)"""
assert imputer.__repr__() == expected
# Defaults to np.NaN, trying with float('NaN')
imputer = SimpleImputer(missing_values=float('NaN'))
expected = """SimpleImputer()"""
assert imputer.__repr__() == expected
# make sure array parameters don't throw error (see #13583)
repr(LogisticRegressionCV(Cs=np.array([0.1, 1])))
def test_pipeline(print_changed_only_false):
# Render a pipeline object
pipeline = make_pipeline(StandardScaler(), LogisticRegression(C=999))
expected = """
Pipeline(memory=None,
steps=[('standardscaler',
StandardScaler(copy=True, with_mean=True, with_std=True)),
('logisticregression',
LogisticRegression(C=999, class_weight=None, dual=False,
fit_intercept=True, intercept_scaling=1,
l1_ratio=None, max_iter=100,
multi_class='warn', n_jobs=None,
penalty='l2', random_state=None,
solver='warn', tol=0.0001, verbose=0,
warm_start=False))],
verbose=False)"""
expected = expected[1:] # remove first \n
assert pipeline.__repr__() == expected
def test_deeply_nested(print_changed_only_false):
# Render a deeply nested estimator
rfe = RFE(RFE(RFE(RFE(RFE(RFE(RFE(LogisticRegression())))))))
expected = """
RFE(estimator=RFE(estimator=RFE(estimator=RFE(estimator=RFE(estimator=RFE(estimator=RFE(estimator=LogisticRegression(C=1.0,
class_weight=None,
dual=False,
fit_intercept=True,
intercept_scaling=1,
l1_ratio=None,
max_iter=100,
multi_class='warn',
n_jobs=None,
penalty='l2',
random_state=None,
solver='warn',
tol=0.0001,
verbose=0,
warm_start=False),
n_features_to_select=None,
step=1,
verbose=0),
n_features_to_select=None,
step=1,
verbose=0),
n_features_to_select=None,
step=1, verbose=0),
n_features_to_select=None, step=1,
verbose=0),
n_features_to_select=None, step=1, verbose=0),
n_features_to_select=None, step=1, verbose=0),
n_features_to_select=None, step=1, verbose=0)"""
expected = expected[1:] # remove first \n
assert rfe.__repr__() == expected
def test_gridsearch(print_changed_only_false):
# render a gridsearch
param_grid = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
'C': [1, 10, 100, 1000]},
{'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
gs = GridSearchCV(SVC(), param_grid, cv=5)
expected = """
GridSearchCV(cv=5, error_score='raise-deprecating',
estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3,
gamma='auto_deprecated', kernel='rbf', max_iter=-1,
probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False),
iid='warn', n_jobs=None,
param_grid=[{'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
'kernel': ['rbf']},
{'C': [1, 10, 100, 1000], 'kernel': ['linear']}],
pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
scoring=None, verbose=0)"""
expected = expected[1:] # remove first \n
assert gs.__repr__() == expected
def test_gridsearch_pipeline(print_changed_only_false):
# render a pipeline inside a gridsearch
pp = _EstimatorPrettyPrinter(compact=True, indent=1, indent_at_name=True)
pipeline = Pipeline([
('reduce_dim', PCA()),
('classify', SVC())
])
N_FEATURES_OPTIONS = [2, 4, 8]
C_OPTIONS = [1, 10, 100, 1000]
param_grid = [
{
'reduce_dim': [PCA(iterated_power=7), NMF()],
'reduce_dim__n_components': N_FEATURES_OPTIONS,
'classify__C': C_OPTIONS
},
{
'reduce_dim': [SelectKBest(chi2)],
'reduce_dim__k': N_FEATURES_OPTIONS,
'classify__C': C_OPTIONS
}
]
    gs_pipeline = GridSearchCV(pipeline, cv=3, n_jobs=1, param_grid=param_grid)
expected = """
GridSearchCV(cv=3, error_score='raise-deprecating',
estimator=Pipeline(memory=None,
steps=[('reduce_dim',
PCA(copy=True, iterated_power='auto',
n_components=None,
random_state=None,
svd_solver='auto', tol=0.0,
whiten=False)),
('classify',
SVC(C=1.0, cache_size=200,
class_weight=None, coef0=0.0,
decision_function_shape='ovr',
degree=3, gamma='auto_deprecated',
kernel='rbf', max_iter=-1,
probability=False,
random_state=None, shrinking=True,
tol=0.001, verbose=False))]),
iid='warn', n_jobs=1,
param_grid=[{'classify__C': [1, 10, 100, 1000],
'reduce_dim': [PCA(copy=True, iterated_power=7,
n_components=None,
random_state=None,
svd_solver='auto', tol=0.0,
whiten=False),
NMF(alpha=0.0, beta_loss='frobenius',
init=None, l1_ratio=0.0,
max_iter=200, n_components=None,
random_state=None, shuffle=False,
solver='cd', tol=0.0001,
verbose=0)],
'reduce_dim__n_components': [2, 4, 8]},
{'classify__C': [1, 10, 100, 1000],
'reduce_dim': [SelectKBest(k=10,
score_func=<function chi2 at some_address>)],
'reduce_dim__k': [2, 4, 8]}],
pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
scoring=None, verbose=0)"""
expected = expected[1:] # remove first \n
    repr_ = pp.pformat(gs_pipeline)
# Remove address of '<function chi2 at 0x.....>' for reproducibility
repr_ = re.sub('function chi2 at 0x.*>',
'function chi2 at some_address>', repr_)
assert repr_ == expected
def test_n_max_elements_to_show(print_changed_only_false):
n_max_elements_to_show = 30
pp = _EstimatorPrettyPrinter(
compact=True, indent=1, indent_at_name=True,
n_max_elements_to_show=n_max_elements_to_show
)
# No ellipsis
vocabulary = {i: i for i in range(n_max_elements_to_show)}
vectorizer = CountVectorizer(vocabulary=vocabulary)
expected = r"""
CountVectorizer(analyzer='word', binary=False, decode_error='strict',
dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
lowercase=True, max_df=1.0, max_features=None, min_df=1,
ngram_range=(1, 1), preprocessor=None, stop_words=None,
strip_accents=None, token_pattern='(?u)\\b\\w\\w+\\b',
tokenizer=None,
vocabulary={0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7,
8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14,
15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20,
21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26,
27: 27, 28: 28, 29: 29})"""
expected = expected[1:] # remove first \n
assert pp.pformat(vectorizer) == expected
# Now with ellipsis
vocabulary = {i: i for i in range(n_max_elements_to_show + 1)}
vectorizer = CountVectorizer(vocabulary=vocabulary)
expected = r"""
CountVectorizer(analyzer='word', binary=False, decode_error='strict',
dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
lowercase=True, max_df=1.0, max_features=None, min_df=1,
ngram_range=(1, 1), preprocessor=None, stop_words=None,
strip_accents=None, token_pattern='(?u)\\b\\w\\w+\\b',
tokenizer=None,
vocabulary={0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7,
8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14,
15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20,
21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26,
27: 27, 28: 28, 29: 29, ...})"""
expected = expected[1:] # remove first \n
assert pp.pformat(vectorizer) == expected
# Also test with lists
param_grid = {'C': list(range(n_max_elements_to_show))}
gs = GridSearchCV(SVC(), param_grid)
expected = """
GridSearchCV(cv='warn', error_score='raise-deprecating',
estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3,
gamma='auto_deprecated', kernel='rbf', max_iter=-1,
probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False),
iid='warn', n_jobs=None,
param_grid={'C': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29]},
pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
scoring=None, verbose=0)"""
expected = expected[1:] # remove first \n
assert pp.pformat(gs) == expected
# Now with ellipsis
param_grid = {'C': list(range(n_max_elements_to_show + 1))}
gs = GridSearchCV(SVC(), param_grid)
expected = """
GridSearchCV(cv='warn', error_score='raise-deprecating',
estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3,
gamma='auto_deprecated', kernel='rbf', max_iter=-1,
probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False),
iid='warn', n_jobs=None,
param_grid={'C': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, ...]},
pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
scoring=None, verbose=0)"""
expected = expected[1:] # remove first \n
assert pp.pformat(gs) == expected
def test_bruteforce_ellipsis(print_changed_only_false):
# Check that the bruteforce ellipsis (used when the number of non-blank
# characters exceeds N_CHAR_MAX) renders correctly.
lr = LogisticRegression()
# test when the left and right side of the ellipsis aren't on the same
# line.
expected = """
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
in...
multi_class='warn', n_jobs=None, penalty='l2',
random_state=None, solver='warn', tol=0.0001, verbose=0,
warm_start=False)"""
expected = expected[1:] # remove first \n
assert expected == lr.__repr__(N_CHAR_MAX=150)
# test with very small N_CHAR_MAX
    # Note that N_CHAR_MAX is not strictly enforced, which is expected: to
    # avoid weird reprs we still keep the whole line of the right part
    # (after the ellipsis).
expected = """
Lo...
warm_start=False)"""
expected = expected[1:] # remove first \n
assert expected == lr.__repr__(N_CHAR_MAX=4)
# test with N_CHAR_MAX == number of non-blank characters: In this case we
# don't want ellipsis
full_repr = lr.__repr__(N_CHAR_MAX=float('inf'))
n_nonblank = len(''.join(full_repr.split()))
assert lr.__repr__(N_CHAR_MAX=n_nonblank) == full_repr
assert '...' not in full_repr
    # test with N_CHAR_MAX == number of non-blank characters - 10: the left
    # and right side of the ellipsis are on different lines. In this case we
    # want to expand the whole line of the right side
expected = """
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, l1_ratio=None, max_i...
multi_class='warn', n_jobs=None, penalty='l2',
random_state=None, solver='warn', tol=0.0001, verbose=0,
warm_start=False)"""
expected = expected[1:] # remove first \n
assert expected == lr.__repr__(N_CHAR_MAX=n_nonblank - 10)
    # test with N_CHAR_MAX == number of non-blank characters - 4: the left
    # and right side of the ellipsis are on the same line. In this case we
    # don't want to expand the whole line of the right side, just add the
    # ellipsis between the 2 sides.
expected = """
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, l1_ratio=None, max_iter...,
multi_class='warn', n_jobs=None, penalty='l2',
random_state=None, solver='warn', tol=0.0001, verbose=0,
warm_start=False)"""
expected = expected[1:] # remove first \n
assert expected == lr.__repr__(N_CHAR_MAX=n_nonblank - 4)
# test with N_CHAR_MAX == number of non-blank characters - 2: the left and
    # right side of the ellipsis are on the same line, but adding the ellipsis
# would actually make the repr longer. So we don't add the ellipsis.
expected = """
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
intercept_scaling=1, l1_ratio=None, max_iter=100,
multi_class='warn', n_jobs=None, penalty='l2',
random_state=None, solver='warn', tol=0.0001, verbose=0,
warm_start=False)"""
expected = expected[1:] # remove first \n
assert expected == lr.__repr__(N_CHAR_MAX=n_nonblank - 2)
def test_builtin_prettyprinter():
    # Non-regression test that ensures we can still use the builtin
    # PrettyPrinter class for estimators (as done e.g. by joblib).
    # This used to be a bug.
PrettyPrinter().pprint(LogisticRegression())
def test_kwargs_in_init():
# Make sure the changed_only=True mode is OK when an argument is passed as
# kwargs.
# Non-regression test for
# https://github.com/scikit-learn/scikit-learn/issues/17206
class WithKWargs(BaseEstimator):
# Estimator with a kwargs argument. These need to hack around
# set_params and get_params. Here we mimic what LightGBM does.
def __init__(self, a='willchange', b='unchanged', **kwargs):
self.a = a
self.b = b
self._other_params = {}
self.set_params(**kwargs)
def get_params(self, deep=True):
params = super().get_params(deep=deep)
params.update(self._other_params)
return params
def set_params(self, **params):
for key, value in params.items():
setattr(self, key, value)
self._other_params[key] = value
return self
est = WithKWargs(a='something', c='abcd', d=None)
expected = "WithKWargs(a='something', c='abcd', d=None)"
assert expected == est.__repr__()
with config_context(print_changed_only=False):
expected = "WithKWargs(a='something', b='unchanged', c='abcd', d=None)"
assert expected == est.__repr__()

View file

@@ -0,0 +1,187 @@
import numpy as np
import pytest
import scipy.sparse as sp
from scipy.special import comb
from numpy.testing import assert_array_almost_equal
from sklearn.utils.random import _random_choice_csc, sample_without_replacement
from sklearn.utils._random import _our_rand_r_py
###############################################################################
# test custom sampling without replacement algorithm
###############################################################################
def test_invalid_sample_without_replacement_algorithm():
with pytest.raises(ValueError):
sample_without_replacement(5, 4, "unknown")
def test_sample_without_replacement_algorithms():
methods = ("auto", "tracking_selection", "reservoir_sampling", "pool")
for m in methods:
def sample_without_replacement_method(n_population, n_samples,
random_state=None):
return sample_without_replacement(n_population, n_samples,
method=m,
random_state=random_state)
check_edge_case_of_sample_int(sample_without_replacement_method)
check_sample_int(sample_without_replacement_method)
check_sample_int_distribution(sample_without_replacement_method)
def check_edge_case_of_sample_int(sample_without_replacement):
# n_population < n_sample
with pytest.raises(ValueError):
sample_without_replacement(0, 1)
with pytest.raises(ValueError):
sample_without_replacement(1, 2)
# n_population == n_samples
assert sample_without_replacement(0, 0).shape == (0, )
assert sample_without_replacement(1, 1).shape == (1, )
# n_population >= n_samples
assert sample_without_replacement(5, 0).shape == (0, )
assert sample_without_replacement(5, 1).shape == (1, )
# n_population < 0 or n_samples < 0
with pytest.raises(ValueError):
sample_without_replacement(-1, 5)
with pytest.raises(ValueError):
sample_without_replacement(5, -1)
def check_sample_int(sample_without_replacement):
    # This test is heavily inspired by test_random.py of python-core.
#
# For the entire allowable range of 0 <= k <= N, validate that
# the sample is of the correct length and contains only unique items
n_population = 100
for n_samples in range(n_population + 1):
s = sample_without_replacement(n_population, n_samples)
assert len(s) == n_samples
unique = np.unique(s)
assert np.size(unique) == n_samples
assert np.all(unique < n_population)
# test edge case n_population == n_samples == 0
assert np.size(sample_without_replacement(0, 0)) == 0
def check_sample_int_distribution(sample_without_replacement):
    # This test is heavily inspired by test_random.py of python-core.
#
# For the entire allowable range of 0 <= k <= N, validate that
# sample generates all possible permutations
n_population = 10
    # a large number of trials prevents false negatives without slowing down
    # the normal case
n_trials = 10000
for n_samples in range(n_population):
        # Counting the number of combinations is not as good as counting
        # the number of permutations. However, it works with sampling
        # algorithms that do not provide a random permutation of the
        # subset of integers.
n_expected = comb(n_population, n_samples, exact=True)
output = {}
for i in range(n_trials):
output[frozenset(sample_without_replacement(n_population,
n_samples))] = None
if len(output) == n_expected:
break
else:
raise AssertionError(
"number of combinations != number of expected (%s != %s)" %
(len(output), n_expected))
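
# A small worked check of the stopping condition above (hypothetical
# `_demo_` helper): the loop counts distinct subsets, i.e. combinations
# rather than permutations.
def _demo_expected_combination_count():
    # choosing 3 items out of 10 with order ignored: 10! / (3! * 7!) == 120
    assert comb(10, 3, exact=True) == 120
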
def test_random_choice_csc(n_samples=10000, random_state=24):
# Explicit class probabilities
classes = [np.array([0, 1]), np.array([0, 1, 2])]
class_probabilities = [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]
got = _random_choice_csc(n_samples, classes, class_probabilities,
random_state)
assert sp.issparse(got)
for k in range(len(classes)):
p = np.bincount(got.getcol(k).toarray().ravel()) / float(n_samples)
assert_array_almost_equal(class_probabilities[k], p, decimal=1)
# Implicit class probabilities
classes = [[0, 1], [1, 2]] # test for array-like support
class_probabilities = [np.array([0.5, 0.5]), np.array([0, 1/2, 1/2])]
got = _random_choice_csc(n_samples=n_samples,
classes=classes,
random_state=random_state)
assert sp.issparse(got)
for k in range(len(classes)):
p = np.bincount(got.getcol(k).toarray().ravel()) / float(n_samples)
assert_array_almost_equal(class_probabilities[k], p, decimal=1)
# Edge case probabilities 1.0 and 0.0
classes = [np.array([0, 1]), np.array([0, 1, 2])]
class_probabilities = [np.array([1.0, 0.0]), np.array([0.0, 1.0, 0.0])]
got = _random_choice_csc(n_samples, classes, class_probabilities,
random_state)
assert sp.issparse(got)
for k in range(len(classes)):
p = np.bincount(got.getcol(k).toarray().ravel(),
minlength=len(class_probabilities[k])) / n_samples
assert_array_almost_equal(class_probabilities[k], p, decimal=1)
# One class target data
classes = [[1], [0]] # test for array-like support
class_probabilities = [np.array([0.0, 1.0]), np.array([1.0])]
got = _random_choice_csc(n_samples=n_samples,
classes=classes,
random_state=random_state)
assert sp.issparse(got)
for k in range(len(classes)):
p = np.bincount(got.getcol(k).toarray().ravel()) / n_samples
assert_array_almost_equal(class_probabilities[k], p, decimal=1)
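
# A minimal sketch (hypothetical `_demo_` helper) of the private API
# exercised above: each output column is sampled independently from its
# own (classes, probabilities) pair and packed into one sparse matrix.
def _demo_random_choice_csc():
    classes = [np.array([0, 1]), np.array([0, 1, 2])]
    probs = [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]
    got = _random_choice_csc(4, classes, probs, random_state=0)
    return got.toarray()  # dense (4, 2) array of sampled class labels
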
def test_random_choice_csc_errors():
# the length of an array in classes and class_probabilities is mismatched
classes = [np.array([0, 1]), np.array([0, 1, 2, 3])]
class_probabilities = [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]
with pytest.raises(ValueError):
_random_choice_csc(4, classes, class_probabilities, 1)
    # a string class dtype is not supported
classes = [np.array(["a", "1"]), np.array(["z", "1", "2"])]
class_probabilities = [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]
with pytest.raises(ValueError):
_random_choice_csc(4, classes, class_probabilities, 1)
    # a float class dtype is not supported either
classes = [np.array([4.2, 0.1]), np.array([0.1, 0.2, 9.4])]
class_probabilities = [np.array([0.5, 0.5]), np.array([0.6, 0.1, 0.3])]
with pytest.raises(ValueError):
_random_choice_csc(4, classes, class_probabilities, 1)
# Given probabilities don't sum to 1
classes = [np.array([0, 1]), np.array([0, 1, 2])]
class_probabilities = [np.array([0.5, 0.6]), np.array([0.6, 0.1, 0.3])]
with pytest.raises(ValueError):
_random_choice_csc(4, classes, class_probabilities, 1)
def test_our_rand_r():
assert 131541053 == _our_rand_r_py(1273642419)
assert 270369 == _our_rand_r_py(0)
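
# A pure-Python sketch of the generator under test, assuming it is the
# 32-bit xorshift (shifts 13, 17, 5) behind scikit-learn's our_rand_r,
# reduced modulo RAND_R_MAX + 1 == 0x80000000; the function name below is
# hypothetical.
def _xorshift_rand_r(seed):
    if seed == 0:
        seed = 1  # the generator must never be seeded with 0
    seed ^= (seed << 13) & 0xFFFFFFFF
    seed ^= seed >> 17
    seed ^= (seed << 5) & 0xFFFFFFFF
    return seed % 0x80000000
# Under that assumption, _xorshift_rand_r(0) == 270369, matching the
# second expected value asserted above.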

View file

@@ -0,0 +1,153 @@
# Author: Tom Dupre la Tour
# Joan Massich <mailsik@gmail.com>
#
# License: BSD 3 clause
import numpy as np
import pytest
import scipy.sparse as sp
from numpy.testing import assert_array_equal
from sklearn.utils._seq_dataset import (
ArrayDataset32, ArrayDataset64, CSRDataset32, CSRDataset64)
from sklearn.datasets import load_iris
from sklearn.utils._testing import assert_allclose
iris = load_iris()
X64 = iris.data.astype(np.float64)
y64 = iris.target.astype(np.float64)
X_csr64 = sp.csr_matrix(X64)
sample_weight64 = np.arange(y64.size, dtype=np.float64)
X32 = iris.data.astype(np.float32)
y32 = iris.target.astype(np.float32)
X_csr32 = sp.csr_matrix(X32)
sample_weight32 = np.arange(y32.size, dtype=np.float32)
def assert_csr_equal_values(current, expected):
current.eliminate_zeros()
expected.eliminate_zeros()
expected = expected.astype(current.dtype)
assert current.shape[0] == expected.shape[0]
assert current.shape[1] == expected.shape[1]
assert_array_equal(current.data, expected.data)
assert_array_equal(current.indices, expected.indices)
assert_array_equal(current.indptr, expected.indptr)
def make_dense_dataset_32():
return ArrayDataset32(X32, y32, sample_weight32, seed=42)
def make_dense_dataset_64():
return ArrayDataset64(X64, y64, sample_weight64, seed=42)
def make_sparse_dataset_32():
return CSRDataset32(X_csr32.data, X_csr32.indptr, X_csr32.indices, y32,
sample_weight32, seed=42)
def make_sparse_dataset_64():
return CSRDataset64(X_csr64.data, X_csr64.indptr, X_csr64.indices, y64,
sample_weight64, seed=42)
@pytest.mark.parametrize('dataset_constructor', [
make_dense_dataset_32,
make_dense_dataset_64,
make_sparse_dataset_32,
make_sparse_dataset_64,
])
def test_seq_dataset_basic_iteration(dataset_constructor):
NUMBER_OF_RUNS = 5
dataset = dataset_constructor()
for _ in range(NUMBER_OF_RUNS):
# next sample
xi_, yi, swi, idx = dataset._next_py()
        xi = sp.csr_matrix(xi_, shape=(1, X64.shape[1]))
assert_csr_equal_values(xi, X_csr64[idx])
assert yi == y64[idx]
assert swi == sample_weight64[idx]
# random sample
xi_, yi, swi, idx = dataset._random_py()
        xi = sp.csr_matrix(xi_, shape=(1, X64.shape[1]))
assert_csr_equal_values(xi, X_csr64[idx])
assert yi == y64[idx]
assert swi == sample_weight64[idx]
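
# A minimal sketch (hypothetical `_demo_` helper) of the sample layout the
# assertions above rely on: _next_py returns
# ((data, indices, indptr), y, sample_weight, idx), and the triple rebuilds
# one CSR row of X.
def _demo_next_sample_layout():
    dataset = make_dense_dataset_64()
    (data, indices, indptr), yi, swi, idx = dataset._next_py()
    row = sp.csr_matrix((data, indices, indptr), shape=(1, X64.shape[1]))
    return row, yi, swi, idx
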
@pytest.mark.parametrize('make_dense_dataset,make_sparse_dataset', [
(make_dense_dataset_32, make_sparse_dataset_32),
(make_dense_dataset_64, make_sparse_dataset_64),
])
def test_seq_dataset_shuffle(make_dense_dataset, make_sparse_dataset):
dense_dataset, sparse_dataset = make_dense_dataset(), make_sparse_dataset()
# not shuffled
for i in range(5):
_, _, _, idx1 = dense_dataset._next_py()
_, _, _, idx2 = sparse_dataset._next_py()
assert idx1 == i
assert idx2 == i
for i in [132, 50, 9, 18, 58]:
_, _, _, idx1 = dense_dataset._random_py()
_, _, _, idx2 = sparse_dataset._random_py()
assert idx1 == i
assert idx2 == i
seed = 77
dense_dataset._shuffle_py(seed)
sparse_dataset._shuffle_py(seed)
idx_next = [63, 91, 148, 87, 29]
idx_shuffle = [137, 125, 56, 121, 127]
for i, j in zip(idx_next, idx_shuffle):
_, _, _, idx1 = dense_dataset._next_py()
_, _, _, idx2 = sparse_dataset._next_py()
assert idx1 == i
assert idx2 == i
_, _, _, idx1 = dense_dataset._random_py()
_, _, _, idx2 = sparse_dataset._random_py()
assert idx1 == j
assert idx2 == j
@pytest.mark.parametrize('make_dataset_32,make_dataset_64', [
(make_dense_dataset_32, make_dense_dataset_64),
(make_sparse_dataset_32, make_sparse_dataset_64),
])
def test_fused_types_consistency(make_dataset_32, make_dataset_64):
dataset_32, dataset_64 = make_dataset_32(), make_dataset_64()
NUMBER_OF_RUNS = 5
for _ in range(NUMBER_OF_RUNS):
# next sample
(xi_data32, _, _), yi32, _, _ = dataset_32._next_py()
(xi_data64, _, _), yi64, _, _ = dataset_64._next_py()
assert xi_data32.dtype == np.float32
assert xi_data64.dtype == np.float64
assert_allclose(xi_data64, xi_data32, rtol=1e-5)
assert_allclose(yi64, yi32, rtol=1e-5)
def test_buffer_dtype_mismatch_error():
    with pytest.raises(ValueError, match='Buffer dtype mismatch'):
        ArrayDataset64(X32, y32, sample_weight32, seed=42)
    with pytest.raises(ValueError, match='Buffer dtype mismatch'):
        ArrayDataset32(X64, y64, sample_weight64, seed=42)
    with pytest.raises(ValueError, match='Buffer dtype mismatch'):
        CSRDataset64(X_csr32.data, X_csr32.indptr, X_csr32.indices, y32,
                     sample_weight32, seed=42)
    with pytest.raises(ValueError, match='Buffer dtype mismatch'):
        CSRDataset32(X_csr64.data, X_csr64.indptr, X_csr64.indices, y64,
                     sample_weight64, seed=42)

View file

@@ -0,0 +1,95 @@
from collections import defaultdict
import numpy as np
from numpy.testing import assert_array_almost_equal
from sklearn.utils.graph import (graph_shortest_path,
single_source_shortest_path_length)
def floyd_warshall_slow(graph, directed=False):
N = graph.shape[0]
    # set missing edges (zero entries) to infinity
    graph[np.where(graph == 0)] = np.inf
    # set diagonal to zero
    graph.flat[::N + 1] = 0
if not directed:
graph = np.minimum(graph, graph.T)
for k in range(N):
for i in range(N):
for j in range(N):
graph[i, j] = min(graph[i, j], graph[i, k] + graph[k, j])
graph[np.where(np.isinf(graph))] = 0
return graph
def generate_graph(N=20):
    # sparse grid of distances
    rng = np.random.RandomState(0)
    dist_matrix = rng.random_sample((N, N))
    # make symmetric: distances are not direction-dependent
    dist_matrix = dist_matrix + dist_matrix.T
    # make the graph sparse
    i = (rng.randint(N, size=N * N // 2), rng.randint(N, size=N * N // 2))
    dist_matrix[i] = 0
    # set diagonal to zero
    dist_matrix.flat[::N + 1] = 0
    return dist_matrix
def test_floyd_warshall():
dist_matrix = generate_graph(20)
for directed in (True, False):
graph_FW = graph_shortest_path(dist_matrix, directed, 'FW')
graph_py = floyd_warshall_slow(dist_matrix.copy(), directed)
assert_array_almost_equal(graph_FW, graph_py)
def test_dijkstra():
dist_matrix = generate_graph(20)
for directed in (True, False):
graph_D = graph_shortest_path(dist_matrix, directed, 'D')
graph_py = floyd_warshall_slow(dist_matrix.copy(), directed)
assert_array_almost_equal(graph_D, graph_py)
def test_shortest_path():
dist_matrix = generate_graph(20)
# We compare path length and not costs (-> set distances to 0 or 1)
dist_matrix[dist_matrix != 0] = 1
for directed in (True, False):
if not directed:
dist_matrix = np.minimum(dist_matrix, dist_matrix.T)
graph_py = floyd_warshall_slow(dist_matrix.copy(), directed)
for i in range(dist_matrix.shape[0]):
# Non-reachable nodes have distance 0 in graph_py
dist_dict = defaultdict(int)
dist_dict.update(single_source_shortest_path_length(dist_matrix,
i))
for j in range(graph_py[i].shape[0]):
assert_array_almost_equal(dist_dict[j], graph_py[i, j])
def test_dijkstra_bug_fix():
X = np.array([[0., 0., 4.],
[1., 0., 2.],
[0., 5., 0.]])
dist_FW = graph_shortest_path(X, directed=False, method='FW')
dist_D = graph_shortest_path(X, directed=False, method='D')
assert_array_almost_equal(dist_D, dist_FW)
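
# A minimal sketch (hypothetical `_demo_` helper): both solvers should
# agree on a tiny undirected 3-node path graph, where a 0 entry means
# "no direct edge".
def _demo_shortest_path_methods():
    X = np.array([[0., 1., 0.],
                  [1., 0., 2.],
                  [0., 2., 0.]])
    dist_fw = graph_shortest_path(X, directed=False, method='FW')
    dist_d = graph_shortest_path(X, directed=False, method='D')
    return dist_fw, dist_d  # distance 0 -> 2 is 3.0, going through node 1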

View file

@@ -0,0 +1,37 @@
from sklearn.utils._show_versions import _get_sys_info
from sklearn.utils._show_versions import _get_deps_info
from sklearn.utils._show_versions import show_versions
from sklearn.utils._testing import ignore_warnings
def test_get_sys_info():
sys_info = _get_sys_info()
assert 'python' in sys_info
assert 'executable' in sys_info
assert 'machine' in sys_info
def test_get_deps_info():
with ignore_warnings():
deps_info = _get_deps_info()
assert 'pip' in deps_info
assert 'setuptools' in deps_info
assert 'sklearn' in deps_info
assert 'numpy' in deps_info
assert 'scipy' in deps_info
assert 'Cython' in deps_info
assert 'pandas' in deps_info
assert 'matplotlib' in deps_info
assert 'joblib' in deps_info
def test_show_versions(capsys):
with ignore_warnings():
show_versions()
out, err = capsys.readouterr()
assert 'python' in out
assert 'numpy' in out

View file

@@ -0,0 +1,617 @@
import pytest
import numpy as np
import scipy.sparse as sp
from scipy import linalg
from numpy.testing import assert_array_almost_equal, assert_array_equal
from numpy.random import RandomState
from sklearn.datasets import make_classification
from sklearn.utils.sparsefuncs import (mean_variance_axis,
incr_mean_variance_axis,
inplace_column_scale,
inplace_row_scale,
inplace_swap_row, inplace_swap_column,
min_max_axis,
count_nonzero, csc_median_axis_0)
from sklearn.utils.sparsefuncs_fast import (assign_rows_csr,
inplace_csr_row_normalize_l1,
inplace_csr_row_normalize_l2,
csr_row_norms)
from sklearn.utils._testing import assert_allclose
def test_mean_variance_axis0():
X, _ = make_classification(5, 4, random_state=0)
# Sparsify the array a little bit
X[0, 0] = 0
X[2, 1] = 0
X[4, 3] = 0
X_lil = sp.lil_matrix(X)
X_lil[1, 0] = 0
X[1, 0] = 0
with pytest.raises(TypeError):
mean_variance_axis(X_lil, axis=0)
X_csr = sp.csr_matrix(X_lil)
X_csc = sp.csc_matrix(X_lil)
expected_dtypes = [(np.float32, np.float32),
(np.float64, np.float64),
(np.int32, np.float64),
(np.int64, np.float64)]
for input_dtype, output_dtype in expected_dtypes:
X_test = X.astype(input_dtype)
for X_sparse in (X_csr, X_csc):
X_sparse = X_sparse.astype(input_dtype)
X_means, X_vars = mean_variance_axis(X_sparse, axis=0)
assert X_means.dtype == output_dtype
assert X_vars.dtype == output_dtype
assert_array_almost_equal(X_means, np.mean(X_test, axis=0))
assert_array_almost_equal(X_vars, np.var(X_test, axis=0))
def test_mean_variance_axis1():
X, _ = make_classification(5, 4, random_state=0)
# Sparsify the array a little bit
X[0, 0] = 0
X[2, 1] = 0
X[4, 3] = 0
X_lil = sp.lil_matrix(X)
X_lil[1, 0] = 0
X[1, 0] = 0
with pytest.raises(TypeError):
mean_variance_axis(X_lil, axis=1)
X_csr = sp.csr_matrix(X_lil)
X_csc = sp.csc_matrix(X_lil)
expected_dtypes = [(np.float32, np.float32),
(np.float64, np.float64),
(np.int32, np.float64),
(np.int64, np.float64)]
for input_dtype, output_dtype in expected_dtypes:
X_test = X.astype(input_dtype)
for X_sparse in (X_csr, X_csc):
X_sparse = X_sparse.astype(input_dtype)
            X_means, X_vars = mean_variance_axis(X_sparse, axis=1)
            assert X_means.dtype == output_dtype
            assert X_vars.dtype == output_dtype
            assert_array_almost_equal(X_means, np.mean(X_test, axis=1))
            assert_array_almost_equal(X_vars, np.var(X_test, axis=1))
def test_incr_mean_variance_axis():
for axis in [0, 1]:
rng = np.random.RandomState(0)
n_features = 50
n_samples = 10
data_chunks = [rng.randint(0, 2, size=n_features)
for i in range(n_samples)]
# default params for incr_mean_variance
last_mean = np.zeros(n_features)
last_var = np.zeros_like(last_mean)
last_n = np.zeros_like(last_mean, dtype=np.int64)
# Test errors
X = np.array(data_chunks[0])
X = np.atleast_2d(X)
X_lil = sp.lil_matrix(X)
X_csr = sp.csr_matrix(X_lil)
with pytest.raises(TypeError):
incr_mean_variance_axis(X=axis, axis=last_mean, last_mean=last_var,
last_var=last_n)
with pytest.raises(TypeError):
incr_mean_variance_axis(X_lil, axis=axis, last_mean=last_mean,
last_var=last_var, last_n=last_n)
# Test _incr_mean_and_var with a 1 row input
X_means, X_vars = mean_variance_axis(X_csr, axis)
X_means_incr, X_vars_incr, n_incr = \
incr_mean_variance_axis(X_csr, axis=axis, last_mean=last_mean,
last_var=last_var, last_n=last_n)
assert_array_almost_equal(X_means, X_means_incr)
assert_array_almost_equal(X_vars, X_vars_incr)
        # X.shape[axis] gives the number of samples seen
assert_array_equal(X.shape[axis], n_incr)
X_csc = sp.csc_matrix(X_lil)
X_means, X_vars = mean_variance_axis(X_csc, axis)
assert_array_almost_equal(X_means, X_means_incr)
assert_array_almost_equal(X_vars, X_vars_incr)
assert_array_equal(X.shape[axis], n_incr)
# Test _incremental_mean_and_var with whole data
X = np.vstack(data_chunks)
X_lil = sp.lil_matrix(X)
X_csr = sp.csr_matrix(X_lil)
X_csc = sp.csc_matrix(X_lil)
expected_dtypes = [(np.float32, np.float32),
(np.float64, np.float64),
(np.int32, np.float64),
(np.int64, np.float64)]
for input_dtype, output_dtype in expected_dtypes:
for X_sparse in (X_csr, X_csc):
X_sparse = X_sparse.astype(input_dtype)
last_mean = last_mean.astype(output_dtype)
last_var = last_var.astype(output_dtype)
X_means, X_vars = mean_variance_axis(X_sparse, axis)
X_means_incr, X_vars_incr, n_incr = \
incr_mean_variance_axis(X_sparse, axis=axis,
last_mean=last_mean,
last_var=last_var,
last_n=last_n)
assert X_means_incr.dtype == output_dtype
assert X_vars_incr.dtype == output_dtype
assert_array_almost_equal(X_means, X_means_incr)
assert_array_almost_equal(X_vars, X_vars_incr)
assert_array_equal(X.shape[axis], n_incr)
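
# A pure-NumPy sketch of the streaming mean/variance update (Chan et al.)
# that the incremental helper above is expected to match; the `_demo_`
# helper is hypothetical and `batch` is a dense (n_new, n_features) array.
def _demo_streaming_mean_var(last_mean, last_var, last_n, batch):
    n_new = batch.shape[0]
    new_n = last_n + n_new
    delta = batch.mean(axis=0) - last_mean
    new_mean = last_mean + delta * n_new / new_n
    # combine the two sums of squared deviations, then renormalize
    m_old = last_var * last_n
    m_new = batch.var(axis=0) * n_new
    new_var = (m_old + m_new + delta ** 2 * last_n * n_new / new_n) / new_n
    return new_mean, new_var, new_n
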
@pytest.mark.parametrize(
"X1, X2",
[
(sp.random(5, 2, density=0.8, format='csr', random_state=0),
sp.random(13, 2, density=0.8, format='csr', random_state=0)),
(sp.random(5, 2, density=0.8, format='csr', random_state=0),
sp.hstack([sp.csr_matrix(np.full((13, 1), fill_value=np.nan)),
sp.random(13, 1, density=0.8, random_state=42)],
format="csr"))
]
)
def test_incr_mean_variance_axis_equivalence_mean_variance(X1, X2):
# non-regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/16448
# check that computing the incremental mean and variance is equivalent to
# computing the mean and variance on the stacked dataset.
axis = 0
last_mean, last_var = np.zeros(X1.shape[1]), np.zeros(X1.shape[1])
last_n = np.zeros(X1.shape[1], dtype=np.int64)
updated_mean, updated_var, updated_n = incr_mean_variance_axis(
X1, axis=axis, last_mean=last_mean, last_var=last_var, last_n=last_n
)
updated_mean, updated_var, updated_n = incr_mean_variance_axis(
X2, axis=axis, last_mean=updated_mean, last_var=updated_var,
last_n=updated_n
)
X = sp.vstack([X1, X2])
assert_allclose(updated_mean, np.nanmean(X.A, axis=axis))
assert_allclose(updated_var, np.nanvar(X.A, axis=axis))
assert_allclose(updated_n, np.count_nonzero(~np.isnan(X.A), axis=0))
def test_incr_mean_variance_no_new_n():
# check the behaviour when we update the variance with an empty matrix
axis = 0
X1 = sp.random(5, 1, density=0.8, random_state=0).tocsr()
X2 = sp.random(0, 1, density=0.8, random_state=0).tocsr()
last_mean, last_var = np.zeros(X1.shape[1]), np.zeros(X1.shape[1])
last_n = np.zeros(X1.shape[1], dtype=np.int64)
last_mean, last_var, last_n = incr_mean_variance_axis(
X1, axis=axis, last_mean=last_mean, last_var=last_var, last_n=last_n
)
    # update the statistics with an empty matrix, which should be ignored
updated_mean, updated_var, updated_n = incr_mean_variance_axis(
X2, axis=axis, last_mean=last_mean, last_var=last_var, last_n=last_n
)
assert_allclose(updated_mean, last_mean)
assert_allclose(updated_var, last_var)
assert_allclose(updated_n, last_n)
@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize("sparse_constructor", [sp.csc_matrix, sp.csr_matrix])
def test_incr_mean_variance_axis_ignore_nan(axis, sparse_constructor):
old_means = np.array([535., 535., 535., 535.])
old_variances = np.array([4225., 4225., 4225., 4225.])
old_sample_count = np.array([2, 2, 2, 2], dtype=np.int64)
X = sparse_constructor(
np.array([[170, 170, 170, 170],
[430, 430, 430, 430],
[300, 300, 300, 300]]))
X_nan = sparse_constructor(
np.array([[170, np.nan, 170, 170],
[np.nan, 170, 430, 430],
[430, 430, np.nan, 300],
[300, 300, 300, np.nan]]))
    # we avoid creating specific data for axis 0 and 1: transposing the data
    # is enough.
if axis:
X = X.T
X_nan = X_nan.T
# take a copy of the old statistics since they are modified in place.
X_means, X_vars, X_sample_count = incr_mean_variance_axis(
X, axis=axis, last_mean=old_means.copy(),
last_var=old_variances.copy(), last_n=old_sample_count.copy())
X_nan_means, X_nan_vars, X_nan_sample_count = incr_mean_variance_axis(
X_nan, axis=axis, last_mean=old_means.copy(),
last_var=old_variances.copy(), last_n=old_sample_count.copy())
assert_allclose(X_nan_means, X_means)
assert_allclose(X_nan_vars, X_vars)
assert_allclose(X_nan_sample_count, X_sample_count)
def test_mean_variance_illegal_axis():
X, _ = make_classification(5, 4, random_state=0)
# Sparsify the array a little bit
X[0, 0] = 0
X[2, 1] = 0
X[4, 3] = 0
X_csr = sp.csr_matrix(X)
with pytest.raises(ValueError):
mean_variance_axis(X_csr, axis=-3)
with pytest.raises(ValueError):
mean_variance_axis(X_csr, axis=2)
with pytest.raises(ValueError):
mean_variance_axis(X_csr, axis=-1)
with pytest.raises(ValueError):
incr_mean_variance_axis(X_csr, axis=-3, last_mean=None, last_var=None,
last_n=None)
with pytest.raises(ValueError):
incr_mean_variance_axis(X_csr, axis=2, last_mean=None, last_var=None,
last_n=None)
with pytest.raises(ValueError):
incr_mean_variance_axis(X_csr, axis=-1, last_mean=None, last_var=None,
last_n=None)
def test_densify_rows():
for dtype in (np.float32, np.float64):
X = sp.csr_matrix([[0, 3, 0],
[2, 4, 0],
[0, 0, 0],
[9, 8, 7],
[4, 0, 5]], dtype=dtype)
X_rows = np.array([0, 2, 3], dtype=np.intp)
out = np.ones((6, X.shape[1]), dtype=dtype)
out_rows = np.array([1, 3, 4], dtype=np.intp)
expect = np.ones_like(out)
expect[out_rows] = X[X_rows, :].toarray()
assign_rows_csr(X, X_rows, out_rows, out)
assert_array_equal(out, expect)
def test_inplace_column_scale():
rng = np.random.RandomState(0)
X = sp.rand(100, 200, 0.05)
Xr = X.tocsr()
Xc = X.tocsc()
XA = X.toarray()
scale = rng.rand(200)
XA *= scale
inplace_column_scale(Xc, scale)
inplace_column_scale(Xr, scale)
assert_array_almost_equal(Xr.toarray(), Xc.toarray())
assert_array_almost_equal(XA, Xc.toarray())
assert_array_almost_equal(XA, Xr.toarray())
with pytest.raises(TypeError):
inplace_column_scale(X.tolil(), scale)
X = X.astype(np.float32)
scale = scale.astype(np.float32)
Xr = X.tocsr()
Xc = X.tocsc()
XA = X.toarray()
XA *= scale
inplace_column_scale(Xc, scale)
inplace_column_scale(Xr, scale)
assert_array_almost_equal(Xr.toarray(), Xc.toarray())
assert_array_almost_equal(XA, Xc.toarray())
assert_array_almost_equal(XA, Xr.toarray())
with pytest.raises(TypeError):
inplace_column_scale(X.tolil(), scale)
def test_inplace_row_scale():
rng = np.random.RandomState(0)
X = sp.rand(100, 200, 0.05)
Xr = X.tocsr()
Xc = X.tocsc()
XA = X.toarray()
scale = rng.rand(100)
XA *= scale.reshape(-1, 1)
inplace_row_scale(Xc, scale)
inplace_row_scale(Xr, scale)
assert_array_almost_equal(Xr.toarray(), Xc.toarray())
assert_array_almost_equal(XA, Xc.toarray())
assert_array_almost_equal(XA, Xr.toarray())
    with pytest.raises(TypeError):
        inplace_row_scale(X.tolil(), scale)
X = X.astype(np.float32)
scale = scale.astype(np.float32)
Xr = X.tocsr()
Xc = X.tocsc()
XA = X.toarray()
XA *= scale.reshape(-1, 1)
inplace_row_scale(Xc, scale)
inplace_row_scale(Xr, scale)
assert_array_almost_equal(Xr.toarray(), Xc.toarray())
assert_array_almost_equal(XA, Xc.toarray())
assert_array_almost_equal(XA, Xr.toarray())
    with pytest.raises(TypeError):
        inplace_row_scale(X.tolil(), scale)
def test_inplace_swap_row():
X = np.array([[0, 3, 0],
[2, 4, 0],
[0, 0, 0],
[9, 8, 7],
[4, 0, 5]], dtype=np.float64)
X_csr = sp.csr_matrix(X)
X_csc = sp.csc_matrix(X)
swap = linalg.get_blas_funcs(('swap',), (X,))
swap = swap[0]
X[0], X[-1] = swap(X[0], X[-1])
inplace_swap_row(X_csr, 0, -1)
inplace_swap_row(X_csc, 0, -1)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
X[2], X[3] = swap(X[2], X[3])
inplace_swap_row(X_csr, 2, 3)
inplace_swap_row(X_csc, 2, 3)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
with pytest.raises(TypeError):
        inplace_swap_row(X_csr.tolil(), 0, -1)
X = np.array([[0, 3, 0],
[2, 4, 0],
[0, 0, 0],
[9, 8, 7],
[4, 0, 5]], dtype=np.float32)
X_csr = sp.csr_matrix(X)
X_csc = sp.csc_matrix(X)
swap = linalg.get_blas_funcs(('swap',), (X,))
swap = swap[0]
X[0], X[-1] = swap(X[0], X[-1])
inplace_swap_row(X_csr, 0, -1)
inplace_swap_row(X_csc, 0, -1)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
X[2], X[3] = swap(X[2], X[3])
inplace_swap_row(X_csr, 2, 3)
inplace_swap_row(X_csc, 2, 3)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
with pytest.raises(TypeError):
        inplace_swap_row(X_csr.tolil(), 0, -1)
def test_inplace_swap_column():
X = np.array([[0, 3, 0],
[2, 4, 0],
[0, 0, 0],
[9, 8, 7],
[4, 0, 5]], dtype=np.float64)
X_csr = sp.csr_matrix(X)
X_csc = sp.csc_matrix(X)
swap = linalg.get_blas_funcs(('swap',), (X,))
swap = swap[0]
X[:, 0], X[:, -1] = swap(X[:, 0], X[:, -1])
inplace_swap_column(X_csr, 0, -1)
inplace_swap_column(X_csc, 0, -1)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
X[:, 0], X[:, 1] = swap(X[:, 0], X[:, 1])
inplace_swap_column(X_csr, 0, 1)
inplace_swap_column(X_csc, 0, 1)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
with pytest.raises(TypeError):
        inplace_swap_column(X_csr.tolil(), 0, -1)
X = np.array([[0, 3, 0],
[2, 4, 0],
[0, 0, 0],
[9, 8, 7],
[4, 0, 5]], dtype=np.float32)
X_csr = sp.csr_matrix(X)
X_csc = sp.csc_matrix(X)
swap = linalg.get_blas_funcs(('swap',), (X,))
swap = swap[0]
X[:, 0], X[:, -1] = swap(X[:, 0], X[:, -1])
inplace_swap_column(X_csr, 0, -1)
inplace_swap_column(X_csc, 0, -1)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
X[:, 0], X[:, 1] = swap(X[:, 0], X[:, 1])
inplace_swap_column(X_csr, 0, 1)
inplace_swap_column(X_csc, 0, 1)
assert_array_equal(X_csr.toarray(), X_csc.toarray())
assert_array_equal(X, X_csc.toarray())
assert_array_equal(X, X_csr.toarray())
with pytest.raises(TypeError):
        inplace_swap_column(X_csr.tolil(), 0, -1)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("axis", [0, 1, None])
@pytest.mark.parametrize("sparse_format", [sp.csr_matrix, sp.csc_matrix])
@pytest.mark.parametrize(
"missing_values, min_func, max_func, ignore_nan",
[(0, np.min, np.max, False),
(np.nan, np.nanmin, np.nanmax, True)]
)
@pytest.mark.parametrize("large_indices", [True, False])
def test_min_max(dtype, axis, sparse_format, missing_values, min_func,
max_func, ignore_nan, large_indices):
X = np.array([[0, 3, 0],
[2, -1, missing_values],
[0, 0, 0],
[9, missing_values, 7],
[4, 0, 5]], dtype=dtype)
X_sparse = sparse_format(X)
if large_indices:
X_sparse.indices = X_sparse.indices.astype('int64')
X_sparse.indptr = X_sparse.indptr.astype('int64')
mins_sparse, maxs_sparse = min_max_axis(X_sparse, axis=axis,
ignore_nan=ignore_nan)
assert_array_equal(mins_sparse, min_func(X, axis=axis))
assert_array_equal(maxs_sparse, max_func(X, axis=axis))
def test_min_max_axis_errors():
X = np.array([[0, 3, 0],
[2, -1, 0],
[0, 0, 0],
[9, 8, 7],
[4, 0, 5]], dtype=np.float64)
X_csr = sp.csr_matrix(X)
X_csc = sp.csc_matrix(X)
with pytest.raises(TypeError):
min_max_axis(X_csr.tolil(), axis=0)
with pytest.raises(ValueError):
min_max_axis(X_csr, axis=2)
with pytest.raises(ValueError):
min_max_axis(X_csc, axis=-3)
def test_count_nonzero():
X = np.array([[0, 3, 0],
[2, -1, 0],
[0, 0, 0],
[9, 8, 7],
[4, 0, 5]], dtype=np.float64)
X_csr = sp.csr_matrix(X)
X_csc = sp.csc_matrix(X)
X_nonzero = X != 0
sample_weight = [.5, .2, .3, .1, .1]
X_nonzero_weighted = X_nonzero * np.array(sample_weight)[:, None]
for axis in [0, 1, -1, -2, None]:
assert_array_almost_equal(count_nonzero(X_csr, axis=axis),
X_nonzero.sum(axis=axis))
assert_array_almost_equal(count_nonzero(X_csr, axis=axis,
sample_weight=sample_weight),
X_nonzero_weighted.sum(axis=axis))
with pytest.raises(TypeError):
count_nonzero(X_csc)
with pytest.raises(ValueError):
count_nonzero(X_csr, axis=2)
assert (count_nonzero(X_csr, axis=0).dtype ==
count_nonzero(X_csr, axis=1).dtype)
assert (count_nonzero(X_csr, axis=0, sample_weight=sample_weight).dtype ==
count_nonzero(X_csr, axis=1, sample_weight=sample_weight).dtype)
# Check dtypes with large sparse matrices too
# XXX: test fails on 32bit (Windows/Linux)
try:
X_csr.indices = X_csr.indices.astype(np.int64)
X_csr.indptr = X_csr.indptr.astype(np.int64)
assert (count_nonzero(X_csr, axis=0).dtype ==
count_nonzero(X_csr, axis=1).dtype)
assert (count_nonzero(X_csr, axis=0,
sample_weight=sample_weight).dtype ==
count_nonzero(X_csr, axis=1,
sample_weight=sample_weight).dtype)
except TypeError as e:
assert ("according to the rule 'safe'" in e.args[0]
and np.intp().nbytes < 8), e
def test_csc_row_median():
# Test csc_row_median actually calculates the median.
# Test that it gives the same output when X is dense.
rng = np.random.RandomState(0)
X = rng.rand(100, 50)
dense_median = np.median(X, axis=0)
csc = sp.csc_matrix(X)
sparse_median = csc_median_axis_0(csc)
assert_array_equal(sparse_median, dense_median)
# Test that it gives the same output when X is sparse
X = rng.rand(51, 100)
X[X < 0.7] = 0.0
ind = rng.randint(0, 50, 10)
X[ind] = -X[ind]
csc = sp.csc_matrix(X)
dense_median = np.median(X, axis=0)
sparse_median = csc_median_axis_0(csc)
assert_array_equal(sparse_median, dense_median)
# Test for toy data.
X = [[0, -2], [-1, -1], [1, 0], [2, 1]]
csc = sp.csc_matrix(X)
assert_array_equal(csc_median_axis_0(csc), np.array([0.5, -0.5]))
X = [[0, -2], [-1, -5], [1, -3]]
csc = sp.csc_matrix(X)
assert_array_equal(csc_median_axis_0(csc), np.array([0., -3]))
# Test that it raises an Error for non-csc matrices.
with pytest.raises(TypeError):
csc_median_axis_0(sp.csr_matrix(X))
def test_inplace_normalize():
ones = np.ones((10, 1))
rs = RandomState(10)
for inplace_csr_row_normalize in (inplace_csr_row_normalize_l1,
inplace_csr_row_normalize_l2):
for dtype in (np.float64, np.float32):
X = rs.randn(10, 5).astype(dtype)
X_csr = sp.csr_matrix(X)
for index_dtype in [np.int32, np.int64]:
# csr_matrix will use int32 indices by default,
# up-casting those to int64 when necessary
if index_dtype is np.int64:
X_csr.indptr = X_csr.indptr.astype(index_dtype)
X_csr.indices = X_csr.indices.astype(index_dtype)
assert X_csr.indices.dtype == index_dtype
assert X_csr.indptr.dtype == index_dtype
inplace_csr_row_normalize(X_csr)
assert X_csr.dtype == dtype
if inplace_csr_row_normalize is inplace_csr_row_normalize_l2:
X_csr.data **= 2
assert_array_almost_equal(np.abs(X_csr).sum(axis=1), ones)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_csr_row_norms(dtype):
    # checks that csr_row_norms returns the same output as
    # scipy.sparse.linalg.norm, and that the dtype is the same as X.dtype.
X = sp.random(100, 10, format='csr', dtype=dtype, random_state=42)
scipy_norms = sp.linalg.norm(X, axis=1)**2
norms = csr_row_norms(X)
assert norms.dtype == dtype
rtol = 1e-6 if dtype == np.float32 else 1e-7
assert_allclose(norms, scipy_norms, rtol=rtol)
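
# A minimal sketch (hypothetical `_demo_` helper): the squared L2 row norms
# can be read straight off the CSR buffers, which is the quantity
# csr_row_norms is compared against above.
def _demo_csr_row_norms(X_csr):
    out = np.zeros(X_csr.shape[0], dtype=X_csr.dtype)
    for i in range(X_csr.shape[0]):
        start, end = X_csr.indptr[i], X_csr.indptr[i + 1]
        out[i] = np.sum(X_csr.data[start:end] ** 2)
    return out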

View file

@@ -0,0 +1,696 @@
import warnings
import unittest
import sys
import os
import atexit
import numpy as np
from scipy import sparse
import pytest
from sklearn.utils.deprecation import deprecated
from sklearn.utils.metaestimators import if_delegate_has_method
from sklearn.utils._testing import (
assert_raises,
assert_less,
assert_greater,
assert_less_equal,
assert_greater_equal,
assert_warns,
assert_no_warnings,
assert_equal,
assert_not_equal,
assert_in,
assert_not_in,
set_random_state,
assert_raise_message,
ignore_warnings,
check_docstring_parameters,
assert_allclose_dense_sparse,
assert_raises_regex,
TempMemmap,
create_memmap_backed_data,
_delete_folder,
_convert_container)
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
@pytest.mark.filterwarnings("ignore",
category=FutureWarning) # 0.24
def test_assert_less():
assert 0 < 1
with pytest.raises(AssertionError):
assert_less(1, 0)
@pytest.mark.filterwarnings("ignore",
category=FutureWarning) # 0.24
def test_assert_greater():
assert 1 > 0
with pytest.raises(AssertionError):
assert_greater(0, 1)
@pytest.mark.filterwarnings("ignore",
category=FutureWarning) # 0.24
def test_assert_less_equal():
assert 0 <= 1
assert 1 <= 1
with pytest.raises(AssertionError):
assert_less_equal(1, 0)
@pytest.mark.filterwarnings("ignore",
category=FutureWarning) # 0.24
def test_assert_greater_equal():
assert 1 >= 0
assert 1 >= 1
with pytest.raises(AssertionError):
assert_greater_equal(0, 1)
def test_set_random_state():
lda = LinearDiscriminantAnalysis()
tree = DecisionTreeClassifier()
# Linear Discriminant Analysis doesn't have random state: smoke test
set_random_state(lda, 3)
set_random_state(tree, 3)
assert tree.random_state == 3
def test_assert_allclose_dense_sparse():
x = np.arange(9).reshape(3, 3)
msg = "Not equal to tolerance "
y = sparse.csc_matrix(x)
for X in [x, y]:
# basic compare
with pytest.raises(AssertionError, match=msg):
assert_allclose_dense_sparse(X, X*2)
assert_allclose_dense_sparse(X, X)
with pytest.raises(ValueError, match="Can only compare two sparse"):
assert_allclose_dense_sparse(x, y)
A = sparse.diags(np.ones(5), offsets=0).tocsr()
B = sparse.csr_matrix(np.ones((1, 5)))
with pytest.raises(AssertionError, match="Arrays are not equal"):
assert_allclose_dense_sparse(B, A)
def test_assert_raises_msg():
with assert_raises_regex(AssertionError, 'Hello world'):
with assert_raises(ValueError, msg='Hello world'):
pass
def test_assert_raise_message():
def _raise_ValueError(message):
raise ValueError(message)
def _no_raise():
pass
assert_raise_message(ValueError, "test",
_raise_ValueError, "test")
assert_raises(AssertionError,
assert_raise_message, ValueError, "something else",
_raise_ValueError, "test")
assert_raises(ValueError,
assert_raise_message, TypeError, "something else",
_raise_ValueError, "test")
assert_raises(AssertionError,
assert_raise_message, ValueError, "test",
_no_raise)
# multiple exceptions in a tuple
assert_raises(AssertionError,
assert_raise_message, (ValueError, AttributeError),
"test", _no_raise)
def test_ignore_warning():
    # This checks that the ignore_warnings decorator and context manager
    # work as expected
def _warning_function():
warnings.warn("deprecation warning", DeprecationWarning)
def _multiple_warning_function():
warnings.warn("deprecation warning", DeprecationWarning)
warnings.warn("deprecation warning")
# Check the function directly
assert_no_warnings(ignore_warnings(_warning_function))
assert_no_warnings(ignore_warnings(_warning_function,
category=DeprecationWarning))
assert_warns(DeprecationWarning, ignore_warnings(_warning_function,
category=UserWarning))
assert_warns(UserWarning,
ignore_warnings(_multiple_warning_function,
category=FutureWarning))
assert_warns(DeprecationWarning,
ignore_warnings(_multiple_warning_function,
category=UserWarning))
assert_no_warnings(ignore_warnings(_warning_function,
category=(DeprecationWarning,
UserWarning)))
# Check the decorator
@ignore_warnings
def decorator_no_warning():
_warning_function()
_multiple_warning_function()
@ignore_warnings(category=(DeprecationWarning, UserWarning))
def decorator_no_warning_multiple():
_multiple_warning_function()
@ignore_warnings(category=DeprecationWarning)
def decorator_no_deprecation_warning():
_warning_function()
@ignore_warnings(category=UserWarning)
def decorator_no_user_warning():
_warning_function()
@ignore_warnings(category=DeprecationWarning)
def decorator_no_deprecation_multiple_warning():
_multiple_warning_function()
@ignore_warnings(category=UserWarning)
def decorator_no_user_multiple_warning():
_multiple_warning_function()
assert_no_warnings(decorator_no_warning)
assert_no_warnings(decorator_no_warning_multiple)
assert_no_warnings(decorator_no_deprecation_warning)
assert_warns(DeprecationWarning, decorator_no_user_warning)
assert_warns(UserWarning, decorator_no_deprecation_multiple_warning)
assert_warns(DeprecationWarning, decorator_no_user_multiple_warning)
# Check the context manager
def context_manager_no_warning():
with ignore_warnings():
_warning_function()
def context_manager_no_warning_multiple():
with ignore_warnings(category=(DeprecationWarning, UserWarning)):
_multiple_warning_function()
def context_manager_no_deprecation_warning():
with ignore_warnings(category=DeprecationWarning):
_warning_function()
def context_manager_no_user_warning():
with ignore_warnings(category=UserWarning):
_warning_function()
def context_manager_no_deprecation_multiple_warning():
with ignore_warnings(category=DeprecationWarning):
_multiple_warning_function()
def context_manager_no_user_multiple_warning():
with ignore_warnings(category=UserWarning):
_multiple_warning_function()
assert_no_warnings(context_manager_no_warning)
assert_no_warnings(context_manager_no_warning_multiple)
assert_no_warnings(context_manager_no_deprecation_warning)
assert_warns(DeprecationWarning, context_manager_no_user_warning)
assert_warns(UserWarning, context_manager_no_deprecation_multiple_warning)
assert_warns(DeprecationWarning, context_manager_no_user_multiple_warning)
    # Check that passing a warning class as the first positional argument
    # raises a ValueError
warning_class = UserWarning
match = "'obj' should be a callable.+you should use 'category=UserWarning'"
with pytest.raises(ValueError, match=match):
silence_warnings_func = ignore_warnings(warning_class)(
_warning_function)
silence_warnings_func()
with pytest.raises(ValueError, match=match):
@ignore_warnings(warning_class)
def test():
pass
class TestWarns(unittest.TestCase):
def test_warn(self):
def f():
warnings.warn("yo")
return 3
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
filters_orig = warnings.filters[:]
assert assert_warns(UserWarning, f) == 3
# test that assert_warns doesn't have side effects on warnings
# filters
assert warnings.filters == filters_orig
with pytest.raises(AssertionError):
assert_no_warnings(f)
assert assert_no_warnings(lambda x: x, 1) == 1
def test_warn_wrong_warning(self):
def f():
warnings.warn("yo", FutureWarning)
failed = False
filters = sys.modules['warnings'].filters[:]
try:
try:
# Should raise an AssertionError
# assert_warns has a special handling of "FutureWarning" that
# pytest.warns does not have
assert_warns(UserWarning, f)
failed = True
except AssertionError:
pass
finally:
sys.modules['warnings'].filters = filters
if failed:
raise AssertionError("wrong warning caught by assert_warn")
# Tests for docstrings:
def f_ok(a, b):
"""Function f
Parameters
----------
a : int
Parameter a
b : float
Parameter b
Returns
-------
c : list
Parameter c
"""
c = a + b
return c
def f_bad_sections(a, b):
"""Function f
Parameters
----------
a : int
Parameter a
b : float
Parameter b
Results
-------
c : list
Parameter c
"""
c = a + b
return c
def f_bad_order(b, a):
"""Function f
Parameters
----------
a : int
Parameter a
b : float
Parameter b
Returns
-------
c : list
Parameter c
"""
c = a + b
return c
def f_too_many_param_docstring(a, b):
"""Function f
Parameters
----------
a : int
Parameter a
b : int
Parameter b
c : int
Parameter c
Returns
-------
d : list
Parameter c
"""
d = a + b
return d
def f_missing(a, b):
"""Function f
Parameters
----------
a : int
Parameter a
Returns
-------
c : list
Parameter c
"""
c = a + b
return c
def f_check_param_definition(a, b, c, d, e):
"""Function f
Parameters
----------
a: int
Parameter a
b:
Parameter b
c :
Parameter c
d:int
Parameter d
e
No typespec is allowed without colon
"""
return a + b + c + d
class Klass:
def f_missing(self, X, y):
pass
def f_bad_sections(self, X, y):
"""Function f
Parameter
----------
a : int
Parameter a
b : float
Parameter b
Results
-------
c : list
Parameter c
"""
pass
class MockEst:
def __init__(self):
"""MockEstimator"""
def fit(self, X, y):
return X
def predict(self, X):
return X
def predict_proba(self, X):
return X
def score(self, X):
return 1.
class MockMetaEstimator:
def __init__(self, delegate):
"""MetaEstimator to check if doctest on delegated methods work.
Parameters
---------
delegate : estimator
Delegated estimator.
"""
self.delegate = delegate
@if_delegate_has_method(delegate=('delegate'))
def predict(self, X):
"""This is available only if delegate has predict.
Parameters
----------
y : ndarray
Parameter y
"""
return self.delegate.predict(X)
@if_delegate_has_method(delegate=('delegate'))
@deprecated("Testing a deprecated delegated method")
def score(self, X):
"""This is available only if delegate has score.
Parameters
---------
y : ndarray
Parameter y
"""
@if_delegate_has_method(delegate=('delegate'))
def predict_proba(self, X):
"""This is available only if delegate has predict_proba.
Parameters
---------
X : ndarray
Parameter X
"""
return X
@deprecated('Testing deprecated function with wrong params')
def fit(self, X, y):
"""Incorrect docstring but should not be tested"""
def test_check_docstring_parameters():
pytest.importorskip('numpydoc',
reason="numpydoc is required to test the docstrings")
incorrect = check_docstring_parameters(f_ok)
assert incorrect == []
incorrect = check_docstring_parameters(f_ok, ignore=['b'])
assert incorrect == []
incorrect = check_docstring_parameters(f_missing, ignore=['b'])
assert incorrect == []
with pytest.raises(RuntimeError, match="Unknown section Results"):
check_docstring_parameters(f_bad_sections)
with pytest.raises(RuntimeError, match="Unknown section Parameter"):
check_docstring_parameters(Klass.f_bad_sections)
incorrect = check_docstring_parameters(f_check_param_definition)
assert (
incorrect == [
"sklearn.utils.tests.test_testing.f_check_param_definition There "
"was no space between the param name and colon ('a: int')",
"sklearn.utils.tests.test_testing.f_check_param_definition There "
"was no space between the param name and colon ('b:')",
"sklearn.utils.tests.test_testing.f_check_param_definition "
"Parameter 'c :' has an empty type spec. Remove the colon",
"sklearn.utils.tests.test_testing.f_check_param_definition There "
"was no space between the param name and colon ('d:int')",
])
messages = [
["In function: sklearn.utils.tests.test_testing.f_bad_order",
"There's a parameter name mismatch in function docstring w.r.t."
" function signature, at index 0 diff: 'b' != 'a'",
"Full diff:",
"- ['b', 'a']",
"+ ['a', 'b']"],
["In function: " +
"sklearn.utils.tests.test_testing.f_too_many_param_docstring",
"Parameters in function docstring have more items w.r.t. function"
" signature, first extra item: c",
"Full diff:",
"- ['a', 'b']",
"+ ['a', 'b', 'c']",
"? +++++"],
["In function: sklearn.utils.tests.test_testing.f_missing",
"Parameters in function docstring have less items w.r.t. function"
" signature, first missing item: b",
"Full diff:",
"- ['a', 'b']",
"+ ['a']"],
["In function: sklearn.utils.tests.test_testing.Klass.f_missing",
"Parameters in function docstring have less items w.r.t. function"
" signature, first missing item: X",
"Full diff:",
"- ['X', 'y']",
"+ []"],
["In function: " +
"sklearn.utils.tests.test_testing.MockMetaEstimator.predict",
"There's a parameter name mismatch in function docstring w.r.t."
" function signature, at index 0 diff: 'X' != 'y'",
"Full diff:",
"- ['X']",
"? ^",
"+ ['y']",
"? ^"],
["In function: " +
"sklearn.utils.tests.test_testing.MockMetaEstimator."
+ "predict_proba",
"Parameters in function docstring have less items w.r.t. function"
" signature, first missing item: X",
"Full diff:",
"- ['X']",
"+ []"],
["In function: " +
"sklearn.utils.tests.test_testing.MockMetaEstimator.score",
"Parameters in function docstring have less items w.r.t. function"
" signature, first missing item: X",
"Full diff:",
"- ['X']",
"+ []"],
["In function: " +
"sklearn.utils.tests.test_testing.MockMetaEstimator.fit",
"Parameters in function docstring have less items w.r.t. function"
" signature, first missing item: X",
"Full diff:",
"- ['X', 'y']",
"+ []"],
]
mock_meta = MockMetaEstimator(delegate=MockEst())
for msg, f in zip(messages,
[f_bad_order,
f_too_many_param_docstring,
f_missing,
Klass.f_missing,
mock_meta.predict,
mock_meta.predict_proba,
mock_meta.score,
mock_meta.fit]):
incorrect = check_docstring_parameters(f)
assert msg == incorrect, ('\n"%s"\n not in \n"%s"' % (msg, incorrect))
class RegistrationCounter:
def __init__(self):
self.nb_calls = 0
def __call__(self, to_register_func):
self.nb_calls += 1
assert to_register_func.func is _delete_folder
def check_memmap(input_array, mmap_data, mmap_mode='r'):
assert isinstance(mmap_data, np.memmap)
writeable = mmap_mode != 'r'
assert mmap_data.flags.writeable is writeable
np.testing.assert_array_equal(input_array, mmap_data)
def test_tempmemmap(monkeypatch):
registration_counter = RegistrationCounter()
monkeypatch.setattr(atexit, 'register', registration_counter)
input_array = np.ones(3)
with TempMemmap(input_array) as data:
check_memmap(input_array, data)
temp_folder = os.path.dirname(data.filename)
if os.name != 'nt':
assert not os.path.exists(temp_folder)
assert registration_counter.nb_calls == 1
mmap_mode = 'r+'
with TempMemmap(input_array, mmap_mode=mmap_mode) as data:
check_memmap(input_array, data, mmap_mode=mmap_mode)
temp_folder = os.path.dirname(data.filename)
if os.name != 'nt':
assert not os.path.exists(temp_folder)
assert registration_counter.nb_calls == 2
def test_create_memmap_backed_data(monkeypatch):
registration_counter = RegistrationCounter()
monkeypatch.setattr(atexit, 'register', registration_counter)
input_array = np.ones(3)
data = create_memmap_backed_data(input_array)
check_memmap(input_array, data)
assert registration_counter.nb_calls == 1
data, folder = create_memmap_backed_data(input_array,
return_folder=True)
check_memmap(input_array, data)
assert folder == os.path.dirname(data.filename)
assert registration_counter.nb_calls == 2
mmap_mode = 'r+'
data = create_memmap_backed_data(input_array, mmap_mode=mmap_mode)
check_memmap(input_array, data, mmap_mode)
assert registration_counter.nb_calls == 3
input_list = [input_array, input_array + 1, input_array + 2]
mmap_data_list = create_memmap_backed_data(input_list)
for input_array, data in zip(input_list, mmap_data_list):
check_memmap(input_array, data)
assert registration_counter.nb_calls == 4
# 0.24
@pytest.mark.parametrize('callable, args', [
(assert_equal, (0, 0)),
(assert_not_equal, (0, 1)),
(assert_greater, (1, 0)),
(assert_greater_equal, (1, 0)),
(assert_less, (0, 1)),
(assert_less_equal, (0, 1)),
(assert_in, (0, [0])),
(assert_not_in, (0, [1]))])
def test_deprecated_helpers(callable, args):
msg = ('is deprecated in version 0.22 and will be removed in version '
'0.24. Please use "assert" instead')
with pytest.warns(FutureWarning, match=msg):
callable(*args)
@pytest.mark.parametrize(
"constructor_name, container_type",
[('list', list),
('tuple', tuple),
('array', np.ndarray),
('sparse', sparse.csr_matrix),
('dataframe', pytest.importorskip('pandas').DataFrame),
('series', pytest.importorskip('pandas').Series),
('index', pytest.importorskip('pandas').Index),
('slice', slice)]
)
def test_convert_container(constructor_name, container_type):
container = [0, 1]
assert isinstance(_convert_container(container, constructor_name),
container_type)
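
# A minimal sketch (hypothetical `_demo_` helper): _convert_container wraps
# plain Python data in the requested container type, which is how the
# parametrized tests above fabricate their inputs.
def _demo_convert_container():
    as_array = _convert_container([0, 1], 'array')      # np.ndarray
    as_sparse = _convert_container([[0, 1]], 'sparse')  # CSR matrix
    return as_array, as_sparse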

View file

@@ -0,0 +1,697 @@
from copy import copy
from itertools import chain
import warnings
import string
import timeit
import pytest
import numpy as np
import scipy.sparse as sp
from sklearn.utils._testing import (assert_array_equal,
assert_allclose_dense_sparse,
assert_warns_message,
assert_no_warnings,
_convert_container)
from sklearn.utils import check_random_state
from sklearn.utils import _determine_key_type
from sklearn.utils import deprecated
from sklearn.utils import gen_batches
from sklearn.utils import _get_column_indices
from sklearn.utils import resample
from sklearn.utils import safe_mask
from sklearn.utils import column_or_1d
from sklearn.utils import _safe_indexing
from sklearn.utils import shuffle
from sklearn.utils import gen_even_slices
from sklearn.utils import _message_with_time, _print_elapsed_time
from sklearn.utils import get_chunk_n_rows
from sklearn.utils import is_scalar_nan
from sklearn.utils import _to_object_array
from sklearn.utils._mocking import MockDataFrame
from sklearn import config_context
# toy array
X_toy = np.arange(9).reshape((3, 3))
def test_make_rng():
# Check the check_random_state utility function behavior
assert check_random_state(None) is np.random.mtrand._rand
assert check_random_state(np.random) is np.random.mtrand._rand
rng_42 = np.random.RandomState(42)
assert check_random_state(42).randint(100) == rng_42.randint(100)
rng_42 = np.random.RandomState(42)
assert check_random_state(rng_42) is rng_42
rng_42 = np.random.RandomState(42)
assert check_random_state(43).randint(100) != rng_42.randint(100)
with pytest.raises(ValueError):
check_random_state("some invalid seed")
def test_gen_batches():
    # Check gen_batches output and make sure it errors on invalid batch_size
assert_array_equal(
list(gen_batches(4, 2)),
[slice(0, 2, None), slice(2, 4, None)]
)
msg_zero = "gen_batches got batch_size=0, must be positive"
with pytest.raises(ValueError, match=msg_zero):
next(gen_batches(4, 0))
msg_float = "gen_batches got batch_size=0.5, must be an integer"
with pytest.raises(TypeError, match=msg_float):
next(gen_batches(4, 0.5))
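
# A minimal sketch (hypothetical `_demo_` helper): the last slice is
# shorter when batch_size does not divide n evenly.
def _demo_gen_batches():
    # -> [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]
    return list(gen_batches(7, 3))
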
def test_deprecated():
# Test whether the deprecated decorator issues appropriate warnings
# Copied almost verbatim from https://docs.python.org/library/warnings.html
# First a function...
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
@deprecated()
def ham():
return "spam"
spam = ham()
assert spam == "spam" # function must remain usable
assert len(w) == 1
assert issubclass(w[0].category, FutureWarning)
assert "deprecated" in str(w[0].message).lower()
# ... then a class.
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
@deprecated("don't use this")
class Ham:
SPAM = 1
ham = Ham()
assert hasattr(ham, "SPAM")
assert len(w) == 1
assert issubclass(w[0].category, FutureWarning)
assert "deprecated" in str(w[0].message).lower()
def test_resample():
# Border case not worth mentioning in doctests
assert resample() is None
# Check that invalid arguments yield ValueError
with pytest.raises(ValueError):
resample([0], [0, 1])
with pytest.raises(ValueError):
resample([0, 1], [0, 1], replace=False, n_samples=3)
with pytest.raises(ValueError):
resample([0, 1], [0, 1], meaning_of_life=42)
    # Issue #6581: n_samples can exceed the input length when replace is
    # True (the default).
assert len(resample([1, 2], n_samples=5)) == 5
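
# A minimal sketch (hypothetical `_demo_` helper): arrays passed together
# are resampled with the same indices, so rows stay aligned.
def _demo_resample_alignment():
    X = np.arange(5).reshape(-1, 1)
    y = np.arange(5)
    X_r, y_r = resample(X, y, n_samples=3, random_state=0)
    assert_array_equal(X_r.ravel(), y_r)
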
def test_resample_stratified():
# Make sure resample can stratify
rng = np.random.RandomState(0)
n_samples = 100
p = .9
X = rng.normal(size=(n_samples, 1))
y = rng.binomial(1, p, size=n_samples)
_, y_not_stratified = resample(X, y, n_samples=10, random_state=0,
stratify=None)
assert np.all(y_not_stratified == 1)
_, y_stratified = resample(X, y, n_samples=10, random_state=0, stratify=y)
assert not np.all(y_stratified == 1)
    assert np.sum(y_stratified) == 9  # nine 1s, one 0
def test_resample_stratified_replace():
# Make sure stratified resampling supports the replace parameter
rng = np.random.RandomState(0)
n_samples = 100
X = rng.normal(size=(n_samples, 1))
y = rng.randint(0, 2, size=n_samples)
X_replace, _ = resample(X, y, replace=True, n_samples=50,
random_state=rng, stratify=y)
X_no_replace, _ = resample(X, y, replace=False, n_samples=50,
random_state=rng, stratify=y)
assert np.unique(X_replace).shape[0] < 50
assert np.unique(X_no_replace).shape[0] == 50
# make sure n_samples can be greater than X.shape[0] if we sample with
# replacement
X_replace, _ = resample(X, y, replace=True, n_samples=1000,
random_state=rng, stratify=y)
assert X_replace.shape[0] == 1000
assert np.unique(X_replace).shape[0] == 100
def test_resample_stratify_2dy():
# Make sure y can be 2d when stratifying
rng = np.random.RandomState(0)
n_samples = 100
X = rng.normal(size=(n_samples, 1))
y = rng.randint(0, 2, size=(n_samples, 2))
X, y = resample(X, y, n_samples=50, random_state=rng, stratify=y)
assert y.ndim == 2
def test_resample_stratify_sparse_error():
    # the stratify array passed to resample must be array-like, not sparse
rng = np.random.RandomState(0)
n_samples = 100
X = rng.normal(size=(n_samples, 2))
y = rng.randint(0, 2, size=n_samples)
stratify = sp.csr_matrix(y)
with pytest.raises(TypeError, match='A sparse matrix was passed'):
X, y = resample(X, y, n_samples=50, random_state=rng,
stratify=stratify)
def test_safe_mask():
random_state = check_random_state(0)
X = random_state.rand(5, 4)
X_csr = sp.csr_matrix(X)
mask = [False, False, True, True, True]
mask = safe_mask(X, mask)
assert X[mask].shape[0] == 3
mask = safe_mask(X_csr, mask)
assert X_csr[mask].shape[0] == 3
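
# A minimal sketch (hypothetical `_demo_` helper): for sparse input,
# safe_mask converts a boolean mask into integer indices, since scipy
# sparse matrices have historically not supported boolean-mask indexing.
def _demo_safe_mask_sparse():
    X_csr = sp.csr_matrix(np.eye(3))
    mask = safe_mask(X_csr, [True, False, True])
    return X_csr[mask]  # first and last rows, still sparse
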
def test_column_or_1d():
EXAMPLES = [
("binary", ["spam", "egg", "spam"]),
("binary", [0, 1, 0, 1]),
("continuous", np.arange(10) / 20.),
("multiclass", [1, 2, 3]),
("multiclass", [0, 1, 2, 2, 0]),
("multiclass", [[1], [2], [3]]),
("multilabel-indicator", [[0, 1, 0], [0, 0, 1]]),
("multiclass-multioutput", [[1, 2, 3]]),
("multiclass-multioutput", [[1, 1], [2, 2], [3, 1]]),
("multiclass-multioutput", [[5, 1], [4, 2], [3, 1]]),
("multiclass-multioutput", [[1, 2, 3]]),
("continuous-multioutput", np.arange(30).reshape((-1, 3))),
]
for y_type, y in EXAMPLES:
if y_type in ["binary", 'multiclass', "continuous"]:
assert_array_equal(column_or_1d(y), np.ravel(y))
else:
with pytest.raises(ValueError):
column_or_1d(y)
@pytest.mark.parametrize(
"key, dtype",
[(0, 'int'),
('0', 'str'),
(True, 'bool'),
(np.bool_(True), 'bool'),
([0, 1, 2], 'int'),
(['0', '1', '2'], 'str'),
((0, 1, 2), 'int'),
(('0', '1', '2'), 'str'),
(slice(None, None), None),
(slice(0, 2), 'int'),
(np.array([0, 1, 2], dtype=np.int32), 'int'),
(np.array([0, 1, 2], dtype=np.int64), 'int'),
(np.array([0, 1, 2], dtype=np.uint8), 'int'),
([True, False], 'bool'),
((True, False), 'bool'),
(np.array([True, False]), 'bool'),
('col_0', 'str'),
(['col_0', 'col_1', 'col_2'], 'str'),
(('col_0', 'col_1', 'col_2'), 'str'),
(slice('begin', 'end'), 'str'),
(np.array(['col_0', 'col_1', 'col_2']), 'str'),
(np.array(['col_0', 'col_1', 'col_2'], dtype=object), 'str')]
)
def test_determine_key_type(key, dtype):
assert _determine_key_type(key) == dtype
def test_determine_key_type_error():
with pytest.raises(ValueError, match="No valid specification of the"):
_determine_key_type(1.0)
def test_determine_key_type_slice_error():
with pytest.raises(TypeError, match="Only array-like or scalar are"):
_determine_key_type(slice(0, 2, 1), accept_slice=False)
@pytest.mark.parametrize(
"array_type", ["list", "array", "sparse", "dataframe"]
)
@pytest.mark.parametrize(
"indices_type", ["list", "tuple", "array", "series", "slice"]
)
def test_safe_indexing_2d_container_axis_0(array_type, indices_type):
indices = [1, 2]
if indices_type == 'slice' and isinstance(indices[1], int):
indices[1] += 1
array = _convert_container([[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type)
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=0)
assert_allclose_dense_sparse(
subset, _convert_container([[4, 5, 6], [7, 8, 9]], array_type)
)
@pytest.mark.parametrize("array_type", ["list", "array", "series"])
@pytest.mark.parametrize(
"indices_type", ["list", "tuple", "array", "series", "slice"]
)
def test_safe_indexing_1d_container(array_type, indices_type):
indices = [1, 2]
if indices_type == 'slice' and isinstance(indices[1], int):
indices[1] += 1
array = _convert_container([1, 2, 3, 4, 5, 6, 7, 8, 9], array_type)
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=0)
assert_allclose_dense_sparse(
subset, _convert_container([2, 3], array_type)
)
@pytest.mark.parametrize("array_type", ["array", "sparse", "dataframe"])
@pytest.mark.parametrize(
"indices_type", ["list", "tuple", "array", "series", "slice"]
)
@pytest.mark.parametrize("indices", [[1, 2], ["col_1", "col_2"]])
def test_safe_indexing_2d_container_axis_1(array_type, indices_type, indices):
# validation of the indices
# we make a copy because indices is mutable and shared between tests
indices_converted = copy(indices)
if indices_type == 'slice' and isinstance(indices[1], int):
indices_converted[1] += 1
columns_name = ['col_0', 'col_1', 'col_2']
array = _convert_container(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type, columns_name
)
indices_converted = _convert_container(indices_converted, indices_type)
if isinstance(indices[0], str) and array_type != 'dataframe':
err_msg = ("Specifying the columns using strings is only supported "
"for pandas DataFrames")
with pytest.raises(ValueError, match=err_msg):
_safe_indexing(array, indices_converted, axis=1)
else:
subset = _safe_indexing(array, indices_converted, axis=1)
assert_allclose_dense_sparse(
subset, _convert_container([[2, 3], [5, 6], [8, 9]], array_type)
)
@pytest.mark.parametrize("array_read_only", [True, False])
@pytest.mark.parametrize("indices_read_only", [True, False])
@pytest.mark.parametrize("array_type", ["array", "sparse", "dataframe"])
@pytest.mark.parametrize("indices_type", ["array", "series"])
@pytest.mark.parametrize(
"axis, expected_array",
[(0, [[4, 5, 6], [7, 8, 9]]), (1, [[2, 3], [5, 6], [8, 9]])]
)
def test_safe_indexing_2d_read_only_axis_1(array_read_only, indices_read_only,
array_type, indices_type, axis,
expected_array):
array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
if array_read_only:
array.setflags(write=False)
array = _convert_container(array, array_type)
indices = np.array([1, 2])
if indices_read_only:
indices.setflags(write=False)
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=axis)
assert_allclose_dense_sparse(
subset, _convert_container(expected_array, array_type)
)
@pytest.mark.parametrize("array_type", ["list", "array", "series"])
@pytest.mark.parametrize("indices_type", ["list", "tuple", "array", "series"])
def test_safe_indexing_1d_container_mask(array_type, indices_type):
indices = [False] + [True] * 2 + [False] * 6
array = _convert_container([1, 2, 3, 4, 5, 6, 7, 8, 9], array_type)
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=0)
assert_allclose_dense_sparse(
subset, _convert_container([2, 3], array_type)
)
@pytest.mark.parametrize("array_type", ["array", "sparse", "dataframe"])
@pytest.mark.parametrize("indices_type", ["list", "tuple", "array", "series"])
@pytest.mark.parametrize(
"axis, expected_subset",
[(0, [[4, 5, 6], [7, 8, 9]]),
(1, [[2, 3], [5, 6], [8, 9]])]
)
def test_safe_indexing_2d_mask(array_type, indices_type, axis,
expected_subset):
columns_name = ['col_0', 'col_1', 'col_2']
array = _convert_container(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type, columns_name
)
indices = [False, True, True]
indices = _convert_container(indices, indices_type)
subset = _safe_indexing(array, indices, axis=axis)
assert_allclose_dense_sparse(
subset, _convert_container(expected_subset, array_type)
    )


@pytest.mark.parametrize(
"array_type, expected_output_type",
[("list", "list"), ("array", "array"),
("sparse", "sparse"), ("dataframe", "series")]
)
def test_safe_indexing_2d_scalar_axis_0(array_type, expected_output_type):
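    # a scalar index along axis=0 selects a single row; for a dataframe the
    # result comes back as a pandas Series, hence the output type mapping above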
array = _convert_container([[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type)
indices = 2
subset = _safe_indexing(array, indices, axis=0)
expected_array = _convert_container([7, 8, 9], expected_output_type)
    assert_allclose_dense_sparse(subset, expected_array)


@pytest.mark.parametrize("array_type", ["list", "array", "series"])
def test_safe_indexing_1d_scalar(array_type):
array = _convert_container([1, 2, 3, 4, 5, 6, 7, 8, 9], array_type)
indices = 2
subset = _safe_indexing(array, indices, axis=0)
    assert subset == 3


@pytest.mark.parametrize(
"array_type, expected_output_type",
[("array", "array"), ("sparse", "sparse"), ("dataframe", "series")]
)
@pytest.mark.parametrize("indices", [2, "col_2"])
def test_safe_indexing_2d_scalar_axis_1(array_type, expected_output_type,
indices):
columns_name = ['col_0', 'col_1', 'col_2']
array = _convert_container(
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type, columns_name
)
if isinstance(indices, str) and array_type != 'dataframe':
err_msg = ("Specifying the columns using strings is only supported "
"for pandas DataFrames")
with pytest.raises(ValueError, match=err_msg):
_safe_indexing(array, indices, axis=1)
else:
subset = _safe_indexing(array, indices, axis=1)
expected_output = [3, 6, 9]
if expected_output_type == 'sparse':
            # sparse matrices keep the 2D shape
expected_output = [[3], [6], [9]]
expected_array = _convert_container(
expected_output, expected_output_type
)
    assert_allclose_dense_sparse(subset, expected_array)


@pytest.mark.parametrize("array_type", ["list", "array", "sparse"])
def test_safe_indexing_None_axis_0(array_type):
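    # indices=None is a no-op: the full container should be returned unchanged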
X = _convert_container([[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type)
X_subset = _safe_indexing(X, None, axis=0)
    assert_allclose_dense_sparse(X_subset, X)


def test_safe_indexing_pandas_no_matching_cols_error():
pd = pytest.importorskip('pandas')
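    # a float key is not a valid column specification for a dataframe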
err_msg = "No valid specification of the columns."
X = pd.DataFrame(X_toy)
with pytest.raises(ValueError, match=err_msg):
        _safe_indexing(X, [1.0], axis=1)


@pytest.mark.parametrize("axis", [None, 3])
def test_safe_indexing_error_axis(axis):
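    # only axis=0 and axis=1 are supported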
with pytest.raises(ValueError, match="'axis' should be either 0"):
        _safe_indexing(X_toy, [0, 1], axis=axis)


@pytest.mark.parametrize("X_constructor", ['array', 'series'])
def test_safe_indexing_1d_array_error(X_constructor):
    # check that an error is raised when the array-like passed in is 1D and
    # we try to index along the 2nd dimension
X = list(range(5))
if X_constructor == 'array':
X_constructor = np.asarray(X)
elif X_constructor == 'series':
pd = pytest.importorskip("pandas")
X_constructor = pd.Series(X)
err_msg = "'X' should be a 2D NumPy array, 2D sparse matrix or pandas"
with pytest.raises(ValueError, match=err_msg):
        _safe_indexing(X_constructor, [0, 1], axis=1)


def test_safe_indexing_container_axis_0_unsupported_type():
indices = ["col_1", "col_2"]
array = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
err_msg = "String indexing is not supported with 'axis=0'"
with pytest.raises(ValueError, match=err_msg):
        _safe_indexing(array, indices, axis=0)


@pytest.mark.parametrize(
"key, err_msg",
[(10, r"all features must be in \[0, 2\]"),
('whatever', 'A given column is not a column of the dataframe')]
)
def test_get_column_indices_error(key, err_msg):
pd = pytest.importorskip("pandas")
X_df = pd.DataFrame(X_toy, columns=['col_0', 'col_1', 'col_2'])
with pytest.raises(ValueError, match=err_msg):
        _get_column_indices(X_df, key)


@pytest.mark.parametrize(
"key",
[['col1'], ['col2'], ['col1', 'col2'], ['col1', 'col3'], ['col2', 'col3']]
)
def test_get_column_indices_pandas_nonunique_columns_error(key):
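    # duplicated column names make string-based selection ambiguous, so any key
    # touching a duplicated label should raise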
pd = pytest.importorskip('pandas')
toy = np.zeros((1, 5), dtype=int)
columns = ['col1', 'col1', 'col2', 'col3', 'col2']
X = pd.DataFrame(toy, columns=columns)
err_msg = "Selected columns, {}, are not unique in dataframe".format(key)
with pytest.raises(ValueError) as exc_info:
_get_column_indices(X, key)
    assert str(exc_info.value) == err_msg


def test_shuffle_on_ndim_equals_three():
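    # shuffling a 3d array should permute along the first axis only, leaving
    # the set of 2d blocks unchanged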
def to_tuple(A): # to make the inner arrays hashable
return tuple(tuple(tuple(C) for C in B) for B in A)
A = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) # A.shape = (2,2,2)
S = set(to_tuple(A))
shuffle(A) # shouldn't raise a ValueError for dim = 3
    assert set(to_tuple(A)) == S


def test_shuffle_dont_convert_to_array():
    # Check that shuffle does not try to convert to numpy arrays with float
    # dtypes and lets any indexable datastructure pass through.
a = ['a', 'b', 'c']
b = np.array(['a', 'b', 'c'], dtype=object)
c = [1, 2, 3]
d = MockDataFrame(np.array([['a', 0],
['b', 1],
['c', 2]],
dtype=object))
e = sp.csc_matrix(np.arange(6).reshape(3, 2))
a_s, b_s, c_s, d_s, e_s = shuffle(a, b, c, d, e, random_state=0)
assert a_s == ['c', 'b', 'a']
assert type(a_s) == list
assert_array_equal(b_s, ['c', 'b', 'a'])
assert b_s.dtype == object
assert c_s == [3, 2, 1]
assert type(c_s) == list
assert_array_equal(d_s, np.array([['c', 2],
['b', 1],
['a', 0]],
dtype=object))
assert type(d_s) == MockDataFrame
    assert_array_equal(e_s.toarray(), np.array([[4, 5],
                                                [2, 3],
                                                [0, 1]]))


def test_gen_even_slices():
# check that gen_even_slices contains all samples
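    # e.g. gen_even_slices(10, 3) yields slice(0, 4), slice(4, 7), slice(7, 10)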
some_range = range(10)
    joined_range = list(chain(*[some_range[sl] for sl in
                                gen_even_slices(10, 3)]))
assert_array_equal(some_range, joined_range)
# check that passing negative n_chunks raises an error
slices = gen_even_slices(10, -1)
with pytest.raises(ValueError, match="gen_even_slices got n_packs=-1,"
" must be >=1"):
        next(slices)


@pytest.mark.parametrize(
('row_bytes', 'max_n_rows', 'working_memory', 'expected', 'warning'),
[(1024, None, 1, 1024, None),
(1024, None, 0.99999999, 1023, None),
(1023, None, 1, 1025, None),
(1025, None, 1, 1023, None),
(1024, None, 2, 2048, None),
(1024, 7, 1, 7, None),
(1024 * 1024, None, 1, 1, None),
(1024 * 1024 + 1, None, 1, 1,
'Could not adhere to working_memory config. '
'Currently 1MiB, 2MiB required.'),
])
def test_get_chunk_n_rows(row_bytes, max_n_rows, working_memory,
expected, warning):
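    # working_memory is expressed in MiB: the chunk size is
    # floor(working_memory * 2**20 / row_bytes), capped at max_n_rows, and
    # forced to 1 (with a warning) when even a single row exceeds the budget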
if warning is not None:
def check_warning(*args, **kw):
return assert_warns_message(UserWarning, warning, *args, **kw)
else:
check_warning = assert_no_warnings
actual = check_warning(get_chunk_n_rows,
row_bytes=row_bytes,
max_n_rows=max_n_rows,
working_memory=working_memory)
assert actual == expected
assert type(actual) is type(expected)
with config_context(working_memory=working_memory):
actual = check_warning(get_chunk_n_rows,
row_bytes=row_bytes,
max_n_rows=max_n_rows)
assert actual == expected
        assert type(actual) is type(expected)


@pytest.mark.parametrize(
['source', 'message', 'is_long'],
[
('ABC', string.ascii_lowercase, False),
('ABCDEF', string.ascii_lowercase, False),
('ABC', string.ascii_lowercase * 3, True),
('ABC' * 10, string.ascii_lowercase, True),
('ABC', string.ascii_lowercase + u'\u1048', False),
])
@pytest.mark.parametrize(
['time', 'time_str'],
[
(0.2, ' 0.2s'),
(20, ' 20.0s'),
(2000, '33.3min'),
(20000, '333.3min'),
])
def test_message_with_time(source, message, is_long, time, time_str):
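    # short messages are padded with dots to a fixed width of 70 characters;
    # longer ones are left unpadded and exceed it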
out = _message_with_time(source, message, time)
if is_long:
assert len(out) > 70
else:
assert len(out) == 70
assert out.startswith('[' + source + '] ')
out = out[len(source) + 3:]
assert out.endswith(time_str)
out = out[:-len(time_str)]
assert out.endswith(', total=')
out = out[:-len(', total=')]
assert out.endswith(message)
out = out[:-len(message)]
assert out.endswith(' ')
out = out[:-1]
if is_long:
assert not out
else:
        assert list(set(out)) == ['.']


@pytest.mark.parametrize(
['message', 'expected'],
[
('hello', _message_with_time('ABC', 'hello', 0.1) + '\n'),
('', _message_with_time('ABC', '', 0.1) + '\n'),
(None, ''),
])
def test_print_elapsed_time(message, expected, capsys, monkeypatch):
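    # pin the timer to 0 on entry and 0.1 on exit so the reported elapsed time
    # is a deterministic 0.1s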
monkeypatch.setattr(timeit, 'default_timer', lambda: 0)
with _print_elapsed_time('ABC', message):
monkeypatch.setattr(timeit, 'default_timer', lambda: 0.1)
    assert capsys.readouterr().out == expected


@pytest.mark.parametrize("value, result", [(float("nan"), True),
(np.nan, True),
(np.float32("nan"), True),
(np.float64("nan"), True),
(0, False),
(0., False),
(None, False),
("", False),
("nan", False),
([np.nan], False)])
def test_is_scalar_nan(value, result):
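    # only scalar float NaNs count: None, strings and containers holding NaN
    # are all expected to be False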
    assert is_scalar_nan(value) is result


def dummy_func():
    pass


def test_deprecation_joblib_api(tmpdir):
# Only parallel_backend and register_parallel_backend are not deprecated in
# sklearn.utils
from sklearn.utils import parallel_backend, register_parallel_backend
assert_no_warnings(parallel_backend, 'loky', None)
assert_no_warnings(register_parallel_backend, 'failing', None)
from sklearn.utils._joblib import joblib
    del joblib.parallel.BACKENDS['failing']


@pytest.mark.parametrize(
"sequence",
[[np.array(1), np.array(2)], [[1, 2], [3, 4]]]
)
def test_to_object_array(sequence):
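    # each element of the sequence should become one entry of a 1d object
    # array, even when the elements could be broadcast into a 2d array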
out = _to_object_array(sequence)
assert isinstance(out, np.ndarray)
assert out.dtype.kind == 'O'
assert out.ndim == 1
