Uploaded Test files

2020-11-12 11:05:57 -05:00 · 2020-11-12 11:05:57 -05:00 · 2e81cb7d99
commit 2e81cb7d99
parent f584ad9d97
16627 changed files with 2065359 additions and 102444 deletions
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/init.py
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/init.py
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/init.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/init.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_ball_tree.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_ball_tree.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_dist_metrics.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_dist_metrics.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_graph.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_graph.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_kd_tree.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_kd_tree.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_kde.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_kde.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_lof.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_lof.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_nca.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_nca.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_nearest_centroid.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_nearest_centroid.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_neighbors.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_neighbors.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_neighbors_pipeline.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_neighbors_pipeline.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_neighbors_tree.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_neighbors_tree.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_quad_tree.cpython-36.pyc
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/pycache/test_quad_tree.cpython-36.pyc
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/test_ball_tree.py
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/test_ball_tree.py
@ -0,0 +1,67 @@
+import itertools
+
+import numpy as np
+import pytest
+from numpy.testing import assert_array_almost_equal
+from sklearn.neighbors._ball_tree import BallTree
+from sklearn.neighbors import DistanceMetric
+from sklearn.utils import check_random_state
+
+rng = np.random.RandomState(10)
+V_mahalanobis = rng.rand(3, 3)
+V_mahalanobis = np.dot(V_mahalanobis, V_mahalanobis.T)
+
+DIMENSION = 3
+
+METRICS = {'euclidean': {},
+           'manhattan': {},
+           'minkowski': dict(p=3),
+           'chebyshev': {},
+           'seuclidean': dict(V=rng.random_sample(DIMENSION)),
+           'wminkowski': dict(p=3, w=rng.random_sample(DIMENSION)),
+           'mahalanobis': dict(V=V_mahalanobis)}
+
+DISCRETE_METRICS = ['hamming',
+                    'canberra',
+                    'braycurtis']
+
+BOOLEAN_METRICS = ['matching', 'jaccard', 'dice', 'kulsinski',
+                   'rogerstanimoto', 'russellrao', 'sokalmichener',
+                   'sokalsneath']
+
+
+def brute_force_neighbors(X, Y, k, metric, **kwargs):
+    D = DistanceMetric.get_metric(metric, **kwargs).pairwise(Y, X)
+    ind = np.argsort(D, axis=1)[:, :k]
+    dist = D[np.arange(Y.shape[0])[:, None], ind]
+    return dist, ind
+
+
+@pytest.mark.parametrize('metric',
+                         itertools.chain(BOOLEAN_METRICS, DISCRETE_METRICS))
+def test_ball_tree_query_metrics(metric):
+    rng = check_random_state(0)
+    if metric in BOOLEAN_METRICS:
+        X = rng.random_sample((40, 10)).round(0)
+        Y = rng.random_sample((10, 10)).round(0)
+    elif metric in DISCRETE_METRICS:
+        X = (4 * rng.random_sample((40, 10))).round(0)
+        Y = (4 * rng.random_sample((10, 10))).round(0)
+
+    k = 5
+
+    bt = BallTree(X, leaf_size=1, metric=metric)
+    dist1, ind1 = bt.query(Y, k)
+    dist2, ind2 = brute_force_neighbors(X, Y, k, metric)
+    assert_array_almost_equal(dist1, dist2)
+
+
+def test_query_haversine():
+    rng = check_random_state(0)
+    X = 2 * np.pi * rng.random_sample((40, 2))
+    bt = BallTree(X, leaf_size=1, metric='haversine')
+    dist1, ind1 = bt.query(X, k=5)
+    dist2, ind2 = brute_force_neighbors(X, X, k=5, metric='haversine')
+
+    assert_array_almost_equal(dist1, dist2)
+    assert_array_almost_equal(ind1, ind2)
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/test_dist_metrics.py
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/test_dist_metrics.py
@ -0,0 +1,203 @@
+import itertools
+import pickle
+
+import numpy as np
+from numpy.testing import assert_array_almost_equal
+
+import pytest
+
+from scipy.spatial.distance import cdist
+from sklearn.neighbors import DistanceMetric
+from sklearn.neighbors import BallTree
+from sklearn.utils import check_random_state
+from sklearn.utils._testing import assert_raises_regex
+from sklearn.utils.fixes import sp_version, parse_version
+
+
+def dist_func(x1, x2, p):
+    return np.sum((x1 - x2) ** p) ** (1. / p)
+
+
+rng = check_random_state(0)
+d = 4
+n1 = 20
+n2 = 25
+X1 = rng.random_sample((n1, d)).astype('float64', copy=False)
+X2 = rng.random_sample((n2, d)).astype('float64', copy=False)
+
+# make boolean arrays: ones and zeros
+X1_bool = X1.round(0)
+X2_bool = X2.round(0)
+
+V = rng.random_sample((d, d))
+VI = np.dot(V, V.T)
+
+BOOL_METRICS = ['matching', 'jaccard', 'dice',
+                'kulsinski', 'rogerstanimoto', 'russellrao',
+                'sokalmichener', 'sokalsneath']
+
+METRICS_DEFAULT_PARAMS = {'euclidean': {},
+                          'cityblock': {},
+                          'minkowski': dict(p=(1, 1.5, 2, 3)),
+                          'chebyshev': {},
+                          'seuclidean': dict(V=(rng.random_sample(d),)),
+                          'wminkowski': dict(p=(1, 1.5, 3),
+                                             w=(rng.random_sample(d),)),
+                          'mahalanobis': dict(VI=(VI,)),
+                          'hamming': {},
+                          'canberra': {},
+                          'braycurtis': {}}
+
+
+@pytest.mark.parametrize('metric', METRICS_DEFAULT_PARAMS)
+def test_cdist(metric):
+    argdict = METRICS_DEFAULT_PARAMS[metric]
+    keys = argdict.keys()
+    for vals in itertools.product(*argdict.values()):
+        kwargs = dict(zip(keys, vals))
+        D_true = cdist(X1, X2, metric, **kwargs)
+        check_cdist(metric, kwargs, D_true)
+
+
+@pytest.mark.parametrize('metric', BOOL_METRICS)
+def test_cdist_bool_metric(metric):
+    D_true = cdist(X1_bool, X2_bool, metric)
+    check_cdist_bool(metric, D_true)
+
+
+def check_cdist(metric, kwargs, D_true):
+    dm = DistanceMetric.get_metric(metric, **kwargs)
+    D12 = dm.pairwise(X1, X2)
+    assert_array_almost_equal(D12, D_true)
+
+
+def check_cdist_bool(metric, D_true):
+    dm = DistanceMetric.get_metric(metric)
+    D12 = dm.pairwise(X1_bool, X2_bool)
+    assert_array_almost_equal(D12, D_true)
+
+
+@pytest.mark.parametrize('metric', METRICS_DEFAULT_PARAMS)
+def test_pdist(metric):
+    argdict = METRICS_DEFAULT_PARAMS[metric]
+    keys = argdict.keys()
+    for vals in itertools.product(*argdict.values()):
+        kwargs = dict(zip(keys, vals))
+        D_true = cdist(X1, X1, metric, **kwargs)
+        check_pdist(metric, kwargs, D_true)
+
+
+@pytest.mark.parametrize('metric', BOOL_METRICS)
+def test_pdist_bool_metrics(metric):
+    D_true = cdist(X1_bool, X1_bool, metric)
+    check_pdist_bool(metric, D_true)
+
+
+def check_pdist(metric, kwargs, D_true):
+    dm = DistanceMetric.get_metric(metric, **kwargs)
+    D12 = dm.pairwise(X1)
+    assert_array_almost_equal(D12, D_true)
+
+
+def check_pdist_bool(metric, D_true):
+    dm = DistanceMetric.get_metric(metric)
+    D12 = dm.pairwise(X1_bool)
+    # Based on https://github.com/scipy/scipy/pull/7373
+    # When comparing two all-zero vectors, scipy>=1.2.0 jaccard metric
+    # was changed to return 0, instead of nan.
+    if metric == 'jaccard' and sp_version < parse_version('1.2.0'):
+        D_true[np.isnan(D_true)] = 0
+    assert_array_almost_equal(D12, D_true)
+
+
+@pytest.mark.parametrize('metric', METRICS_DEFAULT_PARAMS)
+def test_pickle(metric):
+    argdict = METRICS_DEFAULT_PARAMS[metric]
+    keys = argdict.keys()
+    for vals in itertools.product(*argdict.values()):
+        kwargs = dict(zip(keys, vals))
+        check_pickle(metric, kwargs)
+
+
+@pytest.mark.parametrize('metric', BOOL_METRICS)
+def test_pickle_bool_metrics(metric):
+    dm = DistanceMetric.get_metric(metric)
+    D1 = dm.pairwise(X1_bool)
+    dm2 = pickle.loads(pickle.dumps(dm))
+    D2 = dm2.pairwise(X1_bool)
+    assert_array_almost_equal(D1, D2)
+
+
+def check_pickle(metric, kwargs):
+    dm = DistanceMetric.get_metric(metric, **kwargs)
+    D1 = dm.pairwise(X1)
+    dm2 = pickle.loads(pickle.dumps(dm))
+    D2 = dm2.pairwise(X1)
+    assert_array_almost_equal(D1, D2)
+
+
+def test_haversine_metric():
+    def haversine_slow(x1, x2):
+        return 2 * np.arcsin(np.sqrt(np.sin(0.5 * (x1[0] - x2[0])) ** 2
+                                     + np.cos(x1[0]) * np.cos(x2[0]) *
+                                     np.sin(0.5 * (x1[1] - x2[1])) ** 2))
+
+    X = np.random.random((10, 2))
+
+    haversine = DistanceMetric.get_metric("haversine")
+
+    D1 = haversine.pairwise(X)
+    D2 = np.zeros_like(D1)
+    for i, x1 in enumerate(X):
+        for j, x2 in enumerate(X):
+            D2[i, j] = haversine_slow(x1, x2)
+
+    assert_array_almost_equal(D1, D2)
+    assert_array_almost_equal(haversine.dist_to_rdist(D1),
+                              np.sin(0.5 * D2) ** 2)
+
+
+def test_pyfunc_metric():
+    X = np.random.random((10, 3))
+
+    euclidean = DistanceMetric.get_metric("euclidean")
+    pyfunc = DistanceMetric.get_metric("pyfunc", func=dist_func, p=2)
+
+    # Check if both callable metric and predefined metric initialized
+    # DistanceMetric object is picklable
+    euclidean_pkl = pickle.loads(pickle.dumps(euclidean))
+    pyfunc_pkl = pickle.loads(pickle.dumps(pyfunc))
+
+    D1 = euclidean.pairwise(X)
+    D2 = pyfunc.pairwise(X)
+
+    D1_pkl = euclidean_pkl.pairwise(X)
+    D2_pkl = pyfunc_pkl.pairwise(X)
+
+    assert_array_almost_equal(D1, D2)
+    assert_array_almost_equal(D1_pkl, D2_pkl)
+
+
+def test_bad_pyfunc_metric():
+    def wrong_distance(x, y):
+        return "1"
+
+    X = np.ones((5, 2))
+    assert_raises_regex(TypeError,
+                        "Custom distance function must accept two vectors",
+                        BallTree, X, metric=wrong_distance)
+
+
+def test_input_data_size():
+    # Regression test for #6288
+    # Previously, a metric requiring a particular input dimension would fail
+    def custom_metric(x, y):
+        assert x.shape[0] == 3
+        return np.sum((x - y) ** 2)
+
+    rng = check_random_state(0)
+    X = rng.rand(10, 3)
+
+    pyfunc = DistanceMetric.get_metric("pyfunc", func=custom_metric)
+    eucl = DistanceMetric.get_metric("euclidean")
+    assert_array_almost_equal(pyfunc.pairwise(X), eucl.pairwise(X) ** 2)
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/test_graph.py
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/test_graph.py
@ -0,0 +1,79 @@
+import numpy as np
+
+from sklearn.metrics import euclidean_distances
+from sklearn.neighbors import KNeighborsTransformer, RadiusNeighborsTransformer
+from sklearn.neighbors._base import _is_sorted_by_data
+
+
+def test_transformer_result():
+    # Test the number of neighbors returned
+    n_neighbors = 5
+    n_samples_fit = 20
+    n_queries = 18
+    n_features = 10
+
+    rng = np.random.RandomState(42)
+    X = rng.randn(n_samples_fit, n_features)
+    X2 = rng.randn(n_queries, n_features)
+    radius = np.percentile(euclidean_distances(X), 10)
+
+    # with n_neighbors
+    for mode in ['distance', 'connectivity']:
+        add_one = mode == 'distance'
+        nnt = KNeighborsTransformer(n_neighbors=n_neighbors, mode=mode)
+        Xt = nnt.fit_transform(X)
+        assert Xt.shape == (n_samples_fit, n_samples_fit)
+        assert Xt.data.shape == (n_samples_fit * (n_neighbors + add_one), )
+        assert Xt.format == 'csr'
+        assert _is_sorted_by_data(Xt)
+
+        X2t = nnt.transform(X2)
+        assert X2t.shape == (n_queries, n_samples_fit)
+        assert X2t.data.shape == (n_queries * (n_neighbors + add_one), )
+        assert X2t.format == 'csr'
+        assert _is_sorted_by_data(X2t)
+
+    # with radius
+    for mode in ['distance', 'connectivity']:
+        add_one = mode == 'distance'
+        nnt = RadiusNeighborsTransformer(radius=radius, mode=mode)
+        Xt = nnt.fit_transform(X)
+        assert Xt.shape == (n_samples_fit, n_samples_fit)
+        assert not Xt.data.shape == (n_samples_fit * (n_neighbors + add_one), )
+        assert Xt.format == 'csr'
+        assert _is_sorted_by_data(Xt)
+
+        X2t = nnt.transform(X2)
+        assert X2t.shape == (n_queries, n_samples_fit)
+        assert not X2t.data.shape == (n_queries * (n_neighbors + add_one), )
+        assert X2t.format == 'csr'
+        assert _is_sorted_by_data(X2t)
+
+
+def _has_explicit_diagonal(X):
+    """Return True if the diagonal is explicitly stored"""
+    X = X.tocoo()
+    explicit = X.row[X.row == X.col]
+    return len(explicit) == X.shape[0]
+
+
+def test_explicit_diagonal():
+    # Test that the diagonal is explicitly stored in the sparse graph
+    n_neighbors = 5
+    n_samples_fit, n_samples_transform, n_features = 20, 18, 10
+    rng = np.random.RandomState(42)
+    X = rng.randn(n_samples_fit, n_features)
+    X2 = rng.randn(n_samples_transform, n_features)
+
+    nnt = KNeighborsTransformer(n_neighbors=n_neighbors)
+    Xt = nnt.fit_transform(X)
+    assert _has_explicit_diagonal(Xt)
+    assert np.all(Xt.data.reshape(n_samples_fit, n_neighbors + 1)[:, 0] == 0)
+
+    Xt = nnt.transform(X)
+    assert _has_explicit_diagonal(Xt)
+    assert np.all(Xt.data.reshape(n_samples_fit, n_neighbors + 1)[:, 0] == 0)
+
+    # Using transform on new data should not always have zero diagonal
+    X2t = nnt.transform(X2)
+    assert not _has_explicit_diagonal(X2t)
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/test_kd_tree.py
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/test_kd_tree.py
@ -0,0 +1,6 @@
+DIMENSION = 3
+
+METRICS = {'euclidean': {},
+           'manhattan': {},
+           'chebyshev': {},
+           'minkowski': dict(p=3)}
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/test_kde.py
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/test_kde.py
@ -0,0 +1,250 @@
+import numpy as np
+
+import pytest
+
+from sklearn.utils._testing import assert_allclose, assert_raises
+from sklearn.neighbors import KernelDensity, KDTree, NearestNeighbors
+from sklearn.neighbors._ball_tree import kernel_norm
+from sklearn.pipeline import make_pipeline
+from sklearn.datasets import make_blobs
+from sklearn.model_selection import GridSearchCV
+from sklearn.preprocessing import StandardScaler
+from sklearn.exceptions import NotFittedError
+import joblib
+
+
+# XXX Duplicated in test_neighbors_tree, test_kde
+def compute_kernel_slow(Y, X, kernel, h):
+    d = np.sqrt(((Y[:, None, :] - X) ** 2).sum(-1))
+    norm = kernel_norm(h, X.shape[1], kernel) / X.shape[0]
+
+    if kernel == 'gaussian':
+        return norm * np.exp(-0.5 * (d * d) / (h * h)).sum(-1)
+    elif kernel == 'tophat':
+        return norm * (d < h).sum(-1)
+    elif kernel == 'epanechnikov':
+        return norm * ((1.0 - (d * d) / (h * h)) * (d < h)).sum(-1)
+    elif kernel == 'exponential':
+        return norm * (np.exp(-d / h)).sum(-1)
+    elif kernel == 'linear':
+        return norm * ((1 - d / h) * (d < h)).sum(-1)
+    elif kernel == 'cosine':
+        return norm * (np.cos(0.5 * np.pi * d / h) * (d < h)).sum(-1)
+    else:
+        raise ValueError('kernel not recognized')
+
+
+def check_results(kernel, bandwidth, atol, rtol, X, Y, dens_true):
+    kde = KernelDensity(kernel=kernel, bandwidth=bandwidth,
+                        atol=atol, rtol=rtol)
+    log_dens = kde.fit(X).score_samples(Y)
+    assert_allclose(np.exp(log_dens), dens_true,
+                    atol=atol, rtol=max(1E-7, rtol))
+    assert_allclose(np.exp(kde.score(Y)),
+                    np.prod(dens_true),
+                    atol=atol, rtol=max(1E-7, rtol))
+
+
+@pytest.mark.parametrize(
+        'kernel',
+        ['gaussian', 'tophat', 'epanechnikov',
+         'exponential', 'linear', 'cosine'])
+@pytest.mark.parametrize('bandwidth', [0.01, 0.1, 1])
+def test_kernel_density(kernel, bandwidth):
+    n_samples, n_features = (100, 3)
+
+    rng = np.random.RandomState(0)
+    X = rng.randn(n_samples, n_features)
+    Y = rng.randn(n_samples, n_features)
+
+    dens_true = compute_kernel_slow(Y, X, kernel, bandwidth)
+
+    for rtol in [0, 1E-5]:
+        for atol in [1E-6, 1E-2]:
+            for breadth_first in (True, False):
+                check_results(kernel, bandwidth, atol, rtol,
+                              X, Y, dens_true)
+
+
+def test_kernel_density_sampling(n_samples=100, n_features=3):
+    rng = np.random.RandomState(0)
+    X = rng.randn(n_samples, n_features)
+
+    bandwidth = 0.2
+
+    for kernel in ['gaussian', 'tophat']:
+        # draw a tophat sample
+        kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
+        samp = kde.sample(100)
+        assert X.shape == samp.shape
+
+        # check that samples are in the right range
+        nbrs = NearestNeighbors(n_neighbors=1).fit(X)
+        dist, ind = nbrs.kneighbors(X, return_distance=True)
+
+        if kernel == 'tophat':
+            assert np.all(dist < bandwidth)
+        elif kernel == 'gaussian':
+            # 5 standard deviations is safe for 100 samples, but there's a
+            # very small chance this test could fail.
+            assert np.all(dist < 5 * bandwidth)
+
+    # check unsupported kernels
+    for kernel in ['epanechnikov', 'exponential', 'linear', 'cosine']:
+        kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
+        assert_raises(NotImplementedError, kde.sample, 100)
+
+    # non-regression test: used to return a scalar
+    X = rng.randn(4, 1)
+    kde = KernelDensity(kernel="gaussian").fit(X)
+    assert kde.sample().shape == (1, 1)
+
+
+@pytest.mark.parametrize('algorithm', ['auto', 'ball_tree', 'kd_tree'])
+@pytest.mark.parametrize('metric',
+                         ['euclidean', 'minkowski', 'manhattan',
+                          'chebyshev', 'haversine'])
+def test_kde_algorithm_metric_choice(algorithm, metric):
+    # Smoke test for various metrics and algorithms
+    rng = np.random.RandomState(0)
+    X = rng.randn(10, 2)    # 2 features required for haversine dist.
+    Y = rng.randn(10, 2)
+
+    if algorithm == 'kd_tree' and metric not in KDTree.valid_metrics:
+        assert_raises(ValueError, KernelDensity,
+                      algorithm=algorithm, metric=metric)
+    else:
+        kde = KernelDensity(algorithm=algorithm, metric=metric)
+        kde.fit(X)
+        y_dens = kde.score_samples(Y)
+        assert y_dens.shape == Y.shape[:1]
+
+
+def test_kde_score(n_samples=100, n_features=3):
+    pass
+    # FIXME
+    # rng = np.random.RandomState(0)
+    # X = rng.random_sample((n_samples, n_features))
+    # Y = rng.random_sample((n_samples, n_features))
+
+
+def test_kde_badargs():
+    assert_raises(ValueError, KernelDensity,
+                  algorithm='blah')
+    assert_raises(ValueError, KernelDensity,
+                  bandwidth=0)
+    assert_raises(ValueError, KernelDensity,
+                  kernel='blah')
+    assert_raises(ValueError, KernelDensity,
+                  metric='blah')
+    assert_raises(ValueError, KernelDensity,
+                  algorithm='kd_tree', metric='blah')
+    kde = KernelDensity()
+    assert_raises(ValueError, kde.fit, np.random.random((200, 10)),
+                  sample_weight=np.random.random((200, 10)))
+    assert_raises(ValueError, kde.fit, np.random.random((200, 10)),
+                  sample_weight=-np.random.random(200))
+
+
+def test_kde_pipeline_gridsearch():
+    # test that kde plays nice in pipelines and grid-searches
+    X, _ = make_blobs(cluster_std=.1, random_state=1,
+                      centers=[[0, 1], [1, 0], [0, 0]])
+    pipe1 = make_pipeline(StandardScaler(with_mean=False, with_std=False),
+                          KernelDensity(kernel="gaussian"))
+    params = dict(kerneldensity__bandwidth=[0.001, 0.01, 0.1, 1, 10])
+    search = GridSearchCV(pipe1, param_grid=params)
+    search.fit(X)
+    assert search.best_params_['kerneldensity__bandwidth'] == .1
+
+
+def test_kde_sample_weights():
+    n_samples = 400
+    size_test = 20
+    weights_neutral = np.full(n_samples, 3.)
+    for d in [1, 2, 10]:
+        rng = np.random.RandomState(0)
+        X = rng.rand(n_samples, d)
+        weights = 1 + (10 * X.sum(axis=1)).astype(np.int8)
+        X_repetitions = np.repeat(X, weights, axis=0)
+        n_samples_test = size_test // d
+        test_points = rng.rand(n_samples_test, d)
+        for algorithm in ['auto', 'ball_tree', 'kd_tree']:
+            for metric in ['euclidean', 'minkowski', 'manhattan',
+                           'chebyshev']:
+                if algorithm != 'kd_tree' or metric in KDTree.valid_metrics:
+                    kde = KernelDensity(algorithm=algorithm, metric=metric)
+
+                    # Test that adding a constant sample weight has no effect
+                    kde.fit(X, sample_weight=weights_neutral)
+                    scores_const_weight = kde.score_samples(test_points)
+                    sample_const_weight = kde.sample(random_state=1234)
+                    kde.fit(X)
+                    scores_no_weight = kde.score_samples(test_points)
+                    sample_no_weight = kde.sample(random_state=1234)
+                    assert_allclose(scores_const_weight, scores_no_weight)
+                    assert_allclose(sample_const_weight, sample_no_weight)
+
+                    # Test equivalence between sampling and (integer) weights
+                    kde.fit(X, sample_weight=weights)
+                    scores_weight = kde.score_samples(test_points)
+                    sample_weight = kde.sample(random_state=1234)
+                    kde.fit(X_repetitions)
+                    scores_ref_sampling = kde.score_samples(test_points)
+                    sample_ref_sampling = kde.sample(random_state=1234)
+                    assert_allclose(scores_weight, scores_ref_sampling)
+                    assert_allclose(sample_weight, sample_ref_sampling)
+
+                    # Test that sample weights has a non-trivial effect
+                    diff = np.max(np.abs(scores_no_weight - scores_weight))
+                    assert diff > 0.001
+
+                    # Test invariance with respect to arbitrary scaling
+                    scale_factor = rng.rand()
+                    kde.fit(X, sample_weight=(scale_factor * weights))
+                    scores_scaled_weight = kde.score_samples(test_points)
+                    assert_allclose(scores_scaled_weight, scores_weight)
+
+
+def test_sample_weight_invalid():
+    # Check sample weighting raises errors.
+    kde = KernelDensity()
+    data = np.reshape([1., 2., 3.], (-1, 1))
+
+    sample_weight = [0.1, -0.2, 0.3]
+    expected_err = "sample_weight must have positive values"
+    with pytest.raises(ValueError, match=expected_err):
+        kde.fit(data, sample_weight=sample_weight)
+
+
+@pytest.mark.parametrize('sample_weight', [None, [0.1, 0.2, 0.3]])
+def test_pickling(tmpdir, sample_weight):
+    # Make sure that predictions are the same before and after pickling. Used
+    # to be a bug because sample_weights wasn't pickled and the resulting tree
+    # would miss some info.
+
+    kde = KernelDensity()
+    data = np.reshape([1., 2., 3.], (-1, 1))
+    kde.fit(data, sample_weight=sample_weight)
+
+    X = np.reshape([1.1, 2.1], (-1, 1))
+    scores = kde.score_samples(X)
+
+    file_path = str(tmpdir.join('dump.pkl'))
+    joblib.dump(kde, file_path)
+    kde = joblib.load(file_path)
+    scores_pickled = kde.score_samples(X)
+
+    assert_allclose(scores, scores_pickled)
+
+
+@pytest.mark.parametrize('method', ['score_samples', 'sample'])
+def test_check_is_fitted(method):
+    # Check that predict raises an exception in an unfitted estimator.
+    # Unfitted estimators should raise a NotFittedError.
+    rng = np.random.RandomState(0)
+    X = rng.randn(10, 2)
+    kde = KernelDensity()
+
+    with pytest.raises(NotFittedError):
+        getattr(kde, method)(X)
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/test_lof.py
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/test_lof.py
@ -0,0 +1,232 @@
+# Authors: Nicolas Goix <nicolas.goix@telecom-paristech.fr>
+#          Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
+# License: BSD 3 clause
+
+from math import sqrt
+
+import numpy as np
+from sklearn import neighbors
+
+import pytest
+from numpy.testing import assert_array_equal
+
+from sklearn import metrics
+from sklearn.metrics import roc_auc_score
+
+from sklearn.utils import check_random_state
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_warns_message
+from sklearn.utils._testing import assert_raises
+from sklearn.utils._testing import assert_raises_regex
+from sklearn.utils.estimator_checks import check_estimator
+from sklearn.utils.estimator_checks import check_outlier_corruption
+
+from sklearn.datasets import load_iris
+
+
+# load the iris dataset
+# and randomly permute it
+rng = check_random_state(0)
+iris = load_iris()
+perm = rng.permutation(iris.target.size)
+iris.data = iris.data[perm]
+iris.target = iris.target[perm]
+
+
+def test_lof():
+    # Toy sample (the last two samples are outliers):
+    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [5, 3], [-4, 2]]
+
+    # Test LocalOutlierFactor:
+    clf = neighbors.LocalOutlierFactor(n_neighbors=5)
+    score = clf.fit(X).negative_outlier_factor_
+    assert_array_equal(clf._fit_X, X)
+
+    # Assert largest outlier score is smaller than smallest inlier score:
+    assert np.min(score[:-2]) > np.max(score[-2:])
+
+    # Assert predict() works:
+    clf = neighbors.LocalOutlierFactor(contamination=0.25,
+                                       n_neighbors=5).fit(X)
+    assert_array_equal(clf._predict(), 6 * [1] + 2 * [-1])
+    assert_array_equal(clf.fit_predict(X), 6 * [1] + 2 * [-1])
+
+
+def test_lof_performance():
+    # Generate train/test data
+    rng = check_random_state(2)
+    X = 0.3 * rng.randn(120, 2)
+    X_train = X[:100]
+
+    # Generate some abnormal novel observations
+    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
+    X_test = np.r_[X[100:], X_outliers]
+    y_test = np.array([0] * 20 + [1] * 20)
+
+    # fit the model for novelty detection
+    clf = neighbors.LocalOutlierFactor(novelty=True).fit(X_train)
+
+    # predict scores (the lower, the more normal)
+    y_pred = -clf.decision_function(X_test)
+
+    # check that roc_auc is good
+    assert roc_auc_score(y_test, y_pred) > .99
+
+
+def test_lof_values():
+    # toy samples:
+    X_train = [[1, 1], [1, 2], [2, 1]]
+    clf1 = neighbors.LocalOutlierFactor(n_neighbors=2,
+                                        contamination=0.1,
+                                        novelty=True).fit(X_train)
+    clf2 = neighbors.LocalOutlierFactor(n_neighbors=2,
+                                        novelty=True).fit(X_train)
+    s_0 = 2. * sqrt(2.) / (1. + sqrt(2.))
+    s_1 = (1. + sqrt(2)) * (1. / (4. * sqrt(2.)) + 1. / (2. + 2. * sqrt(2)))
+    # check predict()
+    assert_array_almost_equal(-clf1.negative_outlier_factor_, [s_0, s_1, s_1])
+    assert_array_almost_equal(-clf2.negative_outlier_factor_, [s_0, s_1, s_1])
+    # check predict(one sample not in train)
+    assert_array_almost_equal(-clf1.score_samples([[2., 2.]]), [s_0])
+    assert_array_almost_equal(-clf2.score_samples([[2., 2.]]), [s_0])
+    # check predict(one sample already in train)
+    assert_array_almost_equal(-clf1.score_samples([[1., 1.]]), [s_1])
+    assert_array_almost_equal(-clf2.score_samples([[1., 1.]]), [s_1])
+
+
+def test_lof_precomputed(random_state=42):
+    """Tests LOF with a distance matrix."""
+    # Note: smaller samples may result in spurious test success
+    rng = np.random.RandomState(random_state)
+    X = rng.random_sample((10, 4))
+    Y = rng.random_sample((3, 4))
+    DXX = metrics.pairwise_distances(X, metric='euclidean')
+    DYX = metrics.pairwise_distances(Y, X, metric='euclidean')
+    # As a feature matrix (n_samples by n_features)
+    lof_X = neighbors.LocalOutlierFactor(n_neighbors=3, novelty=True)
+    lof_X.fit(X)
+    pred_X_X = lof_X._predict()
+    pred_X_Y = lof_X.predict(Y)
+
+    # As a dense distance matrix (n_samples by n_samples)
+    lof_D = neighbors.LocalOutlierFactor(n_neighbors=3, algorithm='brute',
+                                         metric='precomputed', novelty=True)
+    lof_D.fit(DXX)
+    pred_D_X = lof_D._predict()
+    pred_D_Y = lof_D.predict(DYX)
+
+    assert_array_almost_equal(pred_X_X, pred_D_X)
+    assert_array_almost_equal(pred_X_Y, pred_D_Y)
+
+
+def test_n_neighbors_attribute():
+    X = iris.data
+    clf = neighbors.LocalOutlierFactor(n_neighbors=500).fit(X)
+    assert clf.n_neighbors_ == X.shape[0] - 1
+
+    clf = neighbors.LocalOutlierFactor(n_neighbors=500)
+    assert_warns_message(UserWarning,
+                         "n_neighbors will be set to (n_samples - 1)",
+                         clf.fit, X)
+    assert clf.n_neighbors_ == X.shape[0] - 1
+
+
+def test_score_samples():
+    X_train = [[1, 1], [1, 2], [2, 1]]
+    clf1 = neighbors.LocalOutlierFactor(n_neighbors=2,
+                                        contamination=0.1,
+                                        novelty=True).fit(X_train)
+    clf2 = neighbors.LocalOutlierFactor(n_neighbors=2,
+                                        novelty=True).fit(X_train)
+    assert_array_equal(clf1.score_samples([[2., 2.]]),
+                       clf1.decision_function([[2., 2.]]) + clf1.offset_)
+    assert_array_equal(clf2.score_samples([[2., 2.]]),
+                       clf2.decision_function([[2., 2.]]) + clf2.offset_)
+    assert_array_equal(clf1.score_samples([[2., 2.]]),
+                       clf2.score_samples([[2., 2.]]))
+
+
+def test_contamination():
+    X = [[1, 1], [1, 0]]
+    clf = neighbors.LocalOutlierFactor(contamination=0.6)
+    assert_raises(ValueError, clf.fit, X)
+
+
+def test_novelty_errors():
+    X = iris.data
+
+    # check errors for novelty=False
+    clf = neighbors.LocalOutlierFactor()
+    clf.fit(X)
+    # predict, decision_function and score_samples raise ValueError
+    for method in ['predict', 'decision_function', 'score_samples']:
+        msg = ('{} is not available when novelty=False'.format(method))
+        assert_raises_regex(AttributeError, msg, getattr, clf, method)
+
+    # check errors for novelty=True
+    clf = neighbors.LocalOutlierFactor(novelty=True)
+    msg = 'fit_predict is not available when novelty=True'
+    assert_raises_regex(AttributeError, msg, getattr, clf, 'fit_predict')
+
+
+def test_novelty_training_scores():
+    # check that the scores of the training samples are still accessible
+    # when novelty=True through the negative_outlier_factor_ attribute
+    X = iris.data
+
+    # fit with novelty=False
+    clf_1 = neighbors.LocalOutlierFactor()
+    clf_1.fit(X)
+    scores_1 = clf_1.negative_outlier_factor_
+
+    # fit with novelty=True
+    clf_2 = neighbors.LocalOutlierFactor(novelty=True)
+    clf_2.fit(X)
+    scores_2 = clf_2.negative_outlier_factor_
+
+    assert_array_almost_equal(scores_1, scores_2)
+
+
+def test_hasattr_prediction():
+    # check availability of prediction methods depending on novelty value.
+    X = [[1, 1], [1, 2], [2, 1]]
+
+    # when novelty=True
+    clf = neighbors.LocalOutlierFactor(novelty=True)
+    clf.fit(X)
+    assert hasattr(clf, 'predict')
+    assert hasattr(clf, 'decision_function')
+    assert hasattr(clf, 'score_samples')
+    assert not hasattr(clf, 'fit_predict')
+
+    # when novelty=False
+    clf = neighbors.LocalOutlierFactor(novelty=False)
+    clf.fit(X)
+    assert hasattr(clf, 'fit_predict')
+    assert not hasattr(clf, 'predict')
+    assert not hasattr(clf, 'decision_function')
+    assert not hasattr(clf, 'score_samples')
+
+
+def test_novelty_true_common_tests():
+
+    # the common tests are run for the default LOF (novelty=False).
+    # here we run these common tests for LOF when novelty=True
+    check_estimator(neighbors.LocalOutlierFactor(novelty=True))
+
+
+@pytest.mark.parametrize('expected_outliers', [30, 53])
+def test_predicted_outlier_number(expected_outliers):
+    # the number of predicted outliers should be equal to the number of
+    # expected outliers unless there are ties in the abnormality scores.
+    X = iris.data
+    n_samples = X.shape[0]
+    contamination = float(expected_outliers)/n_samples
+
+    clf = neighbors.LocalOutlierFactor(contamination=contamination)
+    y_pred = clf.fit_predict(X)
+
+    num_outliers = np.sum(y_pred != 1)
+    if num_outliers != expected_outliers:
+        y_dec = clf.negative_outlier_factor_
+        check_outlier_corruption(num_outliers, expected_outliers, y_dec)
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/test_nca.py
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/test_nca.py
@ -0,0 +1,534 @@
+# coding: utf-8
+"""
+Testing for Neighborhood Component Analysis module (sklearn.neighbors.nca)
+"""
+
+# Authors: William de Vazelhes <wdevazelhes@gmail.com>
+#          John Chiotellis <ioannis.chiotellis@in.tum.de>
+# License: BSD 3 clause
+
+import pytest
+import re
+import numpy as np
+from numpy.testing import assert_array_equal, assert_array_almost_equal
+from scipy.optimize import check_grad
+from sklearn import clone
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.utils import check_random_state
+from sklearn.utils._testing import (assert_raises,
+                                   assert_raise_message, assert_warns_message)
+from sklearn.datasets import load_iris, make_classification, make_blobs
+from sklearn.neighbors import NeighborhoodComponentsAnalysis
+from sklearn.metrics import pairwise_distances
+
+
+rng = check_random_state(0)
+# load and shuffle iris dataset
+iris = load_iris()
+perm = rng.permutation(iris.target.size)
+iris_data = iris.data[perm]
+iris_target = iris.target[perm]
+EPS = np.finfo(float).eps
+
+
+def test_simple_example():
+    """Test on a simple example.
+
+    Puts four points in the input space where the opposite labels points are
+    next to each other. After transform the samples from the same class
+    should be next to each other.
+
+    """
+    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
+    y = np.array([1, 0, 1, 0])
+    nca = NeighborhoodComponentsAnalysis(n_components=2, init='identity',
+                                         random_state=42)
+    nca.fit(X, y)
+    X_t = nca.transform(X)
+    assert_array_equal(pairwise_distances(X_t).argsort()[:, 1],
+                       np.array([2, 3, 0, 1]))
+
+
+def test_toy_example_collapse_points():
+    """Test on a toy example of three points that should collapse
+
+    We build a simple example: two points from the same class and a point from
+    a different class in the middle of them. On this simple example, the new
+    (transformed) points should all collapse into one single point. Indeed, the
+    objective is 2/(1 + exp(d/2)), with d the euclidean distance between the
+    two samples from the same class. This is maximized for d=0 (because d>=0),
+    with an objective equal to 1 (loss=-1.).
+
+    """
+    rng = np.random.RandomState(42)
+    input_dim = 5
+    two_points = rng.randn(2, input_dim)
+    X = np.vstack([two_points, two_points.mean(axis=0)[np.newaxis, :]])
+    y = [0, 0, 1]
+
+    class LossStorer:
+
+        def __init__(self, X, y):
+            self.loss = np.inf  # initialize the loss to very high
+            # Initialize a fake NCA and variables needed to compute the loss:
+            self.fake_nca = NeighborhoodComponentsAnalysis()
+            self.fake_nca.n_iter_ = np.inf
+            self.X, y, _ = self.fake_nca._validate_params(X, y)
+            self.same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]
+
+        def callback(self, transformation, n_iter):
+            """Stores the last value of the loss function"""
+            self.loss, _ = self.fake_nca._loss_grad_lbfgs(transformation,
+                                                          self.X,
+                                                          self.same_class_mask,
+                                                          -1.0)
+
+    loss_storer = LossStorer(X, y)
+    nca = NeighborhoodComponentsAnalysis(random_state=42,
+                                         callback=loss_storer.callback)
+    X_t = nca.fit_transform(X, y)
+    print(X_t)
+    # test that points are collapsed into one point
+    assert_array_almost_equal(X_t - X_t[0], 0.)
+    assert abs(loss_storer.loss + 1) < 1e-10
+
+
+def test_finite_differences():
+    """Test gradient of loss function
+
+    Assert that the gradient is almost equal to its finite differences
+    approximation.
+    """
+    # Initialize the transformation `M`, as well as `X` and `y` and `NCA`
+    rng = np.random.RandomState(42)
+    X, y = make_classification()
+    M = rng.randn(rng.randint(1, X.shape[1] + 1),
+                  X.shape[1])
+    nca = NeighborhoodComponentsAnalysis()
+    nca.n_iter_ = 0
+    mask = y[:, np.newaxis] == y[np.newaxis, :]
+
+    def fun(M):
+        return nca._loss_grad_lbfgs(M, X, mask)[0]
+
+    def grad(M):
+        return nca._loss_grad_lbfgs(M, X, mask)[1]
+
+    # compute relative error
+    rel_diff = check_grad(fun, grad, M.ravel()) / np.linalg.norm(grad(M))
+    np.testing.assert_almost_equal(rel_diff, 0., decimal=5)
+
+
+def test_params_validation():
+    # Test that invalid parameters raise value error
+    X = np.arange(12).reshape(4, 3)
+    y = [1, 1, 2, 2]
+    NCA = NeighborhoodComponentsAnalysis
+    rng = np.random.RandomState(42)
+
+    # TypeError
+    assert_raises(TypeError, NCA(max_iter='21').fit, X, y)
+    assert_raises(TypeError, NCA(verbose='true').fit, X, y)
+    assert_raises(TypeError, NCA(tol='1').fit, X, y)
+    assert_raises(TypeError, NCA(n_components='invalid').fit, X, y)
+    assert_raises(TypeError, NCA(warm_start=1).fit, X, y)
+
+    # ValueError
+    assert_raise_message(ValueError,
+                         "`init` must be 'auto', 'pca', 'lda', 'identity', "
+                         "'random' or a numpy array of shape "
+                         "(n_components, n_features).",
+                         NCA(init=1).fit, X, y)
+    assert_raise_message(ValueError,
+                         '`max_iter`= -1, must be >= 1.',
+                         NCA(max_iter=-1).fit, X, y)
+
+    init = rng.rand(5, 3)
+    assert_raise_message(ValueError,
+                         'The output dimensionality ({}) of the given linear '
+                         'transformation `init` cannot be greater than its '
+                         'input dimensionality ({}).'
+                         .format(init.shape[0], init.shape[1]),
+                         NCA(init=init).fit, X, y)
+
+    n_components = 10
+    assert_raise_message(ValueError,
+                         'The preferred dimensionality of the '
+                         'projected space `n_components` ({}) cannot '
+                         'be greater than the given data '
+                         'dimensionality ({})!'
+                         .format(n_components, X.shape[1]),
+                         NCA(n_components=n_components).fit, X, y)
+
+
+def test_transformation_dimensions():
+    X = np.arange(12).reshape(4, 3)
+    y = [1, 1, 2, 2]
+
+    # Fail if transformation input dimension does not match inputs dimensions
+    transformation = np.array([[1, 2], [3, 4]])
+    assert_raises(ValueError,
+                  NeighborhoodComponentsAnalysis(init=transformation).fit,
+                  X, y)
+
+    # Fail if transformation output dimension is larger than
+    # transformation input dimension
+    transformation = np.array([[1, 2], [3, 4], [5, 6]])
+    # len(transformation) > len(transformation[0])
+    assert_raises(ValueError,
+                  NeighborhoodComponentsAnalysis(init=transformation).fit,
+                  X, y)
+
+    # Pass otherwise
+    transformation = np.arange(9).reshape(3, 3)
+    NeighborhoodComponentsAnalysis(init=transformation).fit(X, y)
+
+
+def test_n_components():
+    rng = np.random.RandomState(42)
+    X = np.arange(12).reshape(4, 3)
+    y = [1, 1, 2, 2]
+
+    init = rng.rand(X.shape[1] - 1, 3)
+
+    # n_components = X.shape[1] != transformation.shape[0]
+    n_components = X.shape[1]
+    nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
+    assert_raise_message(ValueError,
+                         'The preferred dimensionality of the '
+                         'projected space `n_components` ({}) does not match '
+                         'the output dimensionality of the given '
+                         'linear transformation `init` ({})!'
+                         .format(n_components, init.shape[0]),
+                         nca.fit, X, y)
+
+    # n_components > X.shape[1]
+    n_components = X.shape[1] + 2
+    nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
+    assert_raise_message(ValueError,
+                         'The preferred dimensionality of the '
+                         'projected space `n_components` ({}) cannot '
+                         'be greater than the given data '
+                         'dimensionality ({})!'
+                         .format(n_components, X.shape[1]),
+                         nca.fit, X, y)
+
+    # n_components < X.shape[1]
+    nca = NeighborhoodComponentsAnalysis(n_components=2, init='identity')
+    nca.fit(X, y)
+
+
+def test_init_transformation():
+    rng = np.random.RandomState(42)
+    X, y = make_blobs(n_samples=30, centers=6, n_features=5, random_state=0)
+
+    # Start learning from scratch
+    nca = NeighborhoodComponentsAnalysis(init='identity')
+    nca.fit(X, y)
+
+    # Initialize with random
+    nca_random = NeighborhoodComponentsAnalysis(init='random')
+    nca_random.fit(X, y)
+
+    # Initialize with auto
+    nca_auto = NeighborhoodComponentsAnalysis(init='auto')
+    nca_auto.fit(X, y)
+
+    # Initialize with PCA
+    nca_pca = NeighborhoodComponentsAnalysis(init='pca')
+    nca_pca.fit(X, y)
+
+    # Initialize with LDA
+    nca_lda = NeighborhoodComponentsAnalysis(init='lda')
+    nca_lda.fit(X, y)
+
+    init = rng.rand(X.shape[1], X.shape[1])
+    nca = NeighborhoodComponentsAnalysis(init=init)
+    nca.fit(X, y)
+
+    # init.shape[1] must match X.shape[1]
+    init = rng.rand(X.shape[1], X.shape[1] + 1)
+    nca = NeighborhoodComponentsAnalysis(init=init)
+    assert_raise_message(ValueError,
+                         'The input dimensionality ({}) of the given '
+                         'linear transformation `init` must match the '
+                         'dimensionality of the given inputs `X` ({}).'
+                         .format(init.shape[1], X.shape[1]),
+                         nca.fit, X, y)
+
+    # init.shape[0] must be <= init.shape[1]
+    init = rng.rand(X.shape[1] + 1, X.shape[1])
+    nca = NeighborhoodComponentsAnalysis(init=init)
+    assert_raise_message(ValueError,
+                         'The output dimensionality ({}) of the given '
+                         'linear transformation `init` cannot be '
+                         'greater than its input dimensionality ({}).'
+                         .format(init.shape[0], init.shape[1]),
+                         nca.fit, X, y)
+
+    # init.shape[0] must match n_components
+    init = rng.rand(X.shape[1], X.shape[1])
+    n_components = X.shape[1] - 2
+    nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
+    assert_raise_message(ValueError,
+                         'The preferred dimensionality of the '
+                         'projected space `n_components` ({}) does not match '
+                         'the output dimensionality of the given '
+                         'linear transformation `init` ({})!'
+                         .format(n_components, init.shape[0]),
+                         nca.fit, X, y)
+
+
+@pytest.mark.parametrize('n_samples', [3, 5, 7, 11])
+@pytest.mark.parametrize('n_features', [3, 5, 7, 11])
+@pytest.mark.parametrize('n_classes', [5, 7, 11])
+@pytest.mark.parametrize('n_components', [3, 5, 7, 11])
+def test_auto_init(n_samples, n_features, n_classes, n_components):
+    # Test that auto choose the init as expected with every configuration
+    # of order of n_samples, n_features, n_classes and n_components.
+    rng = np.random.RandomState(42)
+    nca_base = NeighborhoodComponentsAnalysis(init='auto',
+                                              n_components=n_components,
+                                              max_iter=1,
+                                              random_state=rng)
+    if n_classes >= n_samples:
+        pass
+        # n_classes > n_samples is impossible, and n_classes == n_samples
+        # throws an error from lda but is an absurd case
+    else:
+        X = rng.randn(n_samples, n_features)
+        y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
+        if n_components > n_features:
+            # this would return a ValueError, which is already tested in
+            # test_params_validation
+            pass
+        else:
+            nca = clone(nca_base)
+            nca.fit(X, y)
+            if n_components <= min(n_classes - 1, n_features):
+                nca_other = clone(nca_base).set_params(init='lda')
+            elif n_components < min(n_features, n_samples):
+                nca_other = clone(nca_base).set_params(init='pca')
+            else:
+                nca_other = clone(nca_base).set_params(init='identity')
+            nca_other.fit(X, y)
+            assert_array_almost_equal(nca.components_, nca_other.components_)
+
+
+def test_warm_start_validation():
+    X, y = make_classification(n_samples=30, n_features=5, n_classes=4,
+                               n_redundant=0, n_informative=5, random_state=0)
+
+    nca = NeighborhoodComponentsAnalysis(warm_start=True, max_iter=5)
+    nca.fit(X, y)
+
+    X_less_features, y = make_classification(n_samples=30, n_features=4,
+                                             n_classes=4, n_redundant=0,
+                                             n_informative=4, random_state=0)
+    assert_raise_message(ValueError,
+                         'The new inputs dimensionality ({}) does not '
+                         'match the input dimensionality of the '
+                         'previously learned transformation ({}).'
+                         .format(X_less_features.shape[1],
+                                 nca.components_.shape[1]),
+                         nca.fit, X_less_features, y)
+
+
+def test_warm_start_effectiveness():
+    # A 1-iteration second fit on same data should give almost same result
+    # with warm starting, and quite different result without warm starting.
+
+    nca_warm = NeighborhoodComponentsAnalysis(warm_start=True, random_state=0)
+    nca_warm.fit(iris_data, iris_target)
+    transformation_warm = nca_warm.components_
+    nca_warm.max_iter = 1
+    nca_warm.fit(iris_data, iris_target)
+    transformation_warm_plus_one = nca_warm.components_
+
+    nca_cold = NeighborhoodComponentsAnalysis(warm_start=False, random_state=0)
+    nca_cold.fit(iris_data, iris_target)
+    transformation_cold = nca_cold.components_
+    nca_cold.max_iter = 1
+    nca_cold.fit(iris_data, iris_target)
+    transformation_cold_plus_one = nca_cold.components_
+
+    diff_warm = np.sum(np.abs(transformation_warm_plus_one -
+                              transformation_warm))
+    diff_cold = np.sum(np.abs(transformation_cold_plus_one -
+                              transformation_cold))
+    assert diff_warm < 3.0, ("Transformer changed significantly after one "
+                             "iteration even though it was warm-started.")
+
+    assert diff_cold > diff_warm, ("Cold-started transformer changed less "
+                                   "significantly than warm-started "
+                                   "transformer after one iteration.")
+
+
+@pytest.mark.parametrize('init_name', ['pca', 'lda', 'identity', 'random',
+                                       'precomputed'])
+def test_verbose(init_name, capsys):
+    # assert there is proper output when verbose = 1, for every initialization
+    # except auto because auto will call one of the others
+    rng = np.random.RandomState(42)
+    X, y = make_blobs(n_samples=30, centers=6, n_features=5, random_state=0)
+    regexp_init = r'... done in \ *\d+\.\d{2}s'
+    msgs = {'pca': "Finding principal components" + regexp_init,
+            'lda': "Finding most discriminative components" + regexp_init}
+    if init_name == 'precomputed':
+        init = rng.randn(X.shape[1], X.shape[1])
+    else:
+        init = init_name
+    nca = NeighborhoodComponentsAnalysis(verbose=1, init=init)
+    nca.fit(X, y)
+    out, _ = capsys.readouterr()
+
+    # check output
+    lines = re.split('\n+', out)
+    # if pca or lda init, an additional line is printed, so we test
+    # it and remove it to test the rest equally among initializations
+    if init_name in ['pca', 'lda']:
+        assert re.match(msgs[init_name], lines[0])
+        lines = lines[1:]
+    assert lines[0] == '[NeighborhoodComponentsAnalysis]'
+    header = '{:>10} {:>20} {:>10}'.format('Iteration', 'Objective Value',
+                                           'Time(s)')
+    assert lines[1] == '[NeighborhoodComponentsAnalysis] {}'.format(header)
+    assert lines[2] == ('[NeighborhoodComponentsAnalysis] {}'
+                        .format('-' * len(header)))
+    for line in lines[3:-2]:
+        # The following regex will match for instance:
+        # '[NeighborhoodComponentsAnalysis]  0    6.988936e+01   0.01'
+        assert re.match(r'\[NeighborhoodComponentsAnalysis\] *\d+ *\d\.\d{6}e'
+                        r'[+|-]\d+\ *\d+\.\d{2}', line)
+    assert re.match(r'\[NeighborhoodComponentsAnalysis\] Training took\ *'
+                    r'\d+\.\d{2}s\.', lines[-2])
+    assert lines[-1] == ''
+
+
+def test_no_verbose(capsys):
+    # assert by default there is no output (verbose=0)
+    nca = NeighborhoodComponentsAnalysis()
+    nca.fit(iris_data, iris_target)
+    out, _ = capsys.readouterr()
+    # check output
+    assert(out == '')
+
+
+def test_singleton_class():
+    X = iris_data
+    y = iris_target
+
+    # one singleton class
+    singleton_class = 1
+    ind_singleton, = np.where(y == singleton_class)
+    y[ind_singleton] = 2
+    y[ind_singleton[0]] = singleton_class
+
+    nca = NeighborhoodComponentsAnalysis(max_iter=30)
+    nca.fit(X, y)
+
+    # One non-singleton class
+    ind_1, = np.where(y == 1)
+    ind_2, = np.where(y == 2)
+    y[ind_1] = 0
+    y[ind_1[0]] = 1
+    y[ind_2] = 0
+    y[ind_2[0]] = 2
+
+    nca = NeighborhoodComponentsAnalysis(max_iter=30)
+    nca.fit(X, y)
+
+    # Only singleton classes
+    ind_0, = np.where(y == 0)
+    ind_1, = np.where(y == 1)
+    ind_2, = np.where(y == 2)
+    X = X[[ind_0[0], ind_1[0], ind_2[0]]]
+    y = y[[ind_0[0], ind_1[0], ind_2[0]]]
+
+    nca = NeighborhoodComponentsAnalysis(init='identity', max_iter=30)
+    nca.fit(X, y)
+    assert_array_equal(X, nca.transform(X))
+
+
+def test_one_class():
+    X = iris_data[iris_target == 0]
+    y = iris_target[iris_target == 0]
+
+    nca = NeighborhoodComponentsAnalysis(max_iter=30,
+                                         n_components=X.shape[1],
+                                         init='identity')
+    nca.fit(X, y)
+    assert_array_equal(X, nca.transform(X))
+
+
+def test_callback(capsys):
+    X = iris_data
+    y = iris_target
+
+    nca = NeighborhoodComponentsAnalysis(callback='my_cb')
+    assert_raises(ValueError, nca.fit, X, y)
+
+    max_iter = 10
+
+    def my_cb(transformation, n_iter):
+        assert transformation.shape == (iris_data.shape[1]**2,)
+        rem_iter = max_iter - n_iter
+        print('{} iterations remaining...'.format(rem_iter))
+
+    # assert that my_cb is called
+    nca = NeighborhoodComponentsAnalysis(max_iter=max_iter,
+                                         callback=my_cb, verbose=1)
+    nca.fit(iris_data, iris_target)
+    out, _ = capsys.readouterr()
+
+    # check output
+    assert('{} iterations remaining...'.format(max_iter - 1) in out)
+
+
+def test_expected_transformation_shape():
+    """Test that the transformation has the expected shape."""
+    X = iris_data
+    y = iris_target
+
+    class TransformationStorer:
+
+        def __init__(self, X, y):
+            # Initialize a fake NCA and variables needed to call the loss
+            # function:
+            self.fake_nca = NeighborhoodComponentsAnalysis()
+            self.fake_nca.n_iter_ = np.inf
+            self.X, y, _ = self.fake_nca._validate_params(X, y)
+            self.same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]
+
+        def callback(self, transformation, n_iter):
+            """Stores the last value of the transformation taken as input by
+            the optimizer"""
+            self.transformation = transformation
+
+    transformation_storer = TransformationStorer(X, y)
+    cb = transformation_storer.callback
+    nca = NeighborhoodComponentsAnalysis(max_iter=5, callback=cb)
+    nca.fit(X, y)
+    assert transformation_storer.transformation.size == X.shape[1]**2
+
+
+def test_convergence_warning():
+    nca = NeighborhoodComponentsAnalysis(max_iter=2, verbose=1)
+    cls_name = nca.__class__.__name__
+    assert_warns_message(ConvergenceWarning,
+                         '[{}] NCA did not converge'.format(cls_name),
+                         nca.fit, iris_data, iris_target)
+
+
+@pytest.mark.parametrize('param, value', [('n_components', np.int32(3)),
+                                          ('max_iter', np.int32(100)),
+                                          ('tol', np.float32(0.0001))])
+def test_parameters_valid_types(param, value):
+    # check that no error is raised when parameters have numpy integer or
+    # floating types.
+    nca = NeighborhoodComponentsAnalysis(**{param: value})
+
+    X = iris_data
+    y = iris_target
+
+    nca.fit(X, y)
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/test_nearest_centroid.py
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/test_nearest_centroid.py
@ -0,0 +1,148 @@
+"""
+Testing for the nearest centroid module.
+"""
+
+import numpy as np
+from scipy import sparse as sp
+from numpy.testing import assert_array_equal
+
+from sklearn.neighbors import NearestCentroid
+from sklearn import datasets
+from sklearn.utils._testing import assert_raises
+
+# toy sample
+X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
+X_csr = sp.csr_matrix(X)  # Sparse matrix
+y = [-1, -1, -1, 1, 1, 1]
+T = [[-1, -1], [2, 2], [3, 2]]
+T_csr = sp.csr_matrix(T)
+true_result = [-1, 1, 1]
+
+# also load the iris dataset
+# and randomly permute it
+iris = datasets.load_iris()
+rng = np.random.RandomState(1)
+perm = rng.permutation(iris.target.size)
+iris.data = iris.data[perm]
+iris.target = iris.target[perm]
+
+
+def test_classification_toy():
+    # Check classification on a toy dataset, including sparse versions.
+    clf = NearestCentroid()
+    clf.fit(X, y)
+    assert_array_equal(clf.predict(T), true_result)
+
+    # Same test, but with a sparse matrix to fit and test.
+    clf = NearestCentroid()
+    clf.fit(X_csr, y)
+    assert_array_equal(clf.predict(T_csr), true_result)
+
+    # Fit with sparse, test with non-sparse
+    clf = NearestCentroid()
+    clf.fit(X_csr, y)
+    assert_array_equal(clf.predict(T), true_result)
+
+    # Fit with non-sparse, test with sparse
+    clf = NearestCentroid()
+    clf.fit(X, y)
+    assert_array_equal(clf.predict(T_csr), true_result)
+
+    # Fit and predict with non-CSR sparse matrices
+    clf = NearestCentroid()
+    clf.fit(X_csr.tocoo(), y)
+    assert_array_equal(clf.predict(T_csr.tolil()), true_result)
+
+
+def test_precomputed():
+    clf = NearestCentroid(metric='precomputed')
+    with assert_raises(ValueError):
+        clf.fit(X, y)
+
+
+def test_iris():
+    # Check consistency on dataset iris.
+    for metric in ('euclidean', 'cosine'):
+        clf = NearestCentroid(metric=metric).fit(iris.data, iris.target)
+        score = np.mean(clf.predict(iris.data) == iris.target)
+        assert score > 0.9, "Failed with score = " + str(score)
+
+
+def test_iris_shrinkage():
+    # Check consistency on dataset iris, when using shrinkage.
+    for metric in ('euclidean', 'cosine'):
+        for shrink_threshold in [None, 0.1, 0.5]:
+            clf = NearestCentroid(metric=metric,
+                                  shrink_threshold=shrink_threshold)
+            clf = clf.fit(iris.data, iris.target)
+            score = np.mean(clf.predict(iris.data) == iris.target)
+            assert score > 0.8, "Failed with score = " + str(score)
+
+
+def test_pickle():
+    import pickle
+
+    # classification
+    obj = NearestCentroid()
+    obj.fit(iris.data, iris.target)
+    score = obj.score(iris.data, iris.target)
+    s = pickle.dumps(obj)
+
+    obj2 = pickle.loads(s)
+    assert type(obj2) == obj.__class__
+    score2 = obj2.score(iris.data, iris.target)
+    assert_array_equal(score, score2,
+                       "Failed to generate same score"
+                       " after pickling (classification).")
+
+
+def test_shrinkage_correct():
+    # Ensure that the shrinking is correct.
+    # The expected result is calculated by R (pamr),
+    # which is implemented by the author of the original paper.
+    # (One need to modify the code to output the new centroid in pamr.predict)
+
+    X = np.array([[0, 1], [1, 0], [1, 1], [2, 0], [6, 8]])
+    y = np.array([1, 1, 2, 2, 2])
+    clf = NearestCentroid(shrink_threshold=0.1)
+    clf.fit(X, y)
+    expected_result = np.array([[0.7787310, 0.8545292], [2.814179, 2.763647]])
+    np.testing.assert_array_almost_equal(clf.centroids_, expected_result)
+
+
+def test_shrinkage_threshold_decoded_y():
+    clf = NearestCentroid(shrink_threshold=0.01)
+    y_ind = np.asarray(y)
+    y_ind[y_ind == -1] = 0
+    clf.fit(X, y_ind)
+    centroid_encoded = clf.centroids_
+    clf.fit(X, y)
+    assert_array_equal(centroid_encoded, clf.centroids_)
+
+
+def test_predict_translated_data():
+    # Test that NearestCentroid gives same results on translated data
+
+    rng = np.random.RandomState(0)
+    X = rng.rand(50, 50)
+    y = rng.randint(0, 3, 50)
+    noise = rng.rand(50)
+    clf = NearestCentroid(shrink_threshold=0.1)
+    clf.fit(X, y)
+    y_init = clf.predict(X)
+    clf = NearestCentroid(shrink_threshold=0.1)
+    X_noise = X + noise
+    clf.fit(X_noise, y)
+    y_translate = clf.predict(X_noise)
+    assert_array_equal(y_init, y_translate)
+
+
+def test_manhattan_metric():
+    # Test the manhattan metric.
+
+    clf = NearestCentroid(metric='manhattan')
+    clf.fit(X, y)
+    dense_centroid = clf.centroids_
+    clf.fit(X_csr, y)
+    assert_array_equal(clf.centroids_, dense_centroid)
+    assert_array_equal(dense_centroid, [[-1, -1], [1, 1]])
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/test_neighbors.py
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/test_neighbors.py
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/test_neighbors_pipeline.py
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/test_neighbors_pipeline.py
@ -0,0 +1,221 @@
+"""
+This is testing the equivalence between some estimators with internal nearest
+neighbors computations, and the corresponding pipeline versions with
+KNeighborsTransformer or RadiusNeighborsTransformer to precompute the
+neighbors.
+"""
+
+import numpy as np
+
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.cluster.tests.common import generate_clustered_data
+from sklearn.datasets import make_blobs
+from sklearn.pipeline import make_pipeline
+from sklearn.base import clone
+
+from sklearn.neighbors import KNeighborsTransformer
+from sklearn.neighbors import RadiusNeighborsTransformer
+
+from sklearn.cluster import DBSCAN
+from sklearn.cluster import SpectralClustering
+from sklearn.neighbors import KNeighborsRegressor
+from sklearn.neighbors import RadiusNeighborsRegressor
+from sklearn.neighbors import LocalOutlierFactor
+from sklearn.manifold import SpectralEmbedding
+from sklearn.manifold import Isomap
+from sklearn.manifold import TSNE
+
+
+def test_spectral_clustering():
+    # Test chaining KNeighborsTransformer and SpectralClustering
+    n_neighbors = 5
+    X, _ = make_blobs(random_state=0)
+
+    # compare the chained version and the compact version
+    est_chain = make_pipeline(
+        KNeighborsTransformer(n_neighbors=n_neighbors, mode='connectivity'),
+        SpectralClustering(n_neighbors=n_neighbors, affinity='precomputed',
+                           random_state=42))
+    est_compact = SpectralClustering(
+        n_neighbors=n_neighbors, affinity='nearest_neighbors', random_state=42)
+    labels_compact = est_compact.fit_predict(X)
+    labels_chain = est_chain.fit_predict(X)
+    assert_array_almost_equal(labels_chain, labels_compact)
+
+
+def test_spectral_embedding():
+    # Test chaining KNeighborsTransformer and SpectralEmbedding
+    n_neighbors = 5
+
+    n_samples = 1000
+    centers = np.array([
+        [0.0, 5.0, 0.0, 0.0, 0.0],
+        [0.0, 0.0, 4.0, 0.0, 0.0],
+        [1.0, 0.0, 0.0, 5.0, 1.0],
+    ])
+    S, true_labels = make_blobs(n_samples=n_samples, centers=centers,
+                                cluster_std=1., random_state=42)
+
+    # compare the chained version and the compact version
+    est_chain = make_pipeline(
+        KNeighborsTransformer(n_neighbors=n_neighbors, mode='connectivity'),
+        SpectralEmbedding(n_neighbors=n_neighbors, affinity='precomputed',
+                          random_state=42))
+    est_compact = SpectralEmbedding(
+        n_neighbors=n_neighbors, affinity='nearest_neighbors', random_state=42)
+    St_compact = est_compact.fit_transform(S)
+    St_chain = est_chain.fit_transform(S)
+    assert_array_almost_equal(St_chain, St_compact)
+
+
+def test_dbscan():
+    # Test chaining RadiusNeighborsTransformer and DBSCAN
+    radius = 0.3
+    n_clusters = 3
+    X = generate_clustered_data(n_clusters=n_clusters)
+
+    # compare the chained version and the compact version
+    est_chain = make_pipeline(
+        RadiusNeighborsTransformer(radius=radius, mode='distance'),
+        DBSCAN(metric='precomputed', eps=radius))
+    est_compact = DBSCAN(eps=radius)
+
+    labels_chain = est_chain.fit_predict(X)
+    labels_compact = est_compact.fit_predict(X)
+    assert_array_almost_equal(labels_chain, labels_compact)
+
+
+def test_isomap():
+    # Test chaining KNeighborsTransformer and Isomap with
+    # neighbors_algorithm='precomputed'
+    algorithm = 'auto'
+    n_neighbors = 10
+
+    X, _ = make_blobs(random_state=0)
+    X2, _ = make_blobs(random_state=1)
+
+    # compare the chained version and the compact version
+    est_chain = make_pipeline(
+        KNeighborsTransformer(n_neighbors=n_neighbors, algorithm=algorithm,
+                              mode='distance'),
+        Isomap(n_neighbors=n_neighbors, metric='precomputed'))
+    est_compact = Isomap(n_neighbors=n_neighbors,
+                         neighbors_algorithm=algorithm)
+
+    Xt_chain = est_chain.fit_transform(X)
+    Xt_compact = est_compact.fit_transform(X)
+    assert_array_almost_equal(Xt_chain, Xt_compact)
+
+    Xt_chain = est_chain.transform(X2)
+    Xt_compact = est_compact.transform(X2)
+    assert_array_almost_equal(Xt_chain, Xt_compact)
+
+
+def test_tsne():
+    # Test chaining KNeighborsTransformer and TSNE
+    n_iter = 250
+    perplexity = 5
+    n_neighbors = int(3. * perplexity + 1)
+
+    rng = np.random.RandomState(0)
+    X = rng.randn(20, 2)
+
+    for metric in ['minkowski', 'sqeuclidean']:
+
+        # compare the chained version and the compact version
+        est_chain = make_pipeline(
+            KNeighborsTransformer(n_neighbors=n_neighbors, mode='distance',
+                                  metric=metric),
+            TSNE(metric='precomputed', perplexity=perplexity,
+                 method="barnes_hut", random_state=42, n_iter=n_iter))
+        est_compact = TSNE(metric=metric, perplexity=perplexity, n_iter=n_iter,
+                           method="barnes_hut", random_state=42)
+
+        Xt_chain = est_chain.fit_transform(X)
+        Xt_compact = est_compact.fit_transform(X)
+        assert_array_almost_equal(Xt_chain, Xt_compact)
+
+
+def test_lof_novelty_false():
+    # Test chaining KNeighborsTransformer and LocalOutlierFactor
+    n_neighbors = 4
+
+    rng = np.random.RandomState(0)
+    X = rng.randn(40, 2)
+
+    # compare the chained version and the compact version
+    est_chain = make_pipeline(
+        KNeighborsTransformer(n_neighbors=n_neighbors, mode='distance'),
+        LocalOutlierFactor(metric='precomputed', n_neighbors=n_neighbors,
+                           novelty=False, contamination="auto"))
+    est_compact = LocalOutlierFactor(n_neighbors=n_neighbors, novelty=False,
+                                     contamination="auto")
+
+    pred_chain = est_chain.fit_predict(X)
+    pred_compact = est_compact.fit_predict(X)
+    assert_array_almost_equal(pred_chain, pred_compact)
+
+
+def test_lof_novelty_true():
+    # Test chaining KNeighborsTransformer and LocalOutlierFactor
+    n_neighbors = 4
+
+    rng = np.random.RandomState(0)
+    X1 = rng.randn(40, 2)
+    X2 = rng.randn(40, 2)
+
+    # compare the chained version and the compact version
+    est_chain = make_pipeline(
+        KNeighborsTransformer(n_neighbors=n_neighbors, mode='distance'),
+        LocalOutlierFactor(metric='precomputed', n_neighbors=n_neighbors,
+                           novelty=True, contamination="auto"))
+    est_compact = LocalOutlierFactor(n_neighbors=n_neighbors, novelty=True,
+                                     contamination="auto")
+
+    pred_chain = est_chain.fit(X1).predict(X2)
+    pred_compact = est_compact.fit(X1).predict(X2)
+    assert_array_almost_equal(pred_chain, pred_compact)
+
+
+def test_kneighbors_regressor():
+    # Test chaining KNeighborsTransformer and classifiers/regressors
+    rng = np.random.RandomState(0)
+    X = 2 * rng.rand(40, 5) - 1
+    X2 = 2 * rng.rand(40, 5) - 1
+    y = rng.rand(40, 1)
+
+    n_neighbors = 12
+    radius = 1.5
+    # We precompute more neighbors than necessary, to have equivalence between
+    # k-neighbors estimator after radius-neighbors transformer, and vice-versa.
+    factor = 2
+
+    k_trans = KNeighborsTransformer(n_neighbors=n_neighbors, mode='distance')
+    k_trans_factor = KNeighborsTransformer(n_neighbors=int(
+        n_neighbors * factor), mode='distance')
+
+    r_trans = RadiusNeighborsTransformer(radius=radius, mode='distance')
+    r_trans_factor = RadiusNeighborsTransformer(radius=int(
+        radius * factor), mode='distance')
+
+    k_reg = KNeighborsRegressor(n_neighbors=n_neighbors)
+    r_reg = RadiusNeighborsRegressor(radius=radius)
+
+    test_list = [
+        (k_trans, k_reg),
+        (k_trans_factor, r_reg),
+        (r_trans, r_reg),
+        (r_trans_factor, k_reg),
+    ]
+
+    for trans, reg in test_list:
+        # compare the chained version and the compact version
+        reg_compact = clone(reg)
+        reg_precomp = clone(reg)
+        reg_precomp.set_params(metric='precomputed')
+
+        reg_chain = make_pipeline(clone(trans), reg_precomp)
+
+        y_pred_chain = reg_chain.fit(X, y).predict(X2)
+        y_pred_compact = reg_compact.fit(X, y).predict(X2)
+        assert_array_almost_equal(y_pred_chain, y_pred_compact)
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/test_neighbors_tree.py
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/test_neighbors_tree.py
@ -0,0 +1,279 @@
+# License: BSD 3 clause
+
+import pickle
+import itertools
+
+import numpy as np
+import pytest
+
+from sklearn.neighbors import DistanceMetric
+from sklearn.neighbors._ball_tree import (
+    BallTree, kernel_norm, DTYPE, ITYPE,
+    NeighborsHeap as NeighborsHeapBT,
+    simultaneous_sort as simultaneous_sort_bt,
+    nodeheap_sort as nodeheap_sort_bt)
+from sklearn.neighbors._kd_tree import (
+    KDTree, NeighborsHeap as NeighborsHeapKDT,
+    simultaneous_sort as simultaneous_sort_kdt,
+    nodeheap_sort as nodeheap_sort_kdt)
+
+from sklearn.utils import check_random_state
+from numpy.testing import assert_array_almost_equal, assert_allclose
+
+rng = np.random.RandomState(42)
+V_mahalanobis = rng.rand(3, 3)
+V_mahalanobis = np.dot(V_mahalanobis, V_mahalanobis.T)
+
+DIMENSION = 3
+
+METRICS = {'euclidean': {},
+           'manhattan': {},
+           'minkowski': dict(p=3),
+           'chebyshev': {},
+           'seuclidean': dict(V=rng.random_sample(DIMENSION)),
+           'wminkowski': dict(p=3, w=rng.random_sample(DIMENSION)),
+           'mahalanobis': dict(V=V_mahalanobis)}
+
+KD_TREE_METRICS = ['euclidean', 'manhattan', 'chebyshev', 'minkowski']
+BALL_TREE_METRICS = list(METRICS)
+
+
+def dist_func(x1, x2, p):
+    return np.sum((x1 - x2) ** p) ** (1. / p)
+
+
+def compute_kernel_slow(Y, X, kernel, h):
+    d = np.sqrt(((Y[:, None, :] - X) ** 2).sum(-1))
+    norm = kernel_norm(h, X.shape[1], kernel)
+
+    if kernel == 'gaussian':
+        return norm * np.exp(-0.5 * (d * d) / (h * h)).sum(-1)
+    elif kernel == 'tophat':
+        return norm * (d < h).sum(-1)
+    elif kernel == 'epanechnikov':
+        return norm * ((1.0 - (d * d) / (h * h)) * (d < h)).sum(-1)
+    elif kernel == 'exponential':
+        return norm * (np.exp(-d / h)).sum(-1)
+    elif kernel == 'linear':
+        return norm * ((1 - d / h) * (d < h)).sum(-1)
+    elif kernel == 'cosine':
+        return norm * (np.cos(0.5 * np.pi * d / h) * (d < h)).sum(-1)
+    else:
+        raise ValueError('kernel not recognized')
+
+
+def brute_force_neighbors(X, Y, k, metric, **kwargs):
+    D = DistanceMetric.get_metric(metric, **kwargs).pairwise(Y, X)
+    ind = np.argsort(D, axis=1)[:, :k]
+    dist = D[np.arange(Y.shape[0])[:, None], ind]
+    return dist, ind
+
+
+@pytest.mark.parametrize('Cls', [KDTree, BallTree])
+@pytest.mark.parametrize("kernel", ['gaussian', 'tophat', 'epanechnikov',
+                                    'exponential', 'linear', 'cosine'])
+@pytest.mark.parametrize("h", [0.01, 0.1, 1])
+@pytest.mark.parametrize("rtol", [0, 1E-5])
+@pytest.mark.parametrize("atol", [1E-6, 1E-2])
+@pytest.mark.parametrize("breadth_first", [True, False])
+def test_kernel_density(Cls, kernel, h, rtol, atol, breadth_first,
+                        n_samples=100, n_features=3):
+    rng = check_random_state(1)
+    X = rng.random_sample((n_samples, n_features))
+    Y = rng.random_sample((n_samples, n_features))
+    dens_true = compute_kernel_slow(Y, X, kernel, h)
+
+    tree = Cls(X, leaf_size=10)
+    dens = tree.kernel_density(Y, h, atol=atol, rtol=rtol,
+                               kernel=kernel,
+                               breadth_first=breadth_first)
+    assert_allclose(dens, dens_true,
+                    atol=atol, rtol=max(rtol, 1e-7))
+
+
+@pytest.mark.parametrize('Cls', [KDTree, BallTree])
+def test_neighbor_tree_query_radius(Cls, n_samples=100, n_features=10):
+    rng = check_random_state(0)
+    X = 2 * rng.random_sample(size=(n_samples, n_features)) - 1
+    query_pt = np.zeros(n_features, dtype=float)
+
+    eps = 1E-15  # roundoff error can cause test to fail
+    tree = Cls(X, leaf_size=5)
+    rad = np.sqrt(((X - query_pt) ** 2).sum(1))
+
+    for r in np.linspace(rad[0], rad[-1], 100):
+        ind = tree.query_radius([query_pt], r + eps)[0]
+        i = np.where(rad <= r + eps)[0]
+
+        ind.sort()
+        i.sort()
+
+        assert_array_almost_equal(i, ind)
+
+
+@pytest.mark.parametrize('Cls', [KDTree, BallTree])
+def test_neighbor_tree_query_radius_distance(Cls, n_samples=100,
+                                             n_features=10):
+    rng = check_random_state(0)
+    X = 2 * rng.random_sample(size=(n_samples, n_features)) - 1
+    query_pt = np.zeros(n_features, dtype=float)
+
+    eps = 1E-15  # roundoff error can cause test to fail
+    tree = Cls(X, leaf_size=5)
+    rad = np.sqrt(((X - query_pt) ** 2).sum(1))
+
+    for r in np.linspace(rad[0], rad[-1], 100):
+        ind, dist = tree.query_radius([query_pt], r + eps,
+                                      return_distance=True)
+
+        ind = ind[0]
+        dist = dist[0]
+
+        d = np.sqrt(((query_pt - X[ind]) ** 2).sum(1))
+
+        assert_array_almost_equal(d, dist)
+
+
+@pytest.mark.parametrize('Cls', [KDTree, BallTree])
+@pytest.mark.parametrize('dualtree', (True, False))
+def test_neighbor_tree_two_point(Cls, dualtree, n_samples=100, n_features=3):
+    rng = check_random_state(0)
+    X = rng.random_sample((n_samples, n_features))
+    Y = rng.random_sample((n_samples, n_features))
+    r = np.linspace(0, 1, 10)
+    tree = Cls(X, leaf_size=10)
+
+    D = DistanceMetric.get_metric("euclidean").pairwise(Y, X)
+    counts_true = [(D <= ri).sum() for ri in r]
+
+    counts = tree.two_point_correlation(Y, r=r, dualtree=dualtree)
+    assert_array_almost_equal(counts, counts_true)
+
+
+@pytest.mark.parametrize('NeighborsHeap', [NeighborsHeapBT, NeighborsHeapKDT])
+def test_neighbors_heap(NeighborsHeap, n_pts=5, n_nbrs=10):
+    heap = NeighborsHeap(n_pts, n_nbrs)
+    rng = check_random_state(0)
+
+    for row in range(n_pts):
+        d_in = rng.random_sample(2 * n_nbrs).astype(DTYPE, copy=False)
+        i_in = np.arange(2 * n_nbrs, dtype=ITYPE)
+        for d, i in zip(d_in, i_in):
+            heap.push(row, d, i)
+
+        ind = np.argsort(d_in)
+        d_in = d_in[ind]
+        i_in = i_in[ind]
+
+        d_heap, i_heap = heap.get_arrays(sort=True)
+
+        assert_array_almost_equal(d_in[:n_nbrs], d_heap[row])
+        assert_array_almost_equal(i_in[:n_nbrs], i_heap[row])
+
+
+@pytest.mark.parametrize('nodeheap_sort', [nodeheap_sort_bt,
+                                           nodeheap_sort_kdt])
+def test_node_heap(nodeheap_sort, n_nodes=50):
+    rng = check_random_state(0)
+    vals = rng.random_sample(n_nodes).astype(DTYPE, copy=False)
+
+    i1 = np.argsort(vals)
+    vals2, i2 = nodeheap_sort(vals)
+
+    assert_array_almost_equal(i1, i2)
+    assert_array_almost_equal(vals[i1], vals2)
+
+
+@pytest.mark.parametrize('simultaneous_sort', [simultaneous_sort_bt,
+                                               simultaneous_sort_kdt])
+def test_simultaneous_sort(simultaneous_sort, n_rows=10, n_pts=201):
+    rng = check_random_state(0)
+    dist = rng.random_sample((n_rows, n_pts)).astype(DTYPE, copy=False)
+    ind = (np.arange(n_pts) + np.zeros((n_rows, 1))).astype(ITYPE, copy=False)
+
+    dist2 = dist.copy()
+    ind2 = ind.copy()
+
+    # simultaneous sort rows using function
+    simultaneous_sort(dist, ind)
+
+    # simultaneous sort rows using numpy
+    i = np.argsort(dist2, axis=1)
+    row_ind = np.arange(n_rows)[:, None]
+    dist2 = dist2[row_ind, i]
+    ind2 = ind2[row_ind, i]
+
+    assert_array_almost_equal(dist, dist2)
+    assert_array_almost_equal(ind, ind2)
+
+
+@pytest.mark.parametrize('Cls', [KDTree, BallTree])
+def test_gaussian_kde(Cls, n_samples=1000):
+    # Compare gaussian KDE results to scipy.stats.gaussian_kde
+    from scipy.stats import gaussian_kde
+    rng = check_random_state(0)
+    x_in = rng.normal(0, 1, n_samples)
+    x_out = np.linspace(-5, 5, 30)
+
+    for h in [0.01, 0.1, 1]:
+        tree = Cls(x_in[:, None])
+        gkde = gaussian_kde(x_in, bw_method=h / np.std(x_in))
+
+        dens_tree = tree.kernel_density(x_out[:, None], h) / n_samples
+        dens_gkde = gkde.evaluate(x_out)
+
+        assert_array_almost_equal(dens_tree, dens_gkde, decimal=3)
+
+
+@pytest.mark.parametrize(
+        'Cls, metric',
+        itertools.chain(
+            [(KDTree, metric) for metric in KD_TREE_METRICS],
+            [(BallTree, metric) for metric in BALL_TREE_METRICS]))
+@pytest.mark.parametrize('k', (1, 3, 5))
+@pytest.mark.parametrize('dualtree', (True, False))
+@pytest.mark.parametrize('breadth_first', (True, False))
+def test_nn_tree_query(Cls, metric, k, dualtree, breadth_first):
+    rng = check_random_state(0)
+    X = rng.random_sample((40, DIMENSION))
+    Y = rng.random_sample((10, DIMENSION))
+
+    kwargs = METRICS[metric]
+
+    kdt = Cls(X, leaf_size=1, metric=metric, **kwargs)
+    dist1, ind1 = kdt.query(Y, k, dualtree=dualtree,
+                            breadth_first=breadth_first)
+    dist2, ind2 = brute_force_neighbors(X, Y, k, metric, **kwargs)
+
+    # don't check indices here: if there are any duplicate distances,
+    # the indices may not match.  Distances should not have this problem.
+    assert_array_almost_equal(dist1, dist2)
+
+
+@pytest.mark.parametrize(
+        "Cls, metric",
+        [(KDTree, 'euclidean'), (BallTree, 'euclidean'),
+         (BallTree, dist_func)])
+@pytest.mark.parametrize('protocol', (0, 1, 2))
+def test_pickle(Cls, metric, protocol):
+    rng = check_random_state(0)
+    X = rng.random_sample((10, 3))
+
+    if hasattr(metric, '__call__'):
+        kwargs = {'p': 2}
+    else:
+        kwargs = {}
+
+    tree1 = Cls(X, leaf_size=1, metric=metric, **kwargs)
+
+    ind1, dist1 = tree1.query(X)
+
+    s = pickle.dumps(tree1, protocol=protocol)
+    tree2 = pickle.loads(s)
+
+    ind2, dist2 = tree2.query(X)
+
+    assert_array_almost_equal(ind1, ind2)
+    assert_array_almost_equal(dist1, dist2)
+
+    assert isinstance(tree2, Cls)
--- a/venv/Lib/site-packages/sklearn/neighbors/tests/test_quad_tree.py
+++ b/venv/Lib/site-packages/sklearn/neighbors/tests/test_quad_tree.py
@ -0,0 +1,104 @@
+import pickle
+import numpy as np
+
+import pytest
+
+from sklearn.neighbors._quad_tree import _QuadTree
+from sklearn.utils import check_random_state
+
+
+def test_quadtree_boundary_computation():
+    # Introduce a point into a quad tree with boundaries not easy to compute.
+    Xs = []
+
+    # check a random case
+    Xs.append(np.array([[-1, 1], [-4, -1]], dtype=np.float32))
+    # check the case where only 0 are inserted
+    Xs.append(np.array([[0, 0], [0, 0]], dtype=np.float32))
+    # check the case where only negative are inserted
+    Xs.append(np.array([[-1, -2], [-4, 0]], dtype=np.float32))
+    # check the case where only small numbers are inserted
+    Xs.append(np.array([[-1e-6, 1e-6], [-4e-6, -1e-6]], dtype=np.float32))
+
+    for X in Xs:
+        tree = _QuadTree(n_dimensions=2, verbose=0)
+        tree.build_tree(X)
+        tree._check_coherence()
+
+
+def test_quadtree_similar_point():
+    # Introduce a point into a quad tree where a similar point already exists.
+    # Test will hang if it doesn't complete.
+    Xs = []
+
+    # check the case where points are actually different
+    Xs.append(np.array([[1, 2], [3, 4]], dtype=np.float32))
+    # check the case where points are the same on X axis
+    Xs.append(np.array([[1.0, 2.0], [1.0, 3.0]], dtype=np.float32))
+    # check the case where points are arbitrarily close on X axis
+    Xs.append(np.array([[1.00001, 2.0], [1.00002, 3.0]], dtype=np.float32))
+    # check the case where points are the same on Y axis
+    Xs.append(np.array([[1.0, 2.0], [3.0, 2.0]], dtype=np.float32))
+    # check the case where points are arbitrarily close on Y axis
+    Xs.append(np.array([[1.0, 2.00001], [3.0, 2.00002]], dtype=np.float32))
+    # check the case where points are arbitrarily close on both axes
+    Xs.append(np.array([[1.00001, 2.00001], [1.00002, 2.00002]],
+              dtype=np.float32))
+
+    # check the case where points are arbitrarily close on both axes
+    # close to machine epsilon - x axis
+    Xs.append(np.array([[1, 0.0003817754041], [2, 0.0003817753750]],
+              dtype=np.float32))
+
+    # check the case where points are arbitrarily close on both axes
+    # close to machine epsilon - y axis
+    Xs.append(np.array([[0.0003817754041, 1.0], [0.0003817753750, 2.0]],
+              dtype=np.float32))
+
+    for X in Xs:
+        tree = _QuadTree(n_dimensions=2, verbose=0)
+        tree.build_tree(X)
+        tree._check_coherence()
+
+
+@pytest.mark.parametrize('n_dimensions', (2, 3))
+@pytest.mark.parametrize('protocol', (0, 1, 2))
+def test_quad_tree_pickle(n_dimensions, protocol):
+    rng = check_random_state(0)
+
+    X = rng.random_sample((10, n_dimensions))
+
+    tree = _QuadTree(n_dimensions=n_dimensions, verbose=0)
+    tree.build_tree(X)
+
+    s = pickle.dumps(tree, protocol=protocol)
+    bt2 = pickle.loads(s)
+
+    for x in X:
+        cell_x_tree = tree.get_cell(x)
+        cell_x_bt2 = bt2.get_cell(x)
+        assert cell_x_tree == cell_x_bt2
+
+
+@pytest.mark.parametrize('n_dimensions', (2, 3))
+def test_qt_insert_duplicate(n_dimensions):
+    rng = check_random_state(0)
+
+    X = rng.random_sample((10, n_dimensions))
+    Xd = np.r_[X, X[:5]]
+    tree = _QuadTree(n_dimensions=n_dimensions, verbose=0)
+    tree.build_tree(Xd)
+
+    cumulative_size = tree.cumulative_size
+    leafs = tree.leafs
+
+    # Assert that the first 5 are indeed duplicated and that the next
+    # ones are single point leaf
+    for i, x in enumerate(X):
+        cell_id = tree.get_cell(x)
+        assert leafs[cell_id]
+        assert cumulative_size[cell_id] == 1 + (i < 5)
+
+
+def test_summarize():
+    _QuadTree.test_summarize()