Uploaded Test files
This commit is contained in:
parent
f584ad9d97
commit
2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
188
venv/Lib/site-packages/sklearn/manifold/tests/test_isomap.py
Normal file
@@ -0,0 +1,188 @@
from itertools import product

import numpy as np
from numpy.testing import assert_almost_equal, assert_array_almost_equal
import pytest

from sklearn import datasets
from sklearn import manifold
from sklearn import neighbors
from sklearn import pipeline
from sklearn import preprocessing

from scipy.sparse import rand as sparse_rand

eigen_solvers = ['auto', 'dense', 'arpack']
path_methods = ['auto', 'FW', 'D']


def test_isomap_simple_grid():
    # Isomap should preserve distances when all neighbors are used
    N_per_side = 5
    Npts = N_per_side ** 2
    n_neighbors = Npts - 1

    # grid of equidistant points in 2D, n_components = n_dim
    X = np.array(list(product(range(N_per_side), repeat=2)))

    # distances from each point to all others
    G = neighbors.kneighbors_graph(X, n_neighbors,
                                   mode='distance').toarray()

    for eigen_solver in eigen_solvers:
        for path_method in path_methods:
            clf = manifold.Isomap(n_neighbors=n_neighbors, n_components=2,
                                  eigen_solver=eigen_solver,
                                  path_method=path_method)
            clf.fit(X)

            G_iso = neighbors.kneighbors_graph(clf.embedding_,
                                               n_neighbors,
                                               mode='distance').toarray()
            assert_array_almost_equal(G, G_iso)


def test_isomap_reconstruction_error():
    # Same setup as in test_isomap_simple_grid, with an added dimension
    N_per_side = 5
    Npts = N_per_side ** 2
    n_neighbors = Npts - 1

    # grid of equidistant points in 2D, n_components = n_dim
    X = np.array(list(product(range(N_per_side), repeat=2)))

    # add noise in a third dimension
    rng = np.random.RandomState(0)
    noise = 0.1 * rng.randn(Npts, 1)
    X = np.concatenate((X, noise), 1)

    # compute input kernel
    G = neighbors.kneighbors_graph(X, n_neighbors,
                                   mode='distance').toarray()

    centerer = preprocessing.KernelCenterer()
    K = centerer.fit_transform(-0.5 * G ** 2)

    for eigen_solver in eigen_solvers:
        for path_method in path_methods:
            clf = manifold.Isomap(n_neighbors=n_neighbors, n_components=2,
                                  eigen_solver=eigen_solver,
                                  path_method=path_method)
            clf.fit(X)

            # compute output kernel
            G_iso = neighbors.kneighbors_graph(clf.embedding_,
                                               n_neighbors,
                                               mode='distance').toarray()

            K_iso = centerer.fit_transform(-0.5 * G_iso ** 2)

            # make sure error agrees
            reconstruction_error = np.linalg.norm(K - K_iso) / Npts
            assert_almost_equal(reconstruction_error,
                                clf.reconstruction_error())


def test_transform():
    n_samples = 200
    n_components = 10
    noise_scale = 0.01

    # Create S-curve dataset
    X, y = datasets.make_s_curve(n_samples, random_state=0)

    # Compute isomap embedding
    iso = manifold.Isomap(n_components=n_components, n_neighbors=2)
    X_iso = iso.fit_transform(X)

    # Re-embed a noisy version of the points
    rng = np.random.RandomState(0)
    noise = noise_scale * rng.randn(*X.shape)
    X_iso2 = iso.transform(X + noise)

    # Make sure the rms error on re-embedding is comparable to noise_scale
    assert np.sqrt(np.mean((X_iso - X_iso2) ** 2)) < 2 * noise_scale


def test_pipeline():
    # check that Isomap works fine as a transformer in a Pipeline
    # only checks that no error is raised.
    # TODO check that it actually does something useful
    X, y = datasets.make_blobs(random_state=0)
    clf = pipeline.Pipeline(
        [('isomap', manifold.Isomap()),
         ('clf', neighbors.KNeighborsClassifier())])
    clf.fit(X, y)
    assert .9 < clf.score(X, y)


def test_pipeline_with_nearest_neighbors_transformer():
    # Test chaining NearestNeighborsTransformer and Isomap with
    # neighbors_algorithm='precomputed'
    algorithm = 'auto'
    n_neighbors = 10

    X, _ = datasets.make_blobs(random_state=0)
    X2, _ = datasets.make_blobs(random_state=1)

    # compare the chained version and the compact version
    est_chain = pipeline.make_pipeline(
        neighbors.KNeighborsTransformer(
            n_neighbors=n_neighbors, algorithm=algorithm, mode='distance'),
        manifold.Isomap(n_neighbors=n_neighbors, metric='precomputed'))
    est_compact = manifold.Isomap(n_neighbors=n_neighbors,
                                  neighbors_algorithm=algorithm)

    Xt_chain = est_chain.fit_transform(X)
    Xt_compact = est_compact.fit_transform(X)
    assert_array_almost_equal(Xt_chain, Xt_compact)

    Xt_chain = est_chain.transform(X2)
    Xt_compact = est_compact.transform(X2)
    assert_array_almost_equal(Xt_chain, Xt_compact)


def test_different_metric():
    # Test that the metric parameters work correctly, and default to euclidean
    def custom_metric(x1, x2):
        return np.sqrt(np.sum(x1 ** 2 + x2 ** 2))

    # metric, p, is_euclidean
    metrics = [('euclidean', 2, True),
               ('manhattan', 1, False),
               ('minkowski', 1, False),
               ('minkowski', 2, True),
               (custom_metric, 2, False)]

    X, _ = datasets.make_blobs(random_state=0)
    reference = manifold.Isomap().fit_transform(X)

    for metric, p, is_euclidean in metrics:
        embedding = manifold.Isomap(metric=metric, p=p).fit_transform(X)

        if is_euclidean:
            assert_array_almost_equal(embedding, reference)
        else:
            with pytest.raises(AssertionError, match='not almost equal'):
                assert_array_almost_equal(embedding, reference)


def test_isomap_clone_bug():
    # regression test for bug reported in #6062
    model = manifold.Isomap()
    for n_neighbors in [10, 15, 20]:
        model.set_params(n_neighbors=n_neighbors)
        model.fit(np.random.rand(50, 2))
        assert (model.nbrs_.n_neighbors ==
                n_neighbors)


def test_sparse_input():
    X = sparse_rand(100, 3, density=0.1, format='csr')

    # Should not error
    for eigen_solver in eigen_solvers:
        for path_method in path_methods:
            clf = manifold.Isomap(n_components=2,
                                  eigen_solver=eigen_solver,
                                  path_method=path_method)
            clf.fit(X)
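For context, a minimal sketch (not part of this commit) of the manifold.Isomap API these tests exercise; the dataset and parameter values below are illustrative assumptions only.

# Hypothetical usage example, mirroring what test_transform() above checks.
from sklearn import datasets, manifold

X, _ = datasets.make_s_curve(200, random_state=0)
iso = manifold.Isomap(n_neighbors=10, n_components=2)
X_2d = iso.fit_transform(X)            # embed the 3-D S-curve into 2-D
print(X_2d.shape)                      # (200, 2)
print(iso.reconstruction_error())      # scalar error, as asserted above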
146
venv/Lib/site-packages/sklearn/manifold/tests/test_locally_linear.py
Normal file
@@ -0,0 +1,146 @@
from itertools import product

import numpy as np
from numpy.testing import assert_almost_equal, assert_array_almost_equal
from scipy import linalg
import pytest

from sklearn import neighbors, manifold
from sklearn.manifold._locally_linear import barycenter_kneighbors_graph
from sklearn.utils._testing import ignore_warnings
from sklearn.utils._testing import assert_raise_message

eigen_solvers = ['dense', 'arpack']


# ----------------------------------------------------------------------
# Test utility routines
def test_barycenter_kneighbors_graph():
    X = np.array([[0, 1], [1.01, 1.], [2, 0]])

    A = barycenter_kneighbors_graph(X, 1)
    assert_array_almost_equal(
        A.toarray(),
        [[0., 1., 0.],
         [1., 0., 0.],
         [0., 1., 0.]])

    A = barycenter_kneighbors_graph(X, 2)
    # check that columns sum to one
    assert_array_almost_equal(np.sum(A.toarray(), 1), np.ones(3))
    pred = np.dot(A.toarray(), X)
    assert linalg.norm(pred - X) / X.shape[0] < 1


# ----------------------------------------------------------------------
# Test LLE by computing the reconstruction error on some manifolds.

def test_lle_simple_grid():
    # note: ARPACK is numerically unstable, so this test will fail for
    # some random seeds. We choose 2 because the tests pass.
    rng = np.random.RandomState(2)

    # grid of equidistant points in 2D, n_components = n_dim
    X = np.array(list(product(range(5), repeat=2)))
    X = X + 1e-10 * rng.uniform(size=X.shape)
    n_components = 2
    clf = manifold.LocallyLinearEmbedding(n_neighbors=5,
                                          n_components=n_components,
                                          random_state=rng)
    tol = 0.1

    N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray()
    reconstruction_error = linalg.norm(np.dot(N, X) - X, 'fro')
    assert reconstruction_error < tol

    for solver in eigen_solvers:
        clf.set_params(eigen_solver=solver)
        clf.fit(X)
        assert clf.embedding_.shape[1] == n_components
        reconstruction_error = linalg.norm(
            np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2

        assert reconstruction_error < tol
        assert_almost_equal(clf.reconstruction_error_,
                            reconstruction_error, decimal=1)

    # re-embed a noisy version of X using the transform method
    noise = rng.randn(*X.shape) / 100
    X_reembedded = clf.transform(X + noise)
    assert linalg.norm(X_reembedded - clf.embedding_) < tol


def test_lle_manifold():
    rng = np.random.RandomState(0)
    # similar test on a slightly more complex manifold
    X = np.array(list(product(np.arange(18), repeat=2)))
    X = np.c_[X, X[:, 0] ** 2 / 18]
    X = X + 1e-10 * rng.uniform(size=X.shape)
    n_components = 2
    for method in ["standard", "hessian", "modified", "ltsa"]:
        clf = manifold.LocallyLinearEmbedding(n_neighbors=6,
                                              n_components=n_components,
                                              method=method, random_state=0)
        tol = 1.5 if method == "standard" else 3

        N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray()
        reconstruction_error = linalg.norm(np.dot(N, X) - X)
        assert reconstruction_error < tol

        for solver in eigen_solvers:
            clf.set_params(eigen_solver=solver)
            clf.fit(X)
            assert clf.embedding_.shape[1] == n_components
            reconstruction_error = linalg.norm(
                np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2
            details = ("solver: %s, method: %s" % (solver, method))
            assert reconstruction_error < tol, details
            assert (np.abs(clf.reconstruction_error_ -
                           reconstruction_error) <
                    tol * reconstruction_error), details


# Test the error raised when a parameter passed to LLE is invalid
def test_lle_init_parameters():
    X = np.random.rand(5, 3)

    clf = manifold.LocallyLinearEmbedding(eigen_solver="error")
    msg = "unrecognized eigen_solver 'error'"
    assert_raise_message(ValueError, msg, clf.fit, X)

    clf = manifold.LocallyLinearEmbedding(method="error")
    msg = "unrecognized method 'error'"
    assert_raise_message(ValueError, msg, clf.fit, X)


def test_pipeline():
    # check that LocallyLinearEmbedding works fine as a Pipeline
    # only checks that no error is raised.
    # TODO check that it actually does something useful
    from sklearn import pipeline, datasets
    X, y = datasets.make_blobs(random_state=0)
    clf = pipeline.Pipeline(
        [('filter', manifold.LocallyLinearEmbedding(random_state=0)),
         ('clf', neighbors.KNeighborsClassifier())])
    clf.fit(X, y)
    assert .9 < clf.score(X, y)


# Test the error raised when the weight matrix is singular
def test_singular_matrix():
    M = np.ones((10, 3))
    f = ignore_warnings
    with pytest.raises(ValueError):
        f(manifold.locally_linear_embedding(M, n_neighbors=2, n_components=1,
                                            method='standard',
                                            eigen_solver='arpack'))


# regression test for #6033
def test_integer_input():
    rand = np.random.RandomState(0)
    X = rand.randint(0, 100, size=(20, 3))

    for method in ["standard", "hessian", "modified", "ltsa"]:
        clf = manifold.LocallyLinearEmbedding(method=method, n_neighbors=10)
        clf.fit(X)  # this previously raised a TypeError
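For context, a minimal sketch (not part of this commit) of the LocallyLinearEmbedding API under test; sizes and parameter values are illustrative assumptions.

# Hypothetical usage example; any of the four methods tested above can be
# selected via ``method=``.
import numpy as np
from sklearn import manifold

X = np.random.RandomState(0).rand(100, 3)
lle = manifold.LocallyLinearEmbedding(n_neighbors=10, n_components=2,
                                      method='standard', random_state=0)
X_2d = lle.fit_transform(X)
print(X_2d.shape, lle.reconstruction_error_)   # attributes asserted above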
64
venv/Lib/site-packages/sklearn/manifold/tests/test_mds.py
Normal file
@@ -0,0 +1,64 @@
import numpy as np
from numpy.testing import assert_array_almost_equal
import pytest

from sklearn.manifold import _mds as mds


def test_smacof():
    # test metric smacof using the data of "Modern Multidimensional Scaling",
    # Borg & Groenen, p 154
    sim = np.array([[0, 5, 3, 4],
                    [5, 0, 2, 2],
                    [3, 2, 0, 1],
                    [4, 2, 1, 0]])
    Z = np.array([[-.266, -.539],
                  [.451, .252],
                  [.016, -.238],
                  [-.200, .524]])
    X, _ = mds.smacof(sim, init=Z, n_components=2, max_iter=1, n_init=1)
    X_true = np.array([[-1.415, -2.471],
                       [1.633, 1.107],
                       [.249, -.067],
                       [-.468, 1.431]])
    assert_array_almost_equal(X, X_true, decimal=3)


def test_smacof_error():
    # Not symmetric similarity matrix:
    sim = np.array([[0, 5, 9, 4],
                    [5, 0, 2, 2],
                    [3, 2, 0, 1],
                    [4, 2, 1, 0]])

    with pytest.raises(ValueError):
        mds.smacof(sim)

    # Not square similarity matrix:
    sim = np.array([[0, 5, 9, 4],
                    [5, 0, 2, 2],
                    [4, 2, 1, 0]])

    with pytest.raises(ValueError):
        mds.smacof(sim)

    # init not None and not correct format:
    sim = np.array([[0, 5, 3, 4],
                    [5, 0, 2, 2],
                    [3, 2, 0, 1],
                    [4, 2, 1, 0]])

    Z = np.array([[-.266, -.539],
                  [.016, -.238],
                  [-.200, .524]])
    with pytest.raises(ValueError):
        mds.smacof(sim, init=Z, n_init=1)


def test_MDS():
    sim = np.array([[0, 5, 3, 4],
                    [5, 0, 2, 2],
                    [3, 2, 0, 1],
                    [4, 2, 1, 0]])
    mds_clf = mds.MDS(metric=False, n_jobs=3, dissimilarity="precomputed")
    mds_clf.fit(sim)
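For context, a minimal sketch (not part of this commit) of the smacof/MDS API these tests exercise, reusing the Borg & Groenen dissimilarity matrix from test_smacof(); parameter values are illustrative assumptions.

import numpy as np
from sklearn.manifold import MDS, smacof

sim = np.array([[0, 5, 3, 4],
                [5, 0, 2, 2],
                [3, 2, 0, 1],
                [4, 2, 1, 0]], dtype=float)
X, stress = smacof(sim, n_components=2, random_state=0)  # functional form
print(X.shape, stress)

mds = MDS(n_components=2, dissimilarity="precomputed", random_state=0)
print(mds.fit_transform(sim).shape)                      # estimator form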
347
venv/Lib/site-packages/sklearn/manifold/tests/test_spectral_embedding.py
Normal file
@@ -0,0 +1,347 @@
import pytest

import numpy as np

from scipy import sparse
from scipy.sparse import csgraph
from scipy.linalg import eigh

from sklearn.manifold import SpectralEmbedding
from sklearn.manifold._spectral_embedding import _graph_is_connected
from sklearn.manifold._spectral_embedding import _graph_connected_component
from sklearn.manifold import spectral_embedding
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.metrics import normalized_mutual_info_score
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.utils.extmath import _deterministic_vector_sign_flip
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_equal


# non centered, sparse centers to check the
centers = np.array([
    [0.0, 5.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 4.0, 0.0, 0.0],
    [1.0, 0.0, 0.0, 5.0, 1.0],
])
n_samples = 1000
n_clusters, n_features = centers.shape
S, true_labels = make_blobs(n_samples=n_samples, centers=centers,
                            cluster_std=1., random_state=42)


def _assert_equal_with_sign_flipping(A, B, tol=0.0):
    """ Check array A and B are equal with possible sign flipping on
    each column"""
    tol_squared = tol ** 2
    for A_col, B_col in zip(A.T, B.T):
        assert (np.max((A_col - B_col) ** 2) <= tol_squared or
                np.max((A_col + B_col) ** 2) <= tol_squared)


def test_sparse_graph_connected_component():
    rng = np.random.RandomState(42)
    n_samples = 300
    boundaries = [0, 42, 121, 200, n_samples]
    p = rng.permutation(n_samples)
    connections = []

    for start, stop in zip(boundaries[:-1], boundaries[1:]):
        group = p[start:stop]
        # Connect all elements within the group at least once via an
        # arbitrary path that spans the group.
        for i in range(len(group) - 1):
            connections.append((group[i], group[i + 1]))

        # Add some more random connections within the group
        min_idx, max_idx = 0, len(group) - 1
        n_random_connections = 1000
        source = rng.randint(min_idx, max_idx, size=n_random_connections)
        target = rng.randint(min_idx, max_idx, size=n_random_connections)
        connections.extend(zip(group[source], group[target]))

    # Build a symmetric affinity matrix
    row_idx, column_idx = tuple(np.array(connections).T)
    data = rng.uniform(.1, 42, size=len(connections))
    affinity = sparse.coo_matrix((data, (row_idx, column_idx)))
    affinity = 0.5 * (affinity + affinity.T)

    for start, stop in zip(boundaries[:-1], boundaries[1:]):
        component_1 = _graph_connected_component(affinity, p[start])
        component_size = stop - start
        assert component_1.sum() == component_size

        # We should retrieve the same component mask by starting by both ends
        # of the group
        component_2 = _graph_connected_component(affinity, p[stop - 1])
        assert component_2.sum() == component_size
        assert_array_equal(component_1, component_2)


def test_spectral_embedding_two_components(seed=36):
    # Test spectral embedding with two components
    random_state = np.random.RandomState(seed)
    n_sample = 100
    affinity = np.zeros(shape=[n_sample * 2, n_sample * 2])
    # first component
    affinity[0:n_sample,
             0:n_sample] = np.abs(random_state.randn(n_sample, n_sample)) + 2
    # second component
    affinity[n_sample::,
             n_sample::] = np.abs(random_state.randn(n_sample, n_sample)) + 2

    # Test of internal _graph_connected_component before connection
    component = _graph_connected_component(affinity, 0)
    assert component[:n_sample].all()
    assert not component[n_sample:].any()
    component = _graph_connected_component(affinity, -1)
    assert not component[:n_sample].any()
    assert component[n_sample:].all()

    # connection
    affinity[0, n_sample + 1] = 1
    affinity[n_sample + 1, 0] = 1
    affinity.flat[::2 * n_sample + 1] = 0
    affinity = 0.5 * (affinity + affinity.T)

    true_label = np.zeros(shape=2 * n_sample)
    true_label[0:n_sample] = 1

    se_precomp = SpectralEmbedding(n_components=1, affinity="precomputed",
                                   random_state=np.random.RandomState(seed))
    embedded_coordinate = se_precomp.fit_transform(affinity)
    # Some numpy versions are touchy with types
    embedded_coordinate = \
        se_precomp.fit_transform(affinity.astype(np.float32))
    # thresholding on the first components using 0.
    label_ = np.array(embedded_coordinate.ravel() < 0, dtype="float")
    assert normalized_mutual_info_score(
        true_label, label_) == pytest.approx(1.0)


@pytest.mark.parametrize("X", [S, sparse.csr_matrix(S)],
                         ids=["dense", "sparse"])
def test_spectral_embedding_precomputed_affinity(X, seed=36):
    # Test spectral embedding with precomputed kernel
    gamma = 1.0
    se_precomp = SpectralEmbedding(n_components=2, affinity="precomputed",
                                   random_state=np.random.RandomState(seed))
    se_rbf = SpectralEmbedding(n_components=2, affinity="rbf",
                               gamma=gamma,
                               random_state=np.random.RandomState(seed))
    embed_precomp = se_precomp.fit_transform(rbf_kernel(X, gamma=gamma))
    embed_rbf = se_rbf.fit_transform(X)
    assert_array_almost_equal(
        se_precomp.affinity_matrix_, se_rbf.affinity_matrix_)
    _assert_equal_with_sign_flipping(embed_precomp, embed_rbf, 0.05)


def test_precomputed_nearest_neighbors_filtering():
    # Test precomputed graph filtering when containing too many neighbors
    n_neighbors = 2
    results = []
    for additional_neighbors in [0, 10]:
        nn = NearestNeighbors(
            n_neighbors=n_neighbors + additional_neighbors).fit(S)
        graph = nn.kneighbors_graph(S, mode='connectivity')
        embedding = SpectralEmbedding(random_state=0, n_components=2,
                                      affinity='precomputed_nearest_neighbors',
                                      n_neighbors=n_neighbors
                                      ).fit(graph).embedding_
        results.append(embedding)

    assert_array_equal(results[0], results[1])


@pytest.mark.parametrize("X", [S, sparse.csr_matrix(S)],
                         ids=["dense", "sparse"])
def test_spectral_embedding_callable_affinity(X, seed=36):
    # Test spectral embedding with callable affinity
    gamma = 0.9
    kern = rbf_kernel(S, gamma=gamma)
    se_callable = SpectralEmbedding(n_components=2,
                                    affinity=(
                                        lambda x: rbf_kernel(x, gamma=gamma)),
                                    gamma=gamma,
                                    random_state=np.random.RandomState(seed))
    se_rbf = SpectralEmbedding(n_components=2, affinity="rbf",
                               gamma=gamma,
                               random_state=np.random.RandomState(seed))
    embed_rbf = se_rbf.fit_transform(X)
    embed_callable = se_callable.fit_transform(X)
    assert_array_almost_equal(
        se_callable.affinity_matrix_, se_rbf.affinity_matrix_)
    assert_array_almost_equal(kern, se_rbf.affinity_matrix_)
    _assert_equal_with_sign_flipping(embed_rbf, embed_callable, 0.05)


# TODO: Remove when pyamg replaces sp.rand calls with np.random.rand
# https://github.com/scikit-learn/scikit-learn/issues/15913
@pytest.mark.filterwarnings(
    "ignore:scipy.rand is deprecated:DeprecationWarning:pyamg.*")
def test_spectral_embedding_amg_solver(seed=36):
    # Test spectral embedding with amg solver
    pytest.importorskip('pyamg')

    se_amg = SpectralEmbedding(n_components=2, affinity="nearest_neighbors",
                               eigen_solver="amg", n_neighbors=5,
                               random_state=np.random.RandomState(seed))
    se_arpack = SpectralEmbedding(n_components=2, affinity="nearest_neighbors",
                                  eigen_solver="arpack", n_neighbors=5,
                                  random_state=np.random.RandomState(seed))
    embed_amg = se_amg.fit_transform(S)
    embed_arpack = se_arpack.fit_transform(S)
    _assert_equal_with_sign_flipping(embed_amg, embed_arpack, 1e-5)

    # same with special case in which amg is not actually used
    # regression test for #10715
    # affinity between nodes
    row = [0, 0, 1, 2, 3, 3, 4]
    col = [1, 2, 2, 3, 4, 5, 5]
    val = [100, 100, 100, 1, 100, 100, 100]

    affinity = sparse.coo_matrix((val + val, (row + col, col + row)),
                                 shape=(6, 6)).toarray()
    se_amg.affinity = "precomputed"
    se_arpack.affinity = "precomputed"
    embed_amg = se_amg.fit_transform(affinity)
    embed_arpack = se_arpack.fit_transform(affinity)
    _assert_equal_with_sign_flipping(embed_amg, embed_arpack, 1e-5)


# TODO: Remove filterwarnings when pyamg replaces sp.rand calls with
# np.random.rand:
# https://github.com/scikit-learn/scikit-learn/issues/15913
@pytest.mark.filterwarnings(
    "ignore:scipy.rand is deprecated:DeprecationWarning:pyamg.*")
def test_spectral_embedding_amg_solver_failure():
    # Non-regression test for amg solver failure (issue #13393 on github)
    pytest.importorskip('pyamg')
    seed = 36
    num_nodes = 100
    X = sparse.rand(num_nodes, num_nodes, density=0.1, random_state=seed)
    upper = sparse.triu(X) - sparse.diags(X.diagonal())
    sym_matrix = upper + upper.T
    embedding = spectral_embedding(sym_matrix,
                                   n_components=10,
                                   eigen_solver='amg',
                                   random_state=0)

    # Check that the learned embedding is stable w.r.t. random solver init:
    for i in range(3):
        new_embedding = spectral_embedding(sym_matrix,
                                           n_components=10,
                                           eigen_solver='amg',
                                           random_state=i + 1)
        _assert_equal_with_sign_flipping(embedding, new_embedding, tol=0.05)


@pytest.mark.filterwarnings("ignore:the behavior of nmi will "
                            "change in version 0.22")
def test_pipeline_spectral_clustering(seed=36):
    # Test using pipeline to do spectral clustering
    random_state = np.random.RandomState(seed)
    se_rbf = SpectralEmbedding(n_components=n_clusters,
                               affinity="rbf",
                               random_state=random_state)
    se_knn = SpectralEmbedding(n_components=n_clusters,
                               affinity="nearest_neighbors",
                               n_neighbors=5,
                               random_state=random_state)
    for se in [se_rbf, se_knn]:
        km = KMeans(n_clusters=n_clusters, random_state=random_state)
        km.fit(se.fit_transform(S))
        assert_array_almost_equal(
            normalized_mutual_info_score(
                km.labels_,
                true_labels), 1.0, 2)


def test_spectral_embedding_unknown_eigensolver(seed=36):
    # Test that SpectralClustering fails with an unknown eigensolver
    se = SpectralEmbedding(n_components=1, affinity="precomputed",
                           random_state=np.random.RandomState(seed),
                           eigen_solver="<unknown>")
    with pytest.raises(ValueError):
        se.fit(S)


def test_spectral_embedding_unknown_affinity(seed=36):
    # Test that SpectralClustering fails with an unknown affinity type
    se = SpectralEmbedding(n_components=1, affinity="<unknown>",
                           random_state=np.random.RandomState(seed))
    with pytest.raises(ValueError):
        se.fit(S)


def test_connectivity(seed=36):
    # Test that graph connectivity test works as expected
    graph = np.array([[1, 0, 0, 0, 0],
                      [0, 1, 1, 0, 0],
                      [0, 1, 1, 1, 0],
                      [0, 0, 1, 1, 1],
                      [0, 0, 0, 1, 1]])
    assert not _graph_is_connected(graph)
    assert not _graph_is_connected(sparse.csr_matrix(graph))
    assert not _graph_is_connected(sparse.csc_matrix(graph))
    graph = np.array([[1, 1, 0, 0, 0],
                      [1, 1, 1, 0, 0],
                      [0, 1, 1, 1, 0],
                      [0, 0, 1, 1, 1],
                      [0, 0, 0, 1, 1]])
    assert _graph_is_connected(graph)
    assert _graph_is_connected(sparse.csr_matrix(graph))
    assert _graph_is_connected(sparse.csc_matrix(graph))


def test_spectral_embedding_deterministic():
    # Test that Spectral Embedding is deterministic
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    embedding_1 = spectral_embedding(sims)
    embedding_2 = spectral_embedding(sims)
    assert_array_almost_equal(embedding_1, embedding_2)


def test_spectral_embedding_unnormalized():
    # Test that spectral_embedding is also processing unnormalized laplacian
    # correctly
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    n_components = 8
    embedding_1 = spectral_embedding(sims,
                                     norm_laplacian=False,
                                     n_components=n_components,
                                     drop_first=False)

    # Verify using manual computation with dense eigh
    laplacian, dd = csgraph.laplacian(sims, normed=False,
                                      return_diag=True)
    _, diffusion_map = eigh(laplacian)
    embedding_2 = diffusion_map.T[:n_components]
    embedding_2 = _deterministic_vector_sign_flip(embedding_2).T

    assert_array_almost_equal(embedding_1, embedding_2)


def test_spectral_embedding_first_eigen_vector():
    # Test that the first eigenvector of spectral_embedding
    # is constant and that the second is not (for a connected graph)
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    n_components = 2

    for seed in range(10):
        embedding = spectral_embedding(sims,
                                       norm_laplacian=False,
                                       n_components=n_components,
                                       drop_first=False,
                                       random_state=seed)

        assert np.std(embedding[:, 0]) == pytest.approx(0)
        assert np.std(embedding[:, 1]) > 1e-3
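For context, a minimal sketch (not part of this commit) of the SpectralEmbedding and spectral_embedding APIs under test; the data and affinity choice are illustrative assumptions.

import numpy as np
from sklearn.manifold import SpectralEmbedding, spectral_embedding
from sklearn.metrics.pairwise import rbf_kernel

X = np.random.RandomState(0).randn(60, 5)
emb = SpectralEmbedding(n_components=2, affinity="rbf").fit_transform(X)
print(emb.shape)  # (60, 2)

# The functional form takes a precomputed symmetric affinity matrix.
emb_fn = spectral_embedding(rbf_kernel(X), n_components=2, random_state=0)
print(emb_fn.shape)  # (60, 2)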
893
venv/Lib/site-packages/sklearn/manifold/tests/test_t_sne.py
Normal file
|
@ -0,0 +1,893 @@
|
|||
import sys
|
||||
from io import StringIO
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
import scipy.sparse as sp
|
||||
import pytest
|
||||
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
from sklearn.neighbors import kneighbors_graph
|
||||
from sklearn.exceptions import EfficiencyWarning
|
||||
from sklearn.utils._testing import ignore_warnings
|
||||
from sklearn.utils._testing import assert_almost_equal
|
||||
from sklearn.utils._testing import assert_array_equal
|
||||
from sklearn.utils._testing import assert_array_almost_equal
|
||||
from sklearn.utils._testing import skip_if_32bit
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.manifold._t_sne import _joint_probabilities
|
||||
from sklearn.manifold._t_sne import _joint_probabilities_nn
|
||||
from sklearn.manifold._t_sne import _kl_divergence
|
||||
from sklearn.manifold._t_sne import _kl_divergence_bh
|
||||
from sklearn.manifold._t_sne import _gradient_descent
|
||||
from sklearn.manifold._t_sne import trustworthiness
|
||||
from sklearn.manifold import TSNE
|
||||
# mypy error: Module 'sklearn.manifold' has no attribute '_barnes_hut_tsne'
|
||||
from sklearn.manifold import _barnes_hut_tsne # type: ignore
|
||||
from sklearn.manifold._utils import _binary_search_perplexity
|
||||
from sklearn.datasets import make_blobs
|
||||
from scipy.optimize import check_grad
|
||||
from scipy.spatial.distance import pdist
|
||||
from scipy.spatial.distance import squareform
|
||||
from sklearn.metrics.pairwise import pairwise_distances
|
||||
from sklearn.metrics.pairwise import manhattan_distances
|
||||
from sklearn.metrics.pairwise import cosine_distances
|
||||
|
||||
|
||||
x = np.linspace(0, 1, 10)
|
||||
xx, yy = np.meshgrid(x, x)
|
||||
X_2d_grid = np.hstack([
|
||||
xx.ravel().reshape(-1, 1),
|
||||
yy.ravel().reshape(-1, 1),
|
||||
])
|
||||
|
||||
|
||||
def test_gradient_descent_stops():
|
||||
# Test stopping conditions of gradient descent.
|
||||
class ObjectiveSmallGradient:
|
||||
def __init__(self):
|
||||
self.it = -1
|
||||
|
||||
def __call__(self, _, compute_error=True):
|
||||
self.it += 1
|
||||
return (10 - self.it) / 10.0, np.array([1e-5])
|
||||
|
||||
def flat_function(_, compute_error=True):
|
||||
return 0.0, np.ones(1)
|
||||
|
||||
# Gradient norm
|
||||
old_stdout = sys.stdout
|
||||
sys.stdout = StringIO()
|
||||
try:
|
||||
_, error, it = _gradient_descent(
|
||||
ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=100,
|
||||
n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
|
||||
min_gain=0.0, min_grad_norm=1e-5, verbose=2)
|
||||
finally:
|
||||
out = sys.stdout.getvalue()
|
||||
sys.stdout.close()
|
||||
sys.stdout = old_stdout
|
||||
assert error == 1.0
|
||||
assert it == 0
|
||||
assert("gradient norm" in out)
|
||||
|
||||
# Maximum number of iterations without improvement
|
||||
old_stdout = sys.stdout
|
||||
sys.stdout = StringIO()
|
||||
try:
|
||||
_, error, it = _gradient_descent(
|
||||
flat_function, np.zeros(1), 0, n_iter=100,
|
||||
n_iter_without_progress=10, momentum=0.0, learning_rate=0.0,
|
||||
min_gain=0.0, min_grad_norm=0.0, verbose=2)
|
||||
finally:
|
||||
out = sys.stdout.getvalue()
|
||||
sys.stdout.close()
|
||||
sys.stdout = old_stdout
|
||||
assert error == 0.0
|
||||
assert it == 11
|
||||
assert("did not make any progress" in out)
|
||||
|
||||
# Maximum number of iterations
|
||||
old_stdout = sys.stdout
|
||||
sys.stdout = StringIO()
|
||||
try:
|
||||
_, error, it = _gradient_descent(
|
||||
ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=11,
|
||||
n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
|
||||
min_gain=0.0, min_grad_norm=0.0, verbose=2)
|
||||
finally:
|
||||
out = sys.stdout.getvalue()
|
||||
sys.stdout.close()
|
||||
sys.stdout = old_stdout
|
||||
assert error == 0.0
|
||||
assert it == 10
|
||||
assert("Iteration 10" in out)
|
||||
|
||||
|
||||
def test_binary_search():
|
||||
# Test if the binary search finds Gaussians with desired perplexity.
|
||||
random_state = check_random_state(0)
|
||||
data = random_state.randn(50, 5)
|
||||
distances = pairwise_distances(data).astype(np.float32)
|
||||
desired_perplexity = 25.0
|
||||
P = _binary_search_perplexity(distances, desired_perplexity, verbose=0)
|
||||
P = np.maximum(P, np.finfo(np.double).eps)
|
||||
mean_perplexity = np.mean([np.exp(-np.sum(P[i] * np.log(P[i])))
|
||||
for i in range(P.shape[0])])
|
||||
assert_almost_equal(mean_perplexity, desired_perplexity, decimal=3)
|
||||
|
||||
|
||||
def test_binary_search_neighbors():
|
||||
# Binary perplexity search approximation.
|
||||
# Should be approximately equal to the slow method when we use
|
||||
# all points as neighbors.
|
||||
n_samples = 200
|
||||
desired_perplexity = 25.0
|
||||
random_state = check_random_state(0)
|
||||
data = random_state.randn(n_samples, 2).astype(np.float32, copy=False)
|
||||
distances = pairwise_distances(data)
|
||||
P1 = _binary_search_perplexity(distances, desired_perplexity, verbose=0)
|
||||
|
||||
# Test that when we use all the neighbors the results are identical
|
||||
n_neighbors = n_samples - 1
|
||||
nn = NearestNeighbors().fit(data)
|
||||
distance_graph = nn.kneighbors_graph(n_neighbors=n_neighbors,
|
||||
mode='distance')
|
||||
distances_nn = distance_graph.data.astype(np.float32, copy=False)
|
||||
distances_nn = distances_nn.reshape(n_samples, n_neighbors)
|
||||
P2 = _binary_search_perplexity(distances_nn, desired_perplexity, verbose=0)
|
||||
|
||||
indptr = distance_graph.indptr
|
||||
P1_nn = np.array([P1[k, distance_graph.indices[indptr[k]:indptr[k + 1]]]
|
||||
for k in range(n_samples)])
|
||||
assert_array_almost_equal(P1_nn, P2, decimal=4)
|
||||
|
||||
# Test that the highest P_ij are the same when fewer neighbors are used
|
||||
for k in np.linspace(150, n_samples - 1, 5):
|
||||
k = int(k)
|
||||
topn = k * 10 # check the top 10 * k entries out of k * k entries
|
||||
distance_graph = nn.kneighbors_graph(n_neighbors=k, mode='distance')
|
||||
distances_nn = distance_graph.data.astype(np.float32, copy=False)
|
||||
distances_nn = distances_nn.reshape(n_samples, k)
|
||||
P2k = _binary_search_perplexity(distances_nn, desired_perplexity,
|
||||
verbose=0)
|
||||
assert_array_almost_equal(P1_nn, P2, decimal=2)
|
||||
idx = np.argsort(P1.ravel())[::-1]
|
||||
P1top = P1.ravel()[idx][:topn]
|
||||
idx = np.argsort(P2k.ravel())[::-1]
|
||||
P2top = P2k.ravel()[idx][:topn]
|
||||
assert_array_almost_equal(P1top, P2top, decimal=2)
|
||||
|
||||
|
||||
def test_binary_perplexity_stability():
|
||||
# Binary perplexity search should be stable.
|
||||
# The binary_search_perplexity had a bug wherein the P array
|
||||
# was uninitialized, leading to sporadically failing tests.
|
||||
n_neighbors = 10
|
||||
n_samples = 100
|
||||
random_state = check_random_state(0)
|
||||
data = random_state.randn(n_samples, 5)
|
||||
nn = NearestNeighbors().fit(data)
|
||||
distance_graph = nn.kneighbors_graph(n_neighbors=n_neighbors,
|
||||
mode='distance')
|
||||
distances = distance_graph.data.astype(np.float32, copy=False)
|
||||
distances = distances.reshape(n_samples, n_neighbors)
|
||||
last_P = None
|
||||
desired_perplexity = 3
|
||||
for _ in range(100):
|
||||
P = _binary_search_perplexity(distances.copy(), desired_perplexity,
|
||||
verbose=0)
|
||||
P1 = _joint_probabilities_nn(distance_graph, desired_perplexity,
|
||||
verbose=0)
|
||||
# Convert the sparse matrix to a dense one for testing
|
||||
P1 = P1.toarray()
|
||||
if last_P is None:
|
||||
last_P = P
|
||||
last_P1 = P1
|
||||
else:
|
||||
assert_array_almost_equal(P, last_P, decimal=4)
|
||||
assert_array_almost_equal(P1, last_P1, decimal=4)
|
||||
|
||||
|
||||
def test_gradient():
|
||||
# Test gradient of Kullback-Leibler divergence.
|
||||
random_state = check_random_state(0)
|
||||
|
||||
n_samples = 50
|
||||
n_features = 2
|
||||
n_components = 2
|
||||
alpha = 1.0
|
||||
|
||||
distances = random_state.randn(n_samples, n_features).astype(np.float32)
|
||||
distances = np.abs(distances.dot(distances.T))
|
||||
np.fill_diagonal(distances, 0.0)
|
||||
X_embedded = random_state.randn(n_samples, n_components).astype(np.float32)
|
||||
|
||||
P = _joint_probabilities(distances, desired_perplexity=25.0,
|
||||
verbose=0)
|
||||
|
||||
def fun(params):
|
||||
return _kl_divergence(params, P, alpha, n_samples, n_components)[0]
|
||||
|
||||
def grad(params):
|
||||
return _kl_divergence(params, P, alpha, n_samples, n_components)[1]
|
||||
|
||||
assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0,
|
||||
decimal=5)
|
||||
|
||||
|
||||
def test_trustworthiness():
|
||||
# Test trustworthiness score.
|
||||
random_state = check_random_state(0)
|
||||
|
||||
# Affine transformation
|
||||
X = random_state.randn(100, 2)
|
||||
assert trustworthiness(X, 5.0 + X / 10.0) == 1.0
|
||||
|
||||
# Randomly shuffled
|
||||
X = np.arange(100).reshape(-1, 1)
|
||||
X_embedded = X.copy()
|
||||
random_state.shuffle(X_embedded)
|
||||
assert trustworthiness(X, X_embedded) < 0.6
|
||||
|
||||
# Completely different
|
||||
X = np.arange(5).reshape(-1, 1)
|
||||
X_embedded = np.array([[0], [2], [4], [1], [3]])
|
||||
assert_almost_equal(trustworthiness(X, X_embedded, n_neighbors=1), 0.2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ['exact', 'barnes_hut'])
|
||||
@pytest.mark.parametrize("init", ('random', 'pca'))
|
||||
def test_preserve_trustworthiness_approximately(method, init):
|
||||
# Nearest neighbors should be preserved approximately.
|
||||
random_state = check_random_state(0)
|
||||
n_components = 2
|
||||
X = random_state.randn(50, n_components).astype(np.float32)
|
||||
tsne = TSNE(n_components=n_components, init=init, random_state=0,
|
||||
method=method, n_iter=700)
|
||||
X_embedded = tsne.fit_transform(X)
|
||||
t = trustworthiness(X, X_embedded, n_neighbors=1)
|
||||
assert t > 0.85
|
||||
|
||||
|
||||
def test_optimization_minimizes_kl_divergence():
|
||||
"""t-SNE should give a lower KL divergence with more iterations."""
|
||||
random_state = check_random_state(0)
|
||||
X, _ = make_blobs(n_features=3, random_state=random_state)
|
||||
kl_divergences = []
|
||||
for n_iter in [250, 300, 350]:
|
||||
tsne = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
|
||||
n_iter=n_iter, random_state=0)
|
||||
tsne.fit_transform(X)
|
||||
kl_divergences.append(tsne.kl_divergence_)
|
||||
assert kl_divergences[1] <= kl_divergences[0]
|
||||
assert kl_divergences[2] <= kl_divergences[1]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ['exact', 'barnes_hut'])
|
||||
def test_fit_csr_matrix(method):
|
||||
# X can be a sparse matrix.
|
||||
rng = check_random_state(0)
|
||||
X = rng.randn(50, 2)
|
||||
X[(rng.randint(0, 50, 25), rng.randint(0, 2, 25))] = 0.0
|
||||
X_csr = sp.csr_matrix(X)
|
||||
tsne = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
|
||||
random_state=0, method=method, n_iter=750)
|
||||
X_embedded = tsne.fit_transform(X_csr)
|
||||
assert_allclose(trustworthiness(X_csr, X_embedded, n_neighbors=1),
|
||||
1.0, rtol=1.1e-1)
|
||||
|
||||
|
||||
def test_preserve_trustworthiness_approximately_with_precomputed_distances():
|
||||
# Nearest neighbors should be preserved approximately.
|
||||
random_state = check_random_state(0)
|
||||
for i in range(3):
|
||||
X = random_state.randn(80, 2)
|
||||
D = squareform(pdist(X), "sqeuclidean")
|
||||
tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
|
||||
early_exaggeration=2.0, metric="precomputed",
|
||||
random_state=i, verbose=0, n_iter=500)
|
||||
X_embedded = tsne.fit_transform(D)
|
||||
t = trustworthiness(D, X_embedded, n_neighbors=1, metric="precomputed")
|
||||
assert t > .95
|
||||
|
||||
|
||||
def test_trustworthiness_not_euclidean_metric():
|
||||
# Test trustworthiness with a metric different from 'euclidean' and
|
||||
# 'precomputed'
|
||||
random_state = check_random_state(0)
|
||||
X = random_state.randn(100, 2)
|
||||
assert (trustworthiness(X, X, metric='cosine') ==
|
||||
trustworthiness(pairwise_distances(X, metric='cosine'), X,
|
||||
metric='precomputed'))
|
||||
|
||||
|
||||
def test_early_exaggeration_too_small():
|
||||
# Early exaggeration factor must be >= 1.
|
||||
tsne = TSNE(early_exaggeration=0.99)
|
||||
with pytest.raises(ValueError, match="early_exaggeration .*"):
|
||||
tsne.fit_transform(np.array([[0.0], [0.0]]))
|
||||
|
||||
|
||||
def test_too_few_iterations():
|
||||
# Number of gradient descent iterations must be at least 200.
|
||||
tsne = TSNE(n_iter=199)
|
||||
with pytest.raises(ValueError, match="n_iter .*"):
|
||||
tsne.fit_transform(np.array([[0.0], [0.0]]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method, retype', [
|
||||
('exact', np.asarray),
|
||||
('barnes_hut', np.asarray),
|
||||
('barnes_hut', sp.csr_matrix),
|
||||
])
|
||||
@pytest.mark.parametrize('D, message_regex', [
|
||||
([[0.0], [1.0]], ".* square distance matrix"),
|
||||
([[0., -1.], [1., 0.]], ".* positive.*"),
|
||||
])
|
||||
def test_bad_precomputed_distances(method, D, retype, message_regex):
|
||||
tsne = TSNE(metric="precomputed", method=method)
|
||||
with pytest.raises(ValueError, match=message_regex):
|
||||
tsne.fit_transform(retype(D))
|
||||
|
||||
|
||||
def test_exact_no_precomputed_sparse():
|
||||
tsne = TSNE(metric='precomputed', method='exact')
|
||||
with pytest.raises(TypeError, match='sparse'):
|
||||
tsne.fit_transform(sp.csr_matrix([[0, 5], [5, 0]]))
|
||||
|
||||
|
||||
def test_high_perplexity_precomputed_sparse_distances():
|
||||
# Perplexity should be less than 50
|
||||
dist = np.array([[1., 0., 0.], [0., 1., 0.], [1., 0., 0.]])
|
||||
bad_dist = sp.csr_matrix(dist)
|
||||
tsne = TSNE(metric="precomputed")
|
||||
msg = "3 neighbors per samples are required, but some samples have only 1"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
tsne.fit_transform(bad_dist)
|
||||
|
||||
|
||||
@ignore_warnings(category=EfficiencyWarning)
|
||||
def test_sparse_precomputed_distance():
|
||||
"""Make sure that TSNE works identically for sparse and dense matrix"""
|
||||
random_state = check_random_state(0)
|
||||
X = random_state.randn(100, 2)
|
||||
|
||||
D_sparse = kneighbors_graph(X, n_neighbors=100, mode='distance',
|
||||
include_self=True)
|
||||
D = pairwise_distances(X)
|
||||
assert sp.issparse(D_sparse)
|
||||
assert_almost_equal(D_sparse.A, D)
|
||||
|
||||
tsne = TSNE(metric="precomputed", random_state=0)
|
||||
Xt_dense = tsne.fit_transform(D)
|
||||
|
||||
for fmt in ['csr', 'lil']:
|
||||
Xt_sparse = tsne.fit_transform(D_sparse.asformat(fmt))
|
||||
assert_almost_equal(Xt_dense, Xt_sparse)
|
||||
|
||||
|
||||
def test_non_positive_computed_distances():
|
||||
# Computed distance matrices must be positive.
|
||||
def metric(x, y):
|
||||
return -1
|
||||
|
||||
tsne = TSNE(metric=metric, method='exact')
|
||||
X = np.array([[0.0, 0.0], [1.0, 1.0]])
|
||||
with pytest.raises(ValueError, match="All distances .*metric given.*"):
|
||||
tsne.fit_transform(X)
|
||||
|
||||
|
||||
def test_init_not_available():
|
||||
# 'init' must be 'pca', 'random', or numpy array.
|
||||
tsne = TSNE(init="not available")
|
||||
m = "'init' must be 'pca', 'random', or a numpy array"
|
||||
with pytest.raises(ValueError, match=m):
|
||||
tsne.fit_transform(np.array([[0.0], [1.0]]))
|
||||
|
||||
|
||||
def test_init_ndarray():
|
||||
# Initialize TSNE with ndarray and test fit
|
||||
tsne = TSNE(init=np.zeros((100, 2)))
|
||||
X_embedded = tsne.fit_transform(np.ones((100, 5)))
|
||||
assert_array_equal(np.zeros((100, 2)), X_embedded)
|
||||
|
||||
|
||||
def test_init_ndarray_precomputed():
|
||||
# Initialize TSNE with ndarray and metric 'precomputed'
|
||||
# Make sure no FutureWarning is thrown from _fit
|
||||
tsne = TSNE(init=np.zeros((100, 2)), metric="precomputed")
|
||||
tsne.fit(np.zeros((100, 100)))
|
||||
|
||||
|
||||
def test_distance_not_available():
|
||||
# 'metric' must be valid.
|
||||
tsne = TSNE(metric="not available", method='exact')
|
||||
with pytest.raises(ValueError, match="Unknown metric not available.*"):
|
||||
tsne.fit_transform(np.array([[0.0], [1.0]]))
|
||||
|
||||
tsne = TSNE(metric="not available", method='barnes_hut')
|
||||
with pytest.raises(ValueError, match="Metric 'not available' not valid.*"):
|
||||
tsne.fit_transform(np.array([[0.0], [1.0]]))
|
||||
|
||||
|
||||
def test_method_not_available():
|
||||
# 'nethod' must be 'barnes_hut' or 'exact'
|
||||
tsne = TSNE(method='not available')
|
||||
with pytest.raises(ValueError, match="'method' must be 'barnes_hut' or "):
|
||||
tsne.fit_transform(np.array([[0.0], [1.0]]))
|
||||
|
||||
|
||||
def test_angle_out_of_range_checks():
|
||||
# check the angle parameter range
|
||||
for angle in [-1, -1e-6, 1 + 1e-6, 2]:
|
||||
tsne = TSNE(angle=angle)
|
||||
with pytest.raises(ValueError, match="'angle' must be between "
|
||||
"0.0 - 1.0"):
|
||||
tsne.fit_transform(np.array([[0.0], [1.0]]))
|
||||
|
||||
|
||||
def test_pca_initialization_not_compatible_with_precomputed_kernel():
|
||||
# Precomputed distance matrices must be square matrices.
|
||||
tsne = TSNE(metric="precomputed", init="pca")
|
||||
with pytest.raises(ValueError, match="The parameter init=\"pca\" cannot"
|
||||
" be used with"
|
||||
" metric=\"precomputed\"."):
|
||||
tsne.fit_transform(np.array([[0.0], [1.0]]))
|
||||
|
||||
|
||||
def test_n_components_range():
|
||||
# barnes_hut method should only be used with n_components <= 3
|
||||
tsne = TSNE(n_components=4, method="barnes_hut")
|
||||
with pytest.raises(ValueError, match="'n_components' should be .*"):
|
||||
tsne.fit_transform(np.array([[0.0], [1.0]]))
|
||||
|
||||
|
||||
def test_early_exaggeration_used():
|
||||
# check that the ``early_exaggeration`` parameter has an effect
|
||||
random_state = check_random_state(0)
|
||||
n_components = 2
|
||||
methods = ['exact', 'barnes_hut']
|
||||
X = random_state.randn(25, n_components).astype(np.float32)
|
||||
for method in methods:
|
||||
tsne = TSNE(n_components=n_components, perplexity=1,
|
||||
learning_rate=100.0, init="pca", random_state=0,
|
||||
method=method, early_exaggeration=1.0, n_iter=250)
|
||||
X_embedded1 = tsne.fit_transform(X)
|
||||
tsne = TSNE(n_components=n_components, perplexity=1,
|
||||
learning_rate=100.0, init="pca", random_state=0,
|
||||
method=method, early_exaggeration=10.0, n_iter=250)
|
||||
X_embedded2 = tsne.fit_transform(X)
|
||||
|
||||
assert not np.allclose(X_embedded1, X_embedded2)
|
||||
|
||||
|
||||
def test_n_iter_used():
|
||||
# check that the ``n_iter`` parameter has an effect
|
||||
random_state = check_random_state(0)
|
||||
n_components = 2
|
||||
methods = ['exact', 'barnes_hut']
|
||||
X = random_state.randn(25, n_components).astype(np.float32)
|
||||
for method in methods:
|
||||
for n_iter in [251, 500]:
|
||||
tsne = TSNE(n_components=n_components, perplexity=1,
|
||||
learning_rate=0.5, init="random", random_state=0,
|
||||
method=method, early_exaggeration=1.0, n_iter=n_iter)
|
||||
tsne.fit_transform(X)
|
||||
|
||||
assert tsne.n_iter_ == n_iter - 1
|
||||
|
||||
|
||||
def test_answer_gradient_two_points():
|
||||
# Test the tree with only a single set of children.
|
||||
#
|
||||
# These tests & answers have been checked against the reference
|
||||
# implementation by LvdM.
|
||||
pos_input = np.array([[1.0, 0.0], [0.0, 1.0]])
|
||||
pos_output = np.array([[-4.961291e-05, -1.072243e-04],
|
||||
[9.259460e-05, 2.702024e-04]])
|
||||
neighbors = np.array([[1],
|
||||
[0]])
|
||||
grad_output = np.array([[-2.37012478e-05, -6.29044398e-05],
|
||||
[2.37012478e-05, 6.29044398e-05]])
|
||||
_run_answer_test(pos_input, pos_output, neighbors, grad_output)
|
||||
|
||||
|
||||
def test_answer_gradient_four_points():
|
||||
# Four points tests the tree with multiple levels of children.
|
||||
#
|
||||
# These tests & answers have been checked against the reference
|
||||
# implementation by LvdM.
|
||||
pos_input = np.array([[1.0, 0.0], [0.0, 1.0],
|
||||
[5.0, 2.0], [7.3, 2.2]])
|
||||
pos_output = np.array([[6.080564e-05, -7.120823e-05],
|
||||
[-1.718945e-04, -4.000536e-05],
|
||||
[-2.271720e-04, 8.663310e-05],
|
||||
[-1.032577e-04, -3.582033e-05]])
|
||||
neighbors = np.array([[1, 2, 3],
|
||||
[0, 2, 3],
|
||||
[1, 0, 3],
|
||||
[1, 2, 0]])
|
||||
grad_output = np.array([[5.81128448e-05, -7.78033454e-06],
|
||||
[-5.81526851e-05, 7.80976444e-06],
|
||||
[4.24275173e-08, -3.69569698e-08],
|
||||
[-2.58720939e-09, 7.52706374e-09]])
|
||||
_run_answer_test(pos_input, pos_output, neighbors, grad_output)
|
||||
|
||||
|
||||
def test_skip_num_points_gradient():
|
||||
# Test the kwargs option skip_num_points.
|
||||
#
|
||||
# Skip num points should make it such that the Barnes_hut gradient
|
||||
# is not calculated for indices below skip_num_point.
|
||||
# Aside from skip_num_points=2 and the first two gradient rows
|
||||
# being set to zero, these data points are the same as in
|
||||
# test_answer_gradient_four_points()
|
||||
pos_input = np.array([[1.0, 0.0], [0.0, 1.0],
|
||||
[5.0, 2.0], [7.3, 2.2]])
|
||||
pos_output = np.array([[6.080564e-05, -7.120823e-05],
|
||||
[-1.718945e-04, -4.000536e-05],
|
||||
[-2.271720e-04, 8.663310e-05],
|
||||
[-1.032577e-04, -3.582033e-05]])
|
||||
neighbors = np.array([[1, 2, 3],
|
||||
[0, 2, 3],
|
||||
[1, 0, 3],
|
||||
[1, 2, 0]])
|
||||
grad_output = np.array([[0.0, 0.0],
|
||||
[0.0, 0.0],
|
||||
[4.24275173e-08, -3.69569698e-08],
|
||||
[-2.58720939e-09, 7.52706374e-09]])
|
||||
_run_answer_test(pos_input, pos_output, neighbors, grad_output,
|
||||
False, 0.1, 2)
|
||||
|
||||
|
||||
def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
|
||||
verbose=False, perplexity=0.1, skip_num_points=0):
|
||||
distances = pairwise_distances(pos_input).astype(np.float32)
|
||||
args = distances, perplexity, verbose
|
||||
pos_output = pos_output.astype(np.float32)
|
||||
neighbors = neighbors.astype(np.int64, copy=False)
|
||||
pij_input = _joint_probabilities(*args)
|
||||
pij_input = squareform(pij_input).astype(np.float32)
|
||||
grad_bh = np.zeros(pos_output.shape, dtype=np.float32)
|
||||
|
||||
from scipy.sparse import csr_matrix
|
||||
P = csr_matrix(pij_input)
|
||||
|
||||
neighbors = P.indices.astype(np.int64)
|
||||
indptr = P.indptr.astype(np.int64)
|
||||
|
||||
_barnes_hut_tsne.gradient(P.data, pos_output, neighbors, indptr,
|
||||
grad_bh, 0.5, 2, 1, skip_num_points=0)
|
||||
assert_array_almost_equal(grad_bh, grad_output, decimal=4)
|
||||
|
||||
|
||||
def test_verbose():
|
||||
# Verbose options write to stdout.
|
||||
random_state = check_random_state(0)
|
||||
tsne = TSNE(verbose=2)
|
||||
X = random_state.randn(5, 2)
|
||||
|
||||
old_stdout = sys.stdout
|
||||
sys.stdout = StringIO()
|
||||
try:
|
||||
tsne.fit_transform(X)
|
||||
finally:
|
||||
out = sys.stdout.getvalue()
|
||||
sys.stdout.close()
|
||||
sys.stdout = old_stdout
|
||||
|
||||
assert("[t-SNE]" in out)
|
||||
assert("nearest neighbors..." in out)
|
||||
assert("Computed conditional probabilities" in out)
|
||||
assert("Mean sigma" in out)
|
||||
assert("early exaggeration" in out)
|
||||
|
||||
|
||||
def test_chebyshev_metric():
|
||||
# t-SNE should allow metrics that cannot be squared (issue #3526).
|
||||
random_state = check_random_state(0)
|
||||
tsne = TSNE(metric="chebyshev")
|
||||
X = random_state.randn(5, 2)
|
||||
tsne.fit_transform(X)
|
||||
|
||||
|
||||
def test_reduction_to_one_component():
|
||||
# t-SNE should allow reduction to one component (issue #4154).
|
||||
random_state = check_random_state(0)
|
||||
tsne = TSNE(n_components=1)
|
||||
X = random_state.randn(5, 2)
|
||||
X_embedded = tsne.fit(X).embedding_
|
||||
assert(np.all(np.isfinite(X_embedded)))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ['barnes_hut', 'exact'])
|
||||
@pytest.mark.parametrize('dt', [np.float32, np.float64])
|
||||
def test_64bit(method, dt):
|
||||
# Ensure 64bit arrays are handled correctly.
|
||||
random_state = check_random_state(0)
|
||||
|
||||
X = random_state.randn(10, 2).astype(dt, copy=False)
|
||||
tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
|
||||
random_state=0, method=method, verbose=0,
|
||||
n_iter=300)
|
||||
X_embedded = tsne.fit_transform(X)
|
||||
effective_type = X_embedded.dtype
|
||||
|
||||
# tsne cython code is only single precision, so the output will
|
||||
# always be single precision, irrespectively of the input dtype
|
||||
assert effective_type == np.float32
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ['barnes_hut', 'exact'])
|
||||
def test_kl_divergence_not_nan(method):
|
||||
# Ensure kl_divergence_ is computed at last iteration
|
||||
# even though n_iter % n_iter_check != 0, i.e. 1003 % 50 != 0
|
||||
random_state = check_random_state(0)
|
||||
|
||||
X = random_state.randn(50, 2)
|
||||
tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
|
||||
random_state=0, method=method, verbose=0, n_iter=503)
|
||||
tsne.fit_transform(X)
|
||||
|
||||
assert not np.isnan(tsne.kl_divergence_)
|
||||
|
||||
|
||||
def test_barnes_hut_angle():
|
||||
# When Barnes-Hut's angle=0 this corresponds to the exact method.
|
||||
angle = 0.0
|
||||
perplexity = 10
|
||||
n_samples = 100
|
||||
for n_components in [2, 3]:
|
||||
n_features = 5
|
||||
degrees_of_freedom = float(n_components - 1.0)
|
||||
|
||||
random_state = check_random_state(0)
|
||||
data = random_state.randn(n_samples, n_features)
|
||||
distances = pairwise_distances(data)
|
||||
params = random_state.randn(n_samples, n_components)
|
||||
P = _joint_probabilities(distances, perplexity, verbose=0)
|
||||
kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
|
||||
n_samples, n_components)
|
||||
|
||||
n_neighbors = n_samples - 1
|
||||
distances_csr = NearestNeighbors().fit(data).kneighbors_graph(
|
||||
n_neighbors=n_neighbors, mode='distance')
|
||||
P_bh = _joint_probabilities_nn(distances_csr, perplexity, verbose=0)
|
||||
kl_bh, grad_bh = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
|
||||
n_samples, n_components,
|
||||
angle=angle, skip_num_points=0,
|
||||
verbose=0)
|
||||
|
||||
P = squareform(P)
|
||||
P_bh = P_bh.toarray()
|
||||
assert_array_almost_equal(P_bh, P, decimal=5)
|
||||
assert_almost_equal(kl_exact, kl_bh, decimal=3)
|
||||
|
||||
|
||||
@skip_if_32bit
|
||||
def test_n_iter_without_progress():
|
||||
# Use a dummy negative n_iter_without_progress and check output on stdout
|
||||
random_state = check_random_state(0)
|
||||
X = random_state.randn(100, 10)
|
||||
for method in ["barnes_hut", "exact"]:
|
||||
tsne = TSNE(n_iter_without_progress=-1, verbose=2, learning_rate=1e8,
|
||||
random_state=0, method=method, n_iter=351, init="random")
|
||||
tsne._N_ITER_CHECK = 1
|
||||
tsne._EXPLORATION_N_ITER = 0
|
||||
|
||||
old_stdout = sys.stdout
|
||||
sys.stdout = StringIO()
|
||||
try:
|
||||
tsne.fit_transform(X)
|
||||
finally:
|
||||
out = sys.stdout.getvalue()
|
||||
sys.stdout.close()
|
||||
sys.stdout = old_stdout
|
||||
|
||||
# The output needs to contain the value of n_iter_without_progress
|
||||
assert ("did not make any progress during the "
|
||||
"last -1 episodes. Finished." in out)


def test_min_grad_norm():
    # Make sure that the parameter min_grad_norm is used correctly
    random_state = check_random_state(0)
    X = random_state.randn(100, 2)
    min_grad_norm = 0.002
    tsne = TSNE(min_grad_norm=min_grad_norm, verbose=2,
                random_state=0, method='exact')

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        tsne.fit_transform(X)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout

    lines_out = out.split('\n')

    # extract the gradient norm from the verbose output
    gradient_norm_values = []
    for line in lines_out:
        # Once the computation is finished, only an old gradient norm value
        # is repeated, which we do not need to store
        if 'Finished' in line:
            break

        start_grad_norm = line.find('gradient norm')
        if start_grad_norm >= 0:
            line = line[start_grad_norm:]
            line = line.replace('gradient norm = ', '').split(' ')[0]
            gradient_norm_values.append(float(line))

    # Compute how often the gradient norm is smaller than min_grad_norm
    gradient_norm_values = np.array(gradient_norm_values)
    n_smaller_gradient_norms = len(
        gradient_norm_values[gradient_norm_values <= min_grad_norm])

    # The gradient norm can be smaller than min_grad_norm at most once,
    # because the optimization stops as soon as it drops below that value
    assert n_smaller_gradient_norms <= 1
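

# Illustrative sketch (not part of the original suite): the line-by-line
# parsing above can be condensed into a single regular expression. The
# ``gradient norm = <float>`` format is assumed from the parsing code above;
# unlike the loop, this variant does not stop at the 'Finished' line, so it
# would also pick up the repeated final value.
def _demo_extract_gradient_norms(out):
    import re
    return [float(v) for v in re.findall(r'gradient norm = (\S+)', out)]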


def test_accessible_kl_divergence():
    # Ensures that the accessible kl_divergence matches the computed value
    random_state = check_random_state(0)
    X = random_state.randn(50, 2)
    tsne = TSNE(n_iter_without_progress=2, verbose=2,
                random_state=0, method='exact',
                n_iter=500)

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        tsne.fit_transform(X)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout

    # The output needs to contain the accessible kl_divergence as the error
    # at the last iteration
    for line in out.split('\n')[::-1]:
        if 'Iteration' in line:
            _, _, error = line.partition('error = ')
            if error:
                error, _, _ = error.partition(',')
                break
    assert_almost_equal(tsne.kl_divergence_, float(error), decimal=5)
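

# Illustrative sketch (not part of the original suite): how the
# ``str.partition`` calls above pull the error value out of a verbose line.
# The line below is a hypothetical example of the format assumed by that
# parsing code.
def _demo_parse_error_from_line():
    line = "[t-SNE] Iteration 500: error = 0.8372247, gradient norm = ..."
    _, _, error = line.partition('error = ')
    error, _, _ = error.partition(',')
    assert float(error) == pytest.approx(0.8372247)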


@pytest.mark.parametrize('method', ['barnes_hut', 'exact'])
def test_uniform_grid(method):
    """Make sure that TSNE can approximately recover a uniform 2D grid.

    Due to ties in distances between points in X_2d_grid, this test is
    platform dependent for ``method='barnes_hut'`` because of numerical
    imprecision.

    Also, t-SNE is not assured to converge to the right solution because bad
    initialization can lead to convergence to a bad local minimum (the
    optimization problem is non-convex). To avoid breaking the test too often,
    we re-run t-SNE from the final point when the convergence is not good
    enough.
    """
    seeds = range(3)
    n_iter = 500
    for seed in seeds:
        tsne = TSNE(n_components=2, init='random', random_state=seed,
                    perplexity=50, n_iter=n_iter, method=method)
        Y = tsne.fit_transform(X_2d_grid)

        try_name = "{}_{}".format(method, seed)
        try:
            assert_uniform_grid(Y, try_name)
        except AssertionError:
            # If the test fails a first time, re-run with init=Y to see if
            # this was caused by a bad initialization. Note that this will
            # also run an early_exaggeration step.
            try_name += ":rerun"
            tsne.init = Y
            Y = tsne.fit_transform(X_2d_grid)
            assert_uniform_grid(Y, try_name)


def assert_uniform_grid(Y, try_name=None):
    # Ensure that the resulting embedding leads to approximately
    # uniformly spaced points: the distance to the closest neighbors
    # should be non-zero and approximately constant.
    nn = NearestNeighbors(n_neighbors=1).fit(Y)
    dist_to_nn = nn.kneighbors(return_distance=True)[0].ravel()
    assert dist_to_nn.min() > 0.1

    smallest_to_mean = dist_to_nn.min() / np.mean(dist_to_nn)
    largest_to_mean = dist_to_nn.max() / np.mean(dist_to_nn)

    assert smallest_to_mean > .5, try_name
    assert largest_to_mean < 2, try_name


def test_bh_match_exact():
    # check that the ``barnes_hut`` method matches the exact one when
    # ``angle = 0`` and ``perplexity > n_samples / 3``
    random_state = check_random_state(0)
    n_features = 10
    X = random_state.randn(30, n_features).astype(np.float32)
    X_embeddings = {}
    n_iter = {}
    for method in ['exact', 'barnes_hut']:
        tsne = TSNE(n_components=2, method=method, learning_rate=1.0,
                    init="random", random_state=0, n_iter=251,
                    perplexity=30.0, angle=0)
        # Kill the early_exaggeration
        tsne._EXPLORATION_N_ITER = 0
        X_embeddings[method] = tsne.fit_transform(X)
        n_iter[method] = tsne.n_iter_

    assert n_iter['exact'] == n_iter['barnes_hut']
    assert_allclose(X_embeddings['exact'], X_embeddings['barnes_hut'],
                    rtol=1e-4)
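

# Illustrative sketch (not part of the original suite): Barnes-Hut summarizes
# a whole quad-tree cell by its center of mass whenever the cell looks small
# from the query point, i.e. when cell_size / distance < angle. With
# ``angle=0`` no cell is ever summarized and every pairwise interaction is
# computed, which is why the two methods are expected to agree above.
# Hypothetical helper name; the real criterion lives in the Cython code.
def _demo_bh_summarize_cell(cell_size, distance, angle):
    # True -> approximate the cell by a single point; False -> recurse
    return cell_size / distance < angle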


def test_gradient_bh_multithread_match_sequential():
    # check that the bh gradient with different num_threads gives the same
    # results

    n_features = 10
    n_samples = 30
    n_components = 2
    degrees_of_freedom = 1

    angle = 3
    perplexity = 5

    random_state = check_random_state(0)
    data = random_state.randn(n_samples, n_features).astype(np.float32)
    params = random_state.randn(n_samples, n_components)

    n_neighbors = n_samples - 1
    distances_csr = NearestNeighbors().fit(data).kneighbors_graph(
        n_neighbors=n_neighbors, mode='distance')
    P_bh = _joint_probabilities_nn(distances_csr, perplexity, verbose=0)
    kl_sequential, grad_sequential = _kl_divergence_bh(
        params, P_bh, degrees_of_freedom, n_samples, n_components,
        angle=angle, skip_num_points=0, verbose=0, num_threads=1)
    for num_threads in [2, 4]:
        kl_multithread, grad_multithread = _kl_divergence_bh(
            params, P_bh, degrees_of_freedom, n_samples, n_components,
            angle=angle, skip_num_points=0, verbose=0,
            num_threads=num_threads)

        assert_allclose(kl_multithread, kl_sequential, rtol=1e-6)
        assert_allclose(grad_multithread, grad_sequential, rtol=1e-6)


def test_tsne_with_different_distance_metrics():
    """Make sure that TSNE works for different distance metrics"""
    random_state = check_random_state(0)
    n_components_original = 3
    n_components_embedding = 2
    X = random_state.randn(50, n_components_original).astype(np.float32)
    metrics = ['manhattan', 'cosine']
    dist_funcs = [manhattan_distances, cosine_distances]
    for metric, dist_func in zip(metrics, dist_funcs):
        X_transformed_tsne = TSNE(
            metric=metric, n_components=n_components_embedding,
            random_state=0, n_iter=300).fit_transform(X)
        X_transformed_tsne_precomputed = TSNE(
            metric='precomputed', n_components=n_components_embedding,
            random_state=0, n_iter=300).fit_transform(dist_func(X))
        assert_array_equal(X_transformed_tsne, X_transformed_tsne_precomputed)
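

# Illustrative sketch (not part of the original suite): with
# ``metric='precomputed'`` the estimator expects a square
# (n_samples, n_samples) distance matrix, which is exactly what the pairwise
# helpers used above return.
def _demo_precomputed_distance_input():
    rng = np.random.RandomState(0)
    X = rng.randn(50, 3)
    D = manhattan_distances(X)
    assert D.shape == (50, 50)
    # a proper distance matrix has a zero diagonal
    assert np.allclose(np.diag(D), 0.0)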


@pytest.mark.parametrize('method', ['exact', 'barnes_hut'])
def test_tsne_n_jobs(method):
    """Make sure that the n_jobs parameter doesn't impact the output"""
    random_state = check_random_state(0)
    n_features = 10
    X = random_state.randn(30, n_features)
    X_tr_ref = TSNE(n_components=2, method=method, perplexity=30.0,
                    angle=0, n_jobs=1, random_state=0).fit_transform(X)
    X_tr = TSNE(n_components=2, method=method, perplexity=30.0,
                angle=0, n_jobs=2, random_state=0).fit_transform(X)

    assert_allclose(X_tr_ref, X_tr)