Uploaded Test files

This commit is contained in:
Batuhan Berk Başoğlu 2020-11-12 11:05:57 -05:00
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions


@@ -0,0 +1,13 @@
"""
The :mod:`sklearn.manifold` module implements data embedding techniques.
"""
from ._locally_linear import locally_linear_embedding, LocallyLinearEmbedding
from ._isomap import Isomap
from ._mds import MDS, smacof
from ._spectral_embedding import SpectralEmbedding, spectral_embedding
from ._t_sne import TSNE, trustworthiness
__all__ = ['locally_linear_embedding', 'LocallyLinearEmbedding', 'Isomap',
'MDS', 'smacof', 'SpectralEmbedding', 'spectral_embedding', "TSNE",
'trustworthiness']


@@ -0,0 +1,282 @@
"""Isomap for manifold learning"""
# Author: Jake Vanderplas -- <vanderplas@astro.washington.edu>
# License: BSD 3 clause (C) 2011
import numpy as np
from ..base import BaseEstimator, TransformerMixin
from ..neighbors import NearestNeighbors, kneighbors_graph
from ..utils.deprecation import deprecated
from ..utils.validation import check_is_fitted
from ..utils.validation import _deprecate_positional_args
from ..utils.graph import graph_shortest_path
from ..decomposition import KernelPCA
from ..preprocessing import KernelCenterer
class Isomap(TransformerMixin, BaseEstimator):
"""Isomap Embedding
Non-linear dimensionality reduction through Isometric Mapping
Read more in the :ref:`User Guide <isomap>`.
Parameters
----------
n_neighbors : integer
number of neighbors to consider for each point.
n_components : integer
number of coordinates for the manifold
eigen_solver : ['auto'|'arpack'|'dense']
'auto' : Attempt to choose the most efficient solver
for the given problem.
'arpack' : Use Arnoldi decomposition to find the eigenvalues
and eigenvectors.
'dense' : Use a direct solver (i.e. LAPACK)
for the eigenvalue decomposition.
tol : float
Convergence tolerance passed to arpack or lobpcg.
not used if eigen_solver == 'dense'.
max_iter : integer
Maximum number of iterations for the arpack solver.
not used if eigen_solver == 'dense'.
path_method : string ['auto'|'FW'|'D']
Method to use in finding shortest path.
'auto' : attempt to choose the best algorithm automatically.
'FW' : Floyd-Warshall algorithm.
'D' : Dijkstra's algorithm.
neighbors_algorithm : string ['auto'|'brute'|'kd_tree'|'ball_tree']
Algorithm to use for nearest neighbors search,
passed to neighbors.NearestNeighbors instance.
n_jobs : int or None, default=None
The number of parallel jobs to run.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
metric : string, or callable, default="minkowski"
The metric to use when calculating distance between instances in a
feature array. If metric is a string or callable, it must be one of
the options allowed by :func:`sklearn.metrics.pairwise_distances` for
its metric parameter.
If metric is "precomputed", X is assumed to be a distance matrix and
        must be square. X may be a :term:`sparse graph <sparse graph>`.
.. versionadded:: 0.22
p : int, default=2
Parameter for the Minkowski metric from
sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is
equivalent to using manhattan_distance (l1), and euclidean_distance
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
.. versionadded:: 0.22
metric_params : dict, default=None
Additional keyword arguments for the metric function.
.. versionadded:: 0.22
Attributes
----------
embedding_ : array-like, shape (n_samples, n_components)
Stores the embedding vectors.
kernel_pca_ : object
:class:`~sklearn.decomposition.KernelPCA` object used to implement the
embedding.
nbrs_ : sklearn.neighbors.NearestNeighbors instance
        Stores nearest neighbors instance, including BallTree or KDTree
if applicable.
dist_matrix_ : array-like, shape (n_samples, n_samples)
Stores the geodesic distance matrix of training data.
Examples
--------
>>> from sklearn.datasets import load_digits
>>> from sklearn.manifold import Isomap
>>> X, _ = load_digits(return_X_y=True)
>>> X.shape
(1797, 64)
>>> embedding = Isomap(n_components=2)
>>> X_transformed = embedding.fit_transform(X[:100])
>>> X_transformed.shape
(100, 2)
References
----------
.. [1] Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric
framework for nonlinear dimensionality reduction. Science 290 (5500)
"""
@_deprecate_positional_args
def __init__(self, *, n_neighbors=5, n_components=2, eigen_solver='auto',
tol=0, max_iter=None, path_method='auto',
neighbors_algorithm='auto', n_jobs=None, metric='minkowski',
p=2, metric_params=None):
self.n_neighbors = n_neighbors
self.n_components = n_components
self.eigen_solver = eigen_solver
self.tol = tol
self.max_iter = max_iter
self.path_method = path_method
self.neighbors_algorithm = neighbors_algorithm
self.n_jobs = n_jobs
self.metric = metric
self.p = p
self.metric_params = metric_params
def _fit_transform(self, X):
self.nbrs_ = NearestNeighbors(n_neighbors=self.n_neighbors,
algorithm=self.neighbors_algorithm,
metric=self.metric, p=self.p,
metric_params=self.metric_params,
n_jobs=self.n_jobs)
self.nbrs_.fit(X)
self.n_features_in_ = self.nbrs_.n_features_in_
self.kernel_pca_ = KernelPCA(n_components=self.n_components,
kernel="precomputed",
eigen_solver=self.eigen_solver,
tol=self.tol, max_iter=self.max_iter,
n_jobs=self.n_jobs)
kng = kneighbors_graph(self.nbrs_, self.n_neighbors,
metric=self.metric, p=self.p,
metric_params=self.metric_params,
mode='distance', n_jobs=self.n_jobs)
self.dist_matrix_ = graph_shortest_path(kng,
method=self.path_method,
directed=False)
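        # Convert geodesic distances to the isomap kernel K = -0.5 * D ** 2;
        # KernelPCA with kernel="precomputed" then centers K and projects
        # onto its leading eigenvectors to produce the embedding.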
G = self.dist_matrix_ ** 2
G *= -0.5
self.embedding_ = self.kernel_pca_.fit_transform(G)
# mypy error: Decorated property not supported
@deprecated( # type: ignore
"Attribute `training_data_` was deprecated in version 0.22 and"
" will be removed in 0.24."
)
@property
def training_data_(self):
check_is_fitted(self)
return self.nbrs_._fit_X
def reconstruction_error(self):
"""Compute the reconstruction error for the embedding.
Returns
-------
reconstruction_error : float
Notes
-----
The cost function of an isomap embedding is
``E = frobenius_norm[K(D) - K(D_fit)] / n_samples``
Where D is the matrix of distances for the input data X,
D_fit is the matrix of distances for the output embedding X_fit,
and K is the isomap kernel:
``K(D) = -0.5 * (I - 1/n_samples) * D^2 * (I - 1/n_samples)``
"""
G = -0.5 * self.dist_matrix_ ** 2
G_center = KernelCenterer().fit_transform(G)
evals = self.kernel_pca_.lambdas_
return np.sqrt(np.sum(G_center ** 2) - np.sum(evals ** 2)) / G.shape[0]
def fit(self, X, y=None):
"""Compute the embedding vectors for data X
Parameters
----------
X : {array-like, sparse graph, BallTree, KDTree, NearestNeighbors}
Sample data, shape = (n_samples, n_features), in the form of a
numpy array, sparse graph, precomputed tree, or NearestNeighbors
object.
y : Ignored
Returns
-------
self : returns an instance of self.
"""
self._fit_transform(X)
return self
def fit_transform(self, X, y=None):
"""Fit the model from data in X and transform X.
Parameters
----------
X : {array-like, sparse graph, BallTree, KDTree}
            Training vector, where n_samples is the number of samples
and n_features is the number of features.
y : Ignored
Returns
-------
X_new : array-like, shape (n_samples, n_components)
"""
self._fit_transform(X)
return self.embedding_
def transform(self, X):
"""Transform X.
This is implemented by linking the points X into the graph of geodesic
distances of the training data. First the `n_neighbors` nearest
neighbors of X are found in the training data, and from these the
shortest geodesic distances from each point in X to each point in
the training data are computed in order to construct the kernel.
The embedding of X is the projection of this kernel onto the
embedding vectors of the training set.
Parameters
----------
X : array-like, shape (n_queries, n_features)
            If metric='precomputed', X is assumed to be a
distance matrix or a sparse graph of shape
(n_queries, n_samples_fit).
Returns
-------
X_new : array-like, shape (n_queries, n_components)
"""
check_is_fitted(self)
distances, indices = self.nbrs_.kneighbors(X, return_distance=True)
# Create the graph of shortest distances from X to
# training data via the nearest neighbors of X.
# This can be done as a single array operation, but it potentially
# takes a lot of memory. To avoid that, use a loop:
n_samples_fit = self.nbrs_.n_samples_fit_
n_queries = distances.shape[0]
G_X = np.zeros((n_queries, n_samples_fit))
for i in range(n_queries):
G_X[i] = np.min(self.dist_matrix_[indices[i]] +
distances[i][:, None], 0)
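        # Apply the same isomap kernel transform (-0.5 * D ** 2) to the
        # out-of-sample geodesic distances before projecting them onto the
        # training eigenvectors through the fitted KernelPCA.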
G_X **= 2
G_X *= -0.5
return self.kernel_pca_.transform(G_X)


@@ -0,0 +1,729 @@
"""Locally Linear Embedding"""
# Author: Fabian Pedregosa -- <fabian.pedregosa@inria.fr>
# Jake Vanderplas -- <vanderplas@astro.washington.edu>
# License: BSD 3 clause (C) INRIA 2011
import numpy as np
from scipy.linalg import eigh, svd, qr, solve
from scipy.sparse import eye, csr_matrix
from scipy.sparse.linalg import eigsh
from ..base import BaseEstimator, TransformerMixin, _UnstableArchMixin
from ..utils import check_random_state, check_array
from ..utils.extmath import stable_cumsum
from ..utils.validation import check_is_fitted
from ..utils.validation import FLOAT_DTYPES
from ..utils.validation import _deprecate_positional_args
from ..neighbors import NearestNeighbors
def barycenter_weights(X, Z, reg=1e-3):
"""Compute barycenter weights of X from Y along the first axis
We estimate the weights to assign to each point in Y[i] to recover
the point X[i]. The barycenter weights sum to 1.
Parameters
----------
X : array-like, shape (n_samples, n_dim)
Z : array-like, shape (n_samples, n_neighbors, n_dim)
reg : float, optional
amount of regularization to add for the problem to be
well-posed in the case of n_neighbors > n_dim
Returns
-------
B : array-like, shape (n_samples, n_neighbors)
Notes
-----
See developers note for more information.
"""
X = check_array(X, dtype=FLOAT_DTYPES)
Z = check_array(Z, dtype=FLOAT_DTYPES, allow_nd=True)
n_samples, n_neighbors = X.shape[0], Z.shape[1]
B = np.empty((n_samples, n_neighbors), dtype=X.dtype)
v = np.ones(n_neighbors, dtype=X.dtype)
# this might raise a LinalgError if G is singular and has trace
# zero
for i, A in enumerate(Z.transpose(0, 2, 1)):
C = A.T - X[i] # broadcasting
G = np.dot(C, C.T)
trace = np.trace(G)
if trace > 0:
R = reg * trace
else:
R = reg
G.flat[::Z.shape[1] + 1] += R
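        # Solve G w = 1 on the regularized neighborhood Gram matrix, then
        # renormalize so the barycentric weights sum to one.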
w = solve(G, v, sym_pos=True)
B[i, :] = w / np.sum(w)
return B
def barycenter_kneighbors_graph(X, n_neighbors, reg=1e-3, n_jobs=None):
"""Computes the barycenter weighted graph of k-Neighbors for points in X
Parameters
----------
X : {array-like, NearestNeighbors}
Sample data, shape = (n_samples, n_features), in the form of a
numpy array or a NearestNeighbors object.
n_neighbors : int
Number of neighbors for each sample.
    reg : float, optional
        Amount of regularization when solving the least-squares
        barycenter problem.
n_jobs : int or None, optional (default=None)
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Returns
-------
A : sparse matrix in CSR format, shape = [n_samples, n_samples]
        A[i, j] is assigned the weight of the edge that connects i to j.
See also
--------
sklearn.neighbors.kneighbors_graph
sklearn.neighbors.radius_neighbors_graph
"""
knn = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs).fit(X)
X = knn._fit_X
n_samples = knn.n_samples_fit_
ind = knn.kneighbors(X, return_distance=False)[:, 1:]
data = barycenter_weights(X, X[ind], reg=reg)
indptr = np.arange(0, n_samples * n_neighbors + 1, n_neighbors)
return csr_matrix((data.ravel(), ind.ravel(), indptr),
shape=(n_samples, n_samples))
def null_space(M, k, k_skip=1, eigen_solver='arpack', tol=1E-6, max_iter=100,
random_state=None):
"""
Find the null space of a matrix M.
Parameters
----------
M : {array, matrix, sparse matrix, LinearOperator}
Input covariance matrix: should be symmetric positive semi-definite
k : integer
Number of eigenvalues/vectors to return
k_skip : integer, optional
Number of low eigenvalues to skip.
eigen_solver : string, {'auto', 'arpack', 'dense'}
auto : algorithm will attempt to choose the best method for input data
arpack : use arnoldi iteration in shift-invert mode.
For this method, M may be a dense matrix, sparse matrix,
or general linear operator.
Warning: ARPACK can be unstable for some problems. It is
best to try several random seeds in order to check results.
dense : use standard dense matrix operations for the eigenvalue
decomposition. For this method, M must be an array
or matrix type. This method should be avoided for
large problems.
tol : float, optional
Tolerance for 'arpack' method.
Not used if eigen_solver=='dense'.
max_iter : int
Maximum number of iterations for 'arpack' method.
Not used if eigen_solver=='dense'
random_state : int, RandomState instance, default=None
        Determines the random number generator when
        ``eigen_solver`` == 'arpack'. Pass an int for reproducible results
        across multiple function calls. See :term:`Glossary <random_state>`.
"""
if eigen_solver == 'auto':
if M.shape[0] > 200 and k + k_skip < 10:
eigen_solver = 'arpack'
else:
eigen_solver = 'dense'
if eigen_solver == 'arpack':
random_state = check_random_state(random_state)
# initialize with [-1,1] as in ARPACK
v0 = random_state.uniform(-1, 1, M.shape[0])
try:
eigen_values, eigen_vectors = eigsh(M, k + k_skip, sigma=0.0,
tol=tol, maxiter=max_iter,
v0=v0)
except RuntimeError as msg:
raise ValueError("Error in determining null-space with ARPACK. "
"Error message: '%s'. "
"Note that method='arpack' can fail when the "
"weight matrix is singular or otherwise "
"ill-behaved. method='dense' is recommended. "
"See online documentation for more information."
% msg)
return eigen_vectors[:, k_skip:], np.sum(eigen_values[k_skip:])
elif eigen_solver == 'dense':
if hasattr(M, 'toarray'):
M = M.toarray()
eigen_values, eigen_vectors = eigh(
M, eigvals=(k_skip, k + k_skip - 1), overwrite_a=True)
index = np.argsort(np.abs(eigen_values))
return eigen_vectors[:, index], np.sum(eigen_values)
else:
raise ValueError("Unrecognized eigen_solver '%s'" % eigen_solver)
@_deprecate_positional_args
def locally_linear_embedding(
X, *, n_neighbors, n_components, reg=1e-3, eigen_solver='auto',
tol=1e-6, max_iter=100, method='standard', hessian_tol=1E-4,
modified_tol=1E-12, random_state=None, n_jobs=None):
"""Perform a Locally Linear Embedding analysis on the data.
Read more in the :ref:`User Guide <locally_linear_embedding>`.
Parameters
----------
X : {array-like, NearestNeighbors}
Sample data, shape = (n_samples, n_features), in the form of a
numpy array or a NearestNeighbors object.
n_neighbors : integer
number of neighbors to consider for each point.
n_components : integer
number of coordinates for the manifold.
reg : float
regularization constant, multiplies the trace of the local covariance
matrix of the distances.
eigen_solver : string, {'auto', 'arpack', 'dense'}
auto : algorithm will attempt to choose the best method for input data
arpack : use arnoldi iteration in shift-invert mode.
For this method, M may be a dense matrix, sparse matrix,
or general linear operator.
Warning: ARPACK can be unstable for some problems. It is
best to try several random seeds in order to check results.
dense : use standard dense matrix operations for the eigenvalue
decomposition. For this method, M must be an array
or matrix type. This method should be avoided for
large problems.
tol : float, optional
Tolerance for 'arpack' method
Not used if eigen_solver=='dense'.
max_iter : integer
maximum number of iterations for the arpack solver.
method : {'standard', 'hessian', 'modified', 'ltsa'}
standard : use the standard locally linear embedding algorithm.
see reference [1]_
hessian : use the Hessian eigenmap method. This method requires
            n_neighbors > n_components * (1 + (n_components + 1) / 2).
see reference [2]_
modified : use the modified locally linear embedding algorithm.
see reference [3]_
ltsa : use local tangent space alignment algorithm
see reference [4]_
hessian_tol : float, optional
Tolerance for Hessian eigenmapping method.
Only used if method == 'hessian'
modified_tol : float, optional
Tolerance for modified LLE method.
Only used if method == 'modified'
random_state : int, RandomState instance, default=None
        Determines the random number generator when
        ``eigen_solver`` == 'arpack'. Pass an int for reproducible results
        across multiple function calls. See :term:`Glossary <random_state>`.
n_jobs : int or None, optional (default=None)
The number of parallel jobs to run for neighbors search.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Returns
-------
Y : array-like, shape [n_samples, n_components]
Embedding vectors.
squared_error : float
Reconstruction error for the embedding vectors. Equivalent to
``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights.
References
----------
.. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction
by locally linear embedding. Science 290:2323 (2000).
.. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally
linear embedding techniques for high-dimensional data.
Proc Natl Acad Sci U S A. 100:5591 (2003).
.. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear
Embedding Using Multiple Weights.
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382
.. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear
dimensionality reduction via tangent space alignment.
Journal of Shanghai Univ. 8:406 (2004)
"""
if eigen_solver not in ('auto', 'arpack', 'dense'):
raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)
if method not in ('standard', 'hessian', 'modified', 'ltsa'):
raise ValueError("unrecognized method '%s'" % method)
nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs)
nbrs.fit(X)
X = nbrs._fit_X
N, d_in = X.shape
if n_components > d_in:
raise ValueError("output dimension must be less than or equal "
"to input dimension")
if n_neighbors >= N:
raise ValueError(
"Expected n_neighbors <= n_samples, "
" but n_samples = %d, n_neighbors = %d" %
(N, n_neighbors)
)
if n_neighbors <= 0:
raise ValueError("n_neighbors must be positive")
M_sparse = (eigen_solver != 'dense')
if method == 'standard':
W = barycenter_kneighbors_graph(
nbrs, n_neighbors=n_neighbors, reg=reg, n_jobs=n_jobs)
# we'll compute M = (I-W)'(I-W)
# depending on the solver, we'll do this differently
if M_sparse:
M = eye(*W.shape, format=W.format) - W
M = (M.T * M).tocsr()
else:
M = (W.T * W - W.T - W).toarray()
            # add the identity to complete M = (I - W)' (I - W)
            M.flat[::M.shape[0] + 1] += 1
elif method == 'hessian':
dp = n_components * (n_components + 1) // 2
if n_neighbors <= n_components + dp:
raise ValueError("for method='hessian', n_neighbors must be "
"greater than "
"[n_components * (n_components + 3) / 2]")
neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
return_distance=False)
neighbors = neighbors[:, 1:]
Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64)
Yi[:, 0] = 1
M = np.zeros((N, N), dtype=np.float64)
use_svd = (n_neighbors > d_in)
for i in range(N):
Gi = X[neighbors[i]]
Gi -= Gi.mean(0)
# build Hessian estimator
if use_svd:
U = svd(Gi, full_matrices=0)[0]
else:
Ci = np.dot(Gi, Gi.T)
U = eigh(Ci)[1][:, ::-1]
Yi[:, 1:1 + n_components] = U[:, :n_components]
j = 1 + n_components
for k in range(n_components):
Yi[:, j:j + n_components - k] = (U[:, k:k + 1] *
U[:, k:n_components])
j += n_components - k
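            # Columns of Q beyond the constant and tangent directions form the
            # local Hessian estimator; their outer product is accumulated into
            # the alignment matrix M below.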
Q, R = qr(Yi)
w = Q[:, n_components + 1:]
S = w.sum(0)
S[np.where(abs(S) < hessian_tol)] = 1
w /= S
nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
M[nbrs_x, nbrs_y] += np.dot(w, w.T)
if M_sparse:
M = csr_matrix(M)
elif method == 'modified':
if n_neighbors < n_components:
raise ValueError("modified LLE requires "
"n_neighbors >= n_components")
neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
return_distance=False)
neighbors = neighbors[:, 1:]
# find the eigenvectors and eigenvalues of each local covariance
# matrix. We want V[i] to be a [n_neighbors x n_neighbors] matrix,
# where the columns are eigenvectors
V = np.zeros((N, n_neighbors, n_neighbors))
nev = min(d_in, n_neighbors)
evals = np.zeros([N, nev])
# choose the most efficient way to find the eigenvectors
use_svd = (n_neighbors > d_in)
if use_svd:
for i in range(N):
X_nbrs = X[neighbors[i]] - X[i]
V[i], evals[i], _ = svd(X_nbrs,
full_matrices=True)
evals **= 2
else:
for i in range(N):
X_nbrs = X[neighbors[i]] - X[i]
C_nbrs = np.dot(X_nbrs, X_nbrs.T)
evi, vi = eigh(C_nbrs)
evals[i] = evi[::-1]
V[i] = vi[:, ::-1]
# find regularized weights: this is like normal LLE.
# because we've already computed the SVD of each covariance matrix,
# it's faster to use this rather than np.linalg.solve
reg = 1E-3 * evals.sum(1)
tmp = np.dot(V.transpose(0, 2, 1), np.ones(n_neighbors))
tmp[:, :nev] /= evals + reg[:, None]
tmp[:, nev:] /= reg[:, None]
w_reg = np.zeros((N, n_neighbors))
for i in range(N):
w_reg[i] = np.dot(V[i], tmp[i])
w_reg /= w_reg.sum(1)[:, None]
# calculate eta: the median of the ratio of small to large eigenvalues
# across the points. This is used to determine s_i, below
rho = evals[:, n_components:].sum(1) / evals[:, :n_components].sum(1)
eta = np.median(rho)
# find s_i, the size of the "almost null space" for each point:
# this is the size of the largest set of eigenvalues
# such that Sum[v; v in set]/Sum[v; v not in set] < eta
s_range = np.zeros(N, dtype=int)
evals_cumsum = stable_cumsum(evals, 1)
eta_range = evals_cumsum[:, -1:] / evals_cumsum[:, :-1] - 1
for i in range(N):
s_range[i] = np.searchsorted(eta_range[i, ::-1], eta)
s_range += n_neighbors - nev # number of zero eigenvalues
# Now calculate M.
# This is the [N x N] matrix whose null space is the desired embedding
M = np.zeros((N, N), dtype=np.float64)
for i in range(N):
s_i = s_range[i]
# select bottom s_i eigenvectors and calculate alpha
Vi = V[i, :, n_neighbors - s_i:]
alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)
# compute Householder matrix which satisfies
# Hi*Vi.T*ones(n_neighbors) = alpha_i*ones(s)
# using prescription from paper
h = np.full(s_i, alpha_i) - np.dot(Vi.T, np.ones(n_neighbors))
norm_h = np.linalg.norm(h)
if norm_h < modified_tol:
h *= 0
else:
h /= norm_h
# Householder matrix is
# >> Hi = np.identity(s_i) - 2*np.outer(h,h)
# Then the weight matrix is
# >> Wi = np.dot(Vi,Hi) + (1-alpha_i) * w_reg[i,:,None]
# We do this much more efficiently:
Wi = (Vi - 2 * np.outer(np.dot(Vi, h), h) +
(1 - alpha_i) * w_reg[i, :, None])
# Update M as follows:
# >> W_hat = np.zeros( (N,s_i) )
# >> W_hat[neighbors[i],:] = Wi
# >> W_hat[i] -= 1
# >> M += np.dot(W_hat,W_hat.T)
# We can do this much more efficiently:
nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T)
Wi_sum1 = Wi.sum(1)
M[i, neighbors[i]] -= Wi_sum1
M[neighbors[i], i] -= Wi_sum1
M[i, i] += s_i
if M_sparse:
M = csr_matrix(M)
elif method == 'ltsa':
neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
return_distance=False)
neighbors = neighbors[:, 1:]
M = np.zeros((N, N))
use_svd = (n_neighbors > d_in)
for i in range(N):
Xi = X[neighbors[i]]
Xi -= Xi.mean(0)
# compute n_components largest eigenvalues of Xi * Xi^T
if use_svd:
v = svd(Xi, full_matrices=True)[0]
else:
Ci = np.dot(Xi, Xi.T)
v = eigh(Ci)[1][:, ::-1]
Gi = np.zeros((n_neighbors, n_components + 1))
Gi[:, 1:] = v[:, :n_components]
Gi[:, 0] = 1. / np.sqrt(n_neighbors)
GiGiT = np.dot(Gi, Gi.T)
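            # Accumulate the alignment matrix: subtract the projection onto
            # the local tangent basis (plus the constant direction) and add
            # the identity on this neighborhood; the null space of M gives
            # the embedding.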
nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
M[nbrs_x, nbrs_y] -= GiGiT
M[neighbors[i], neighbors[i]] += 1
return null_space(M, n_components, k_skip=1, eigen_solver=eigen_solver,
tol=tol, max_iter=max_iter, random_state=random_state)
class LocallyLinearEmbedding(TransformerMixin,
_UnstableArchMixin, BaseEstimator):
"""Locally Linear Embedding
Read more in the :ref:`User Guide <locally_linear_embedding>`.
Parameters
----------
n_neighbors : integer
number of neighbors to consider for each point.
n_components : integer
number of coordinates for the manifold
reg : float
regularization constant, multiplies the trace of the local covariance
matrix of the distances.
eigen_solver : string, {'auto', 'arpack', 'dense'}
auto : algorithm will attempt to choose the best method for input data
arpack : use arnoldi iteration in shift-invert mode.
For this method, M may be a dense matrix, sparse matrix,
or general linear operator.
Warning: ARPACK can be unstable for some problems. It is
best to try several random seeds in order to check results.
dense : use standard dense matrix operations for the eigenvalue
decomposition. For this method, M must be an array
or matrix type. This method should be avoided for
large problems.
tol : float, optional
Tolerance for 'arpack' method
Not used if eigen_solver=='dense'.
max_iter : integer
maximum number of iterations for the arpack solver.
Not used if eigen_solver=='dense'.
method : string ('standard', 'hessian', 'modified' or 'ltsa')
standard : use the standard locally linear embedding algorithm. see
reference [1]
hessian : use the Hessian eigenmap method. This method requires
            ``n_neighbors > n_components * (1 + (n_components + 1) / 2)``
see reference [2]
modified : use the modified locally linear embedding algorithm.
see reference [3]
ltsa : use local tangent space alignment algorithm
see reference [4]
hessian_tol : float, optional
Tolerance for Hessian eigenmapping method.
Only used if ``method == 'hessian'``
modified_tol : float, optional
Tolerance for modified LLE method.
Only used if ``method == 'modified'``
neighbors_algorithm : string ['auto'|'brute'|'kd_tree'|'ball_tree']
algorithm to use for nearest neighbors search,
passed to neighbors.NearestNeighbors instance
random_state : int, RandomState instance, default=None
Determines the random number generator when
``eigen_solver`` == 'arpack'. Pass an int for reproducible results
        across multiple function calls. See :term:`Glossary <random_state>`.
n_jobs : int or None, optional (default=None)
The number of parallel jobs to run.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Attributes
----------
embedding_ : array-like, shape [n_samples, n_components]
Stores the embedding vectors
reconstruction_error_ : float
Reconstruction error associated with `embedding_`
nbrs_ : NearestNeighbors object
        Stores nearest neighbors instance, including BallTree or KDTree
if applicable.
Examples
--------
>>> from sklearn.datasets import load_digits
>>> from sklearn.manifold import LocallyLinearEmbedding
>>> X, _ = load_digits(return_X_y=True)
>>> X.shape
(1797, 64)
>>> embedding = LocallyLinearEmbedding(n_components=2)
>>> X_transformed = embedding.fit_transform(X[:100])
>>> X_transformed.shape
(100, 2)
References
----------
.. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction
by locally linear embedding. Science 290:2323 (2000).
.. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally
linear embedding techniques for high-dimensional data.
Proc Natl Acad Sci U S A. 100:5591 (2003).
.. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear
Embedding Using Multiple Weights.
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382
.. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear
dimensionality reduction via tangent space alignment.
Journal of Shanghai Univ. 8:406 (2004)
"""
@_deprecate_positional_args
def __init__(self, *, n_neighbors=5, n_components=2, reg=1E-3,
eigen_solver='auto', tol=1E-6, max_iter=100,
method='standard', hessian_tol=1E-4, modified_tol=1E-12,
neighbors_algorithm='auto', random_state=None, n_jobs=None):
self.n_neighbors = n_neighbors
self.n_components = n_components
self.reg = reg
self.eigen_solver = eigen_solver
self.tol = tol
self.max_iter = max_iter
self.method = method
self.hessian_tol = hessian_tol
self.modified_tol = modified_tol
self.random_state = random_state
self.neighbors_algorithm = neighbors_algorithm
self.n_jobs = n_jobs
def _fit_transform(self, X):
self.nbrs_ = NearestNeighbors(n_neighbors=self.n_neighbors,
algorithm=self.neighbors_algorithm,
n_jobs=self.n_jobs)
random_state = check_random_state(self.random_state)
X = self._validate_data(X, dtype=float)
self.nbrs_.fit(X)
self.embedding_, self.reconstruction_error_ = \
locally_linear_embedding(
X=self.nbrs_, n_neighbors=self.n_neighbors,
n_components=self.n_components,
eigen_solver=self.eigen_solver, tol=self.tol,
max_iter=self.max_iter, method=self.method,
hessian_tol=self.hessian_tol, modified_tol=self.modified_tol,
random_state=random_state, reg=self.reg, n_jobs=self.n_jobs)
def fit(self, X, y=None):
"""Compute the embedding vectors for data X
Parameters
----------
X : array-like of shape [n_samples, n_features]
training set.
y : Ignored
Returns
-------
self : returns an instance of self.
"""
self._fit_transform(X)
return self
def fit_transform(self, X, y=None):
"""Compute the embedding vectors for data X and transform X.
Parameters
----------
X : array-like of shape [n_samples, n_features]
training set.
y : Ignored
Returns
-------
X_new : array-like, shape (n_samples, n_components)
"""
self._fit_transform(X)
return self.embedding_
def transform(self, X):
"""
Transform new points into embedding space.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Returns
-------
X_new : array, shape = [n_samples, n_components]
Notes
-----
Because of scaling performed by this method, it is discouraged to use
it together with methods that are not scale-invariant (like SVMs)
"""
check_is_fitted(self)
X = check_array(X)
ind = self.nbrs_.kneighbors(X, n_neighbors=self.n_neighbors,
return_distance=False)
weights = barycenter_weights(X, self.nbrs_._fit_X[ind],
reg=self.reg)
X_new = np.empty((X.shape[0], self.n_components))
for i in range(X.shape[0]):
X_new[i] = np.dot(self.embedding_[ind[i]].T, weights[i])
return X_new


@@ -0,0 +1,439 @@
"""
Multi-dimensional Scaling (MDS)
"""
# author: Nelle Varoquaux <nelle.varoquaux@gmail.com>
# License: BSD
import numpy as np
from joblib import Parallel, delayed, effective_n_jobs
import warnings
from ..base import BaseEstimator
from ..metrics import euclidean_distances
from ..utils import check_random_state, check_array, check_symmetric
from ..isotonic import IsotonicRegression
from ..utils.validation import _deprecate_positional_args
def _smacof_single(dissimilarities, metric=True, n_components=2, init=None,
max_iter=300, verbose=0, eps=1e-3, random_state=None):
"""Computes multidimensional scaling using SMACOF algorithm
Parameters
----------
dissimilarities : ndarray, shape (n_samples, n_samples)
Pairwise dissimilarities between the points. Must be symmetric.
metric : boolean, optional, default: True
Compute metric or nonmetric SMACOF algorithm.
n_components : int, optional, default: 2
Number of dimensions in which to immerse the dissimilarities. If an
``init`` array is provided, this option is overridden and the shape of
``init`` is used to determine the dimensionality of the embedding
space.
init : ndarray, shape (n_samples, n_components), optional, default: None
Starting configuration of the embedding to initialize the algorithm. By
default, the algorithm is initialized with a randomly chosen array.
max_iter : int, optional, default: 300
Maximum number of iterations of the SMACOF algorithm for a single run.
verbose : int, optional, default: 0
Level of verbosity.
eps : float, optional, default: 1e-3
Relative tolerance with respect to stress at which to declare
convergence.
random_state : int, RandomState instance, default=None
        Determines the random number generator used to initialize the
        embedding. Pass an int for reproducible results across multiple
        function calls. See :term:`Glossary <random_state>`.
Returns
-------
X : ndarray, shape (n_samples, n_components)
Coordinates of the points in a ``n_components``-space.
stress : float
        The final value of the stress (sum of squared differences between
        the disparities and the distances over all constrained points).
n_iter : int
The number of iterations corresponding to the best stress.
"""
dissimilarities = check_symmetric(dissimilarities, raise_exception=True)
n_samples = dissimilarities.shape[0]
random_state = check_random_state(random_state)
sim_flat = ((1 - np.tri(n_samples)) * dissimilarities).ravel()
sim_flat_w = sim_flat[sim_flat != 0]
if init is None:
# Randomly choose initial configuration
X = random_state.rand(n_samples * n_components)
X = X.reshape((n_samples, n_components))
else:
        # overrides the n_components parameter
n_components = init.shape[1]
if n_samples != init.shape[0]:
raise ValueError("init matrix should be of shape (%d, %d)" %
(n_samples, n_components))
X = init
old_stress = None
ir = IsotonicRegression()
for it in range(max_iter):
# Compute distance and monotonic regression
dis = euclidean_distances(X)
if metric:
disparities = dissimilarities
else:
dis_flat = dis.ravel()
# dissimilarities with 0 are considered as missing values
dis_flat_w = dis_flat[sim_flat != 0]
# Compute the disparities using a monotonic regression
disparities_flat = ir.fit_transform(sim_flat_w, dis_flat_w)
disparities = dis_flat.copy()
disparities[sim_flat != 0] = disparities_flat
disparities = disparities.reshape((n_samples, n_samples))
disparities *= np.sqrt((n_samples * (n_samples - 1) / 2) /
(disparities ** 2).sum())
# Compute stress
stress = ((dis.ravel() - disparities.ravel()) ** 2).sum() / 2
# Update X using the Guttman transform
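        # B holds -disparities[i, j] / dis[i, j] off the diagonal, with each
        # row's positive sum placed on the diagonal; the majorizing update is
        # X <- (1 / n_samples) * B @ X.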
dis[dis == 0] = 1e-5
ratio = disparities / dis
B = - ratio
B[np.arange(len(B)), np.arange(len(B))] += ratio.sum(axis=1)
X = 1. / n_samples * np.dot(B, X)
dis = np.sqrt((X ** 2).sum(axis=1)).sum()
if verbose >= 2:
print('it: %d, stress %s' % (it, stress))
if old_stress is not None:
            if (old_stress - stress / dis) < eps:
if verbose:
print('breaking at iteration %d with stress %s' % (it,
stress))
break
old_stress = stress / dis
return X, stress, it + 1
@_deprecate_positional_args
def smacof(dissimilarities, *, metric=True, n_components=2, init=None,
n_init=8, n_jobs=None, max_iter=300, verbose=0, eps=1e-3,
random_state=None, return_n_iter=False):
"""Computes multidimensional scaling using the SMACOF algorithm.
The SMACOF (Scaling by MAjorizing a COmplicated Function) algorithm is a
multidimensional scaling algorithm which minimizes an objective function
(the *stress*) using a majorization technique. Stress majorization, also
known as the Guttman Transform, guarantees a monotone convergence of
stress, and is more powerful than traditional techniques such as gradient
descent.
    The SMACOF algorithm for metric MDS can be summarized by the following steps:
1. Set an initial start configuration, randomly or not.
2. Compute the stress
3. Compute the Guttman Transform
4. Iterate 2 and 3 until convergence.
The nonmetric algorithm adds a monotonic regression step before computing
the stress.
Parameters
----------
dissimilarities : ndarray, shape (n_samples, n_samples)
Pairwise dissimilarities between the points. Must be symmetric.
metric : boolean, optional, default: True
Compute metric or nonmetric SMACOF algorithm.
n_components : int, optional, default: 2
Number of dimensions in which to immerse the dissimilarities. If an
``init`` array is provided, this option is overridden and the shape of
``init`` is used to determine the dimensionality of the embedding
space.
init : ndarray, shape (n_samples, n_components), optional, default: None
Starting configuration of the embedding to initialize the algorithm. By
default, the algorithm is initialized with a randomly chosen array.
n_init : int, optional, default: 8
Number of times the SMACOF algorithm will be run with different
initializations. The final results will be the best output of the runs,
determined by the run with the smallest final stress. If ``init`` is
provided, this option is overridden and a single run is performed.
n_jobs : int or None, optional (default=None)
The number of jobs to use for the computation. If multiple
initializations are used (``n_init``), each run of the algorithm is
computed in parallel.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
max_iter : int, optional, default: 300
Maximum number of iterations of the SMACOF algorithm for a single run.
verbose : int, optional, default: 0
Level of verbosity.
eps : float, optional, default: 1e-3
Relative tolerance with respect to stress at which to declare
convergence.
random_state : int, RandomState instance, default=None
        Determines the random number generator used to initialize the
        embedding. Pass an int for reproducible results across multiple
        function calls. See :term:`Glossary <random_state>`.
return_n_iter : bool, optional, default: False
Whether or not to return the number of iterations.
Returns
-------
X : ndarray, shape (n_samples, n_components)
Coordinates of the points in a ``n_components``-space.
stress : float
        The final value of the stress (sum of squared differences between
        the disparities and the distances over all constrained points).
n_iter : int
The number of iterations corresponding to the best stress. Returned
only if ``return_n_iter`` is set to ``True``.
Notes
-----
"Modern Multidimensional Scaling - Theory and Applications" Borg, I.;
Groenen P. Springer Series in Statistics (1997)
"Nonmetric multidimensional scaling: a numerical method" Kruskal, J.
Psychometrika, 29 (1964)
"Multidimensional scaling by optimizing goodness of fit to a nonmetric
hypothesis" Kruskal, J. Psychometrika, 29, (1964)
"""
dissimilarities = check_array(dissimilarities)
random_state = check_random_state(random_state)
if hasattr(init, '__array__'):
init = np.asarray(init).copy()
if not n_init == 1:
warnings.warn(
'Explicit initial positions passed: '
'performing only one init of the MDS instead of %d'
% n_init)
n_init = 1
best_pos, best_stress = None, None
if effective_n_jobs(n_jobs) == 1:
for it in range(n_init):
pos, stress, n_iter_ = _smacof_single(
dissimilarities, metric=metric,
n_components=n_components, init=init,
max_iter=max_iter, verbose=verbose,
eps=eps, random_state=random_state)
if best_stress is None or stress < best_stress:
best_stress = stress
best_pos = pos.copy()
best_iter = n_iter_
else:
seeds = random_state.randint(np.iinfo(np.int32).max, size=n_init)
results = Parallel(n_jobs=n_jobs, verbose=max(verbose - 1, 0))(
delayed(_smacof_single)(
dissimilarities, metric=metric, n_components=n_components,
init=init, max_iter=max_iter, verbose=verbose, eps=eps,
random_state=seed)
for seed in seeds)
positions, stress, n_iters = zip(*results)
best = np.argmin(stress)
best_stress = stress[best]
best_pos = positions[best]
best_iter = n_iters[best]
if return_n_iter:
return best_pos, best_stress, best_iter
else:
return best_pos, best_stress
class MDS(BaseEstimator):
"""Multidimensional scaling
Read more in the :ref:`User Guide <multidimensional_scaling>`.
Parameters
----------
n_components : int, optional, default: 2
Number of dimensions in which to immerse the dissimilarities.
metric : boolean, optional, default: True
If ``True``, perform metric MDS; otherwise, perform nonmetric MDS.
n_init : int, optional, default: 4
Number of times the SMACOF algorithm will be run with different
initializations. The final results will be the best output of the runs,
determined by the run with the smallest final stress.
max_iter : int, optional, default: 300
Maximum number of iterations of the SMACOF algorithm for a single run.
verbose : int, optional, default: 0
Level of verbosity.
eps : float, optional, default: 1e-3
Relative tolerance with respect to stress at which to declare
convergence.
n_jobs : int or None, optional (default=None)
The number of jobs to use for the computation. If multiple
initializations are used (``n_init``), each run of the algorithm is
computed in parallel.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
random_state : int, RandomState instance, default=None
        Determines the random number generator used to initialize the
        embedding. Pass an int for reproducible results across multiple
        function calls. See :term:`Glossary <random_state>`.
dissimilarity : 'euclidean' | 'precomputed', optional, default: 'euclidean'
Dissimilarity measure to use:
- 'euclidean':
Pairwise Euclidean distances between points in the dataset.
- 'precomputed':
Pre-computed dissimilarities are passed directly to ``fit`` and
``fit_transform``.
Attributes
----------
embedding_ : array-like, shape (n_samples, n_components)
Stores the position of the dataset in the embedding space.
stress_ : float
        The final value of the stress (sum of squared differences between
        the disparities and the distances over all constrained points).
Examples
--------
>>> from sklearn.datasets import load_digits
>>> from sklearn.manifold import MDS
>>> X, _ = load_digits(return_X_y=True)
>>> X.shape
(1797, 64)
>>> embedding = MDS(n_components=2)
>>> X_transformed = embedding.fit_transform(X[:100])
>>> X_transformed.shape
(100, 2)
References
----------
"Modern Multidimensional Scaling - Theory and Applications" Borg, I.;
Groenen P. Springer Series in Statistics (1997)
"Nonmetric multidimensional scaling: a numerical method" Kruskal, J.
Psychometrika, 29 (1964)
"Multidimensional scaling by optimizing goodness of fit to a nonmetric
hypothesis" Kruskal, J. Psychometrika, 29, (1964)
"""
@_deprecate_positional_args
def __init__(self, n_components=2, *, metric=True, n_init=4,
max_iter=300, verbose=0, eps=1e-3, n_jobs=None,
random_state=None, dissimilarity="euclidean"):
self.n_components = n_components
self.dissimilarity = dissimilarity
self.metric = metric
self.n_init = n_init
self.max_iter = max_iter
self.eps = eps
self.verbose = verbose
self.n_jobs = n_jobs
self.random_state = random_state
@property
def _pairwise(self):
        return self.dissimilarity == "precomputed"
def fit(self, X, y=None, init=None):
"""
Computes the position of the points in the embedding space
Parameters
----------
X : array, shape (n_samples, n_features) or (n_samples, n_samples)
Input data. If ``dissimilarity=='precomputed'``, the input should
be the dissimilarity matrix.
y : Ignored
        init : ndarray, shape (n_samples, n_components), optional, default: None
Starting configuration of the embedding to initialize the SMACOF
algorithm. By default, the algorithm is initialized with a randomly
chosen array.
"""
self.fit_transform(X, init=init)
return self
def fit_transform(self, X, y=None, init=None):
"""
        Fit the data from X and return the embedded coordinates.
Parameters
----------
X : array, shape (n_samples, n_features) or (n_samples, n_samples)
Input data. If ``dissimilarity=='precomputed'``, the input should
be the dissimilarity matrix.
y : Ignored
        init : ndarray, shape (n_samples, n_components), optional, default: None
Starting configuration of the embedding to initialize the SMACOF
algorithm. By default, the algorithm is initialized with a randomly
chosen array.
"""
X = self._validate_data(X)
if X.shape[0] == X.shape[1] and self.dissimilarity != "precomputed":
warnings.warn("The MDS API has changed. ``fit`` now constructs an"
" dissimilarity matrix from data. To use a custom "
"dissimilarity matrix, set "
"``dissimilarity='precomputed'``.")
if self.dissimilarity == "precomputed":
self.dissimilarity_matrix_ = X
elif self.dissimilarity == "euclidean":
self.dissimilarity_matrix_ = euclidean_distances(X)
else:
raise ValueError("Proximity must be 'precomputed' or 'euclidean'."
" Got %s instead" % str(self.dissimilarity))
self.embedding_, self.stress_, self.n_iter_ = smacof(
self.dissimilarity_matrix_, metric=self.metric,
n_components=self.n_components, init=init, n_init=self.n_init,
n_jobs=self.n_jobs, max_iter=self.max_iter, verbose=self.verbose,
eps=self.eps, random_state=self.random_state,
return_n_iter=True)
return self.embedding_


@@ -0,0 +1,577 @@
"""Spectral Embedding"""
# Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
# Wei LI <kuantkid@gmail.com>
# License: BSD 3 clause
import warnings
import numpy as np
from scipy import sparse
from scipy.linalg import eigh
from scipy.sparse.linalg import eigsh
from scipy.sparse.csgraph import connected_components
from scipy.sparse.csgraph import laplacian as csgraph_laplacian
from ..base import BaseEstimator
from ..utils import check_random_state, check_array, check_symmetric
from ..utils.extmath import _deterministic_vector_sign_flip
from ..utils.fixes import lobpcg
from ..metrics.pairwise import rbf_kernel
from ..neighbors import kneighbors_graph, NearestNeighbors
from ..utils.validation import _deprecate_positional_args
def _graph_connected_component(graph, node_id):
"""Find the largest graph connected components that contains one
given node
Parameters
----------
graph : array-like, shape: (n_samples, n_samples)
adjacency matrix of the graph, non-zero weight means an edge
between the nodes
node_id : int
The index of the query node of the graph
Returns
-------
connected_components_matrix : array-like, shape: (n_samples,)
        An array of boolean values indicating the indexes of the nodes
        belonging to the largest connected component containing the given
        query node
"""
n_node = graph.shape[0]
if sparse.issparse(graph):
# speed up row-wise access to boolean connection mask
graph = graph.tocsr()
    connected_nodes = np.zeros(n_node, dtype=bool)
    nodes_to_explore = np.zeros(n_node, dtype=bool)
nodes_to_explore[node_id] = True
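    # Breadth-first expansion: repeatedly OR the neighbors of the current
    # frontier into the set of connected nodes until it stops growing.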
for _ in range(n_node):
last_num_component = connected_nodes.sum()
np.logical_or(connected_nodes, nodes_to_explore, out=connected_nodes)
if last_num_component >= connected_nodes.sum():
break
indices = np.where(nodes_to_explore)[0]
nodes_to_explore.fill(False)
for i in indices:
if sparse.issparse(graph):
neighbors = graph[i].toarray().ravel()
else:
neighbors = graph[i]
np.logical_or(nodes_to_explore, neighbors, out=nodes_to_explore)
return connected_nodes
def _graph_is_connected(graph):
""" Return whether the graph is connected (True) or Not (False)
Parameters
----------
graph : array-like or sparse matrix, shape: (n_samples, n_samples)
adjacency matrix of the graph, non-zero weight means an edge
between the nodes
Returns
-------
is_connected : bool
True means the graph is fully connected and False means not
"""
if sparse.isspmatrix(graph):
# sparse graph, find all the connected components
n_connected_components, _ = connected_components(graph)
return n_connected_components == 1
else:
# dense graph, find all connected components start from node 0
return _graph_connected_component(graph, 0).sum() == graph.shape[0]
def _set_diag(laplacian, value, norm_laplacian):
"""Set the diagonal of the laplacian matrix and convert it to a
sparse format well suited for eigenvalue decomposition
Parameters
----------
laplacian : array or sparse matrix
The graph laplacian
value : float
The value of the diagonal
norm_laplacian : bool
Whether the value of the diagonal should be changed or not
Returns
-------
laplacian : array or sparse matrix
        An array or sparse matrix in a form that is well suited to fast
        eigenvalue decomposition, depending on the bandwidth of the
        matrix.
"""
n_nodes = laplacian.shape[0]
    # We need all entries in the diagonal to be set to `value`
if not sparse.isspmatrix(laplacian):
if norm_laplacian:
laplacian.flat[::n_nodes + 1] = value
else:
laplacian = laplacian.tocoo()
if norm_laplacian:
diag_idx = (laplacian.row == laplacian.col)
laplacian.data[diag_idx] = value
# If the matrix has a small number of diagonals (as in the
# case of structured matrices coming from images), the
# dia format might be best suited for matvec products:
n_diags = np.unique(laplacian.row - laplacian.col).size
if n_diags <= 7:
# 3 or less outer diagonals on each side
laplacian = laplacian.todia()
else:
# csr has the fastest matvec and is thus best suited to
# arpack
laplacian = laplacian.tocsr()
return laplacian
@_deprecate_positional_args
def spectral_embedding(adjacency, *, n_components=8, eigen_solver=None,
random_state=None, eigen_tol=0.0,
norm_laplacian=True, drop_first=True):
"""Project the sample on the first eigenvectors of the graph Laplacian.
The adjacency matrix is used to compute a normalized graph Laplacian
whose spectrum (especially the eigenvectors associated to the
smallest eigenvalues) has an interpretation in terms of minimal
number of cuts necessary to split the graph into comparably sized
components.
This embedding can also 'work' even if the ``adjacency`` variable is
not strictly the adjacency matrix of a graph but more generally
an affinity or similarity matrix between samples (for instance the
heat kernel of a euclidean distance matrix or a k-NN matrix).
    However, care must be taken to always make the affinity matrix symmetric
so that the eigenvector decomposition works as expected.
Note : Laplacian Eigenmaps is the actual algorithm implemented here.
Read more in the :ref:`User Guide <spectral_embedding>`.
Parameters
----------
adjacency : array-like or sparse graph, shape: (n_samples, n_samples)
The adjacency matrix of the graph to embed.
n_components : integer, optional, default 8
The dimension of the projection subspace.
eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}, default None
The eigenvalue decomposition strategy to use. AMG requires pyamg
to be installed. It can be faster on very large, sparse problems,
but may also lead to instabilities.
random_state : int, RandomState instance, default=None
Determines the random number generator used for the initialization of
        the lobpcg eigenvectors decomposition when ``eigen_solver`` == 'amg'.
        Pass an int for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.
eigen_tol : float, optional, default=0.0
Stopping criterion for eigendecomposition of the Laplacian matrix
when using arpack eigen_solver.
norm_laplacian : bool, optional, default=True
If True, then compute normalized Laplacian.
drop_first : bool, optional, default=True
Whether to drop the first eigenvector. For spectral embedding, this
should be True as the first eigenvector should be constant vector for
connected graph, but for spectral clustering, this should be kept as
False to retain the first eigenvector.
Returns
-------
embedding : array, shape=(n_samples, n_components)
The reduced samples.
Notes
-----
Spectral Embedding (Laplacian Eigenmaps) is most useful when the graph
    has one connected component. If the graph has many components, the first
few eigenvectors will simply uncover the connected components of the graph.
References
----------
* https://en.wikipedia.org/wiki/LOBPCG
* Toward the Optimal Preconditioned Eigensolver: Locally Optimal
Block Preconditioned Conjugate Gradient Method
Andrew V. Knyazev
https://doi.org/10.1137%2FS1064827500366124
"""
adjacency = check_symmetric(adjacency)
try:
from pyamg import smoothed_aggregation_solver
except ImportError:
if eigen_solver == "amg":
raise ValueError("The eigen_solver was set to 'amg', but pyamg is "
"not available.")
if eigen_solver is None:
eigen_solver = 'arpack'
elif eigen_solver not in ('arpack', 'lobpcg', 'amg'):
raise ValueError("Unknown value for eigen_solver: '%s'."
"Should be 'amg', 'arpack', or 'lobpcg'"
% eigen_solver)
random_state = check_random_state(random_state)
n_nodes = adjacency.shape[0]
# Whether to drop the first eigenvector
if drop_first:
n_components = n_components + 1
if not _graph_is_connected(adjacency):
warnings.warn("Graph is not fully connected, spectral embedding"
" may not work as expected.")
laplacian, dd = csgraph_laplacian(adjacency, normed=norm_laplacian,
return_diag=True)
if (eigen_solver == 'arpack' or eigen_solver != 'lobpcg' and
(not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)):
# lobpcg used with eigen_solver='amg' has bugs for low number of nodes
# for details see the source code in scipy:
# https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen
# /lobpcg/lobpcg.py#L237
# or matlab:
# https://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
laplacian = _set_diag(laplacian, 1, norm_laplacian)
# Here we'll use shift-invert mode for fast eigenvalues
# (see https://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
# for a short explanation of what this means)
# Because the normalized Laplacian has eigenvalues between 0 and 2,
# I - L has eigenvalues between -1 and 1. ARPACK is most efficient
# when finding eigenvalues of largest magnitude (keyword which='LM')
# and when these eigenvalues are very large compared to the rest.
# For very large, very sparse graphs, I - L can have many, many
# eigenvalues very near 1.0. This leads to slow convergence. So
# instead, we'll use ARPACK's shift-invert mode, asking for the
# eigenvalues near 1.0. This effectively spreads-out the spectrum
# near 1.0 and leads to much faster convergence: potentially an
# orders-of-magnitude speedup over simply using keyword which='LA'
# in standard mode.
try:
# We are computing the opposite of the laplacian inplace so as
# to spare a memory allocation of a possibly very large array
laplacian *= -1
v0 = random_state.uniform(-1, 1, laplacian.shape[0])
_, diffusion_map = eigsh(
laplacian, k=n_components, sigma=1.0, which='LM',
tol=eigen_tol, v0=v0)
embedding = diffusion_map.T[n_components::-1]
if norm_laplacian:
embedding = embedding / dd
except RuntimeError:
# When submatrices are exactly singular, an LU decomposition
# in arpack fails. We fallback to lobpcg
eigen_solver = "lobpcg"
# Revert the laplacian to its opposite to have lobpcg work
laplacian *= -1
elif eigen_solver == 'amg':
# Use AMG to get a preconditioner and speed up the eigenvalue
# problem.
if not sparse.issparse(laplacian):
warnings.warn("AMG works better for sparse matrices")
# lobpcg needs double precision floats
laplacian = check_array(laplacian, dtype=np.float64,
accept_sparse=True)
laplacian = _set_diag(laplacian, 1, norm_laplacian)
# The Laplacian matrix is always singular, having at least one zero
# eigenvalue, corresponding to the trivial eigenvector, which is a
# constant. Using a singular matrix for preconditioning may result in
# random failures in LOBPCG and is not supported by the existing
# theory:
# see https://doi.org/10.1007/s10208-015-9297-1
        # Shift the Laplacian so its diagonal is not all ones. The shift
# does change the eigenpairs however, so we'll feed the shifted
# matrix to the solver and afterward set it back to the original.
diag_shift = 1e-5 * sparse.eye(laplacian.shape[0])
laplacian += diag_shift
ml = smoothed_aggregation_solver(check_array(laplacian,
accept_sparse='csr'))
laplacian -= diag_shift
M = ml.aspreconditioner()
X = random_state.rand(laplacian.shape[0], n_components + 1)
X[:, 0] = dd.ravel()
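        # Seed the first block vector with dd, which for the normalized
        # Laplacian is proportional to the trivial eigenvector, to help
        # LOBPCG converge.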
_, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-5,
largest=False)
embedding = diffusion_map.T
if norm_laplacian:
embedding = embedding / dd
if embedding.shape[0] == 1:
raise ValueError
if eigen_solver == "lobpcg":
# lobpcg needs double precision floats
laplacian = check_array(laplacian, dtype=np.float64,
accept_sparse=True)
if n_nodes < 5 * n_components + 1:
# see note above under arpack why lobpcg has problems with small
# number of nodes
# lobpcg will fallback to eigh, so we short circuit it
if sparse.isspmatrix(laplacian):
laplacian = laplacian.toarray()
_, diffusion_map = eigh(laplacian)
embedding = diffusion_map.T[:n_components]
if norm_laplacian:
embedding = embedding / dd
else:
laplacian = _set_diag(laplacian, 1, norm_laplacian)
# We increase the number of eigenvectors requested, as lobpcg
# doesn't behave well in low dimension
X = random_state.rand(laplacian.shape[0], n_components + 1)
X[:, 0] = dd.ravel()
_, diffusion_map = lobpcg(laplacian, X, tol=1e-15,
largest=False, maxiter=2000)
embedding = diffusion_map.T[:n_components]
if norm_laplacian:
embedding = embedding / dd
if embedding.shape[0] == 1:
raise ValueError
embedding = _deterministic_vector_sign_flip(embedding)
if drop_first:
return embedding[1:n_components].T
else:
return embedding[:n_components].T
class SpectralEmbedding(BaseEstimator):
"""Spectral embedding for non-linear dimensionality reduction.
Forms an affinity matrix given by the specified function and
applies spectral decomposition to the corresponding graph laplacian.
The resulting transformation is given by the value of the
eigenvectors for each data point.
Note : Laplacian Eigenmaps is the actual algorithm implemented here.
Read more in the :ref:`User Guide <spectral_embedding>`.
Parameters
----------
n_components : integer, default: 2
The dimension of the projected subspace.
affinity : string or callable, default : "nearest_neighbors"
How to construct the affinity matrix.
- 'nearest_neighbors' : construct the affinity matrix by computing a
graph of nearest neighbors.
- 'rbf' : construct the affinity matrix by computing a radial basis
function (RBF) kernel.
- 'precomputed' : interpret ``X`` as a precomputed affinity matrix.
- 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph
of precomputed nearest neighbors, and constructs the affinity matrix
by selecting the ``n_neighbors`` nearest neighbors.
- callable : use the passed in function as affinity;
the function takes in a data matrix (n_samples, n_features)
and returns an affinity matrix (n_samples, n_samples).
gamma : float, optional, default : 1/n_features
Kernel coefficient for rbf kernel.
random_state : int, RandomState instance, default=None
Determines the random number generator used for the initialization of
the lobpcg eigenvectors when ``solver`` == 'amg'. Pass an int for
reproducible results across multiple function calls.
See :term:`Glossary <random_state>`.
eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}
The eigenvalue decomposition strategy to use. AMG requires pyamg
to be installed. It can be faster on very large, sparse problems.
n_neighbors : int, default : max(n_samples/10 , 1)
Number of nearest neighbors for nearest_neighbors graph building.
n_jobs : int or None, optional (default=None)
The number of parallel jobs to run.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
Attributes
----------
embedding_ : array, shape = (n_samples, n_components)
Spectral embedding of the training matrix.
affinity_matrix_ : array, shape = (n_samples, n_samples)
Affinity matrix constructed from samples or precomputed.
n_neighbors_ : int
Number of nearest neighbors effectively used.
Examples
--------
>>> from sklearn.datasets import load_digits
>>> from sklearn.manifold import SpectralEmbedding
>>> X, _ = load_digits(return_X_y=True)
>>> X.shape
(1797, 64)
>>> embedding = SpectralEmbedding(n_components=2)
>>> X_transformed = embedding.fit_transform(X[:100])
>>> X_transformed.shape
(100, 2)
References
----------
- A Tutorial on Spectral Clustering, 2007
Ulrike von Luxburg
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323
- On Spectral Clustering: Analysis and an algorithm, 2001
Andrew Y. Ng, Michael I. Jordan, Yair Weiss
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8100
- Normalized cuts and image segmentation, 2000
Jianbo Shi, Jitendra Malik
http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324
"""
@_deprecate_positional_args
def __init__(self, n_components=2, *, affinity="nearest_neighbors",
gamma=None, random_state=None, eigen_solver=None,
n_neighbors=None, n_jobs=None):
self.n_components = n_components
self.affinity = affinity
self.gamma = gamma
self.random_state = random_state
self.eigen_solver = eigen_solver
self.n_neighbors = n_neighbors
self.n_jobs = n_jobs
@property
def _pairwise(self):
return self.affinity in ["precomputed",
"precomputed_nearest_neighbors"]
def _get_affinity_matrix(self, X, Y=None):
"""Calculate the affinity matrix from data
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training vector, where n_samples is the number of samples
and n_features is the number of features.
If affinity is "precomputed"
X : array-like, shape (n_samples, n_samples),
Interpret X as precomputed adjacency graph computed from
samples.
Y: Ignored
Returns
-------
affinity_matrix, shape (n_samples, n_samples)
"""
if self.affinity == 'precomputed':
self.affinity_matrix_ = X
return self.affinity_matrix_
if self.affinity == 'precomputed_nearest_neighbors':
estimator = NearestNeighbors(n_neighbors=self.n_neighbors,
n_jobs=self.n_jobs,
metric="precomputed").fit(X)
connectivity = estimator.kneighbors_graph(X=X, mode='connectivity')
self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)
return self.affinity_matrix_
if self.affinity == 'nearest_neighbors':
if sparse.issparse(X):
warnings.warn("Nearest neighbors affinity currently does "
"not support sparse input, falling back to "
"rbf affinity")
self.affinity = "rbf"
else:
self.n_neighbors_ = (self.n_neighbors
if self.n_neighbors is not None
else max(int(X.shape[0] / 10), 1))
self.affinity_matrix_ = kneighbors_graph(X, self.n_neighbors_,
include_self=True,
n_jobs=self.n_jobs)
# currently only symmetric affinity_matrix supported
self.affinity_matrix_ = 0.5 * (self.affinity_matrix_ +
self.affinity_matrix_.T)
return self.affinity_matrix_
if self.affinity == 'rbf':
self.gamma_ = (self.gamma
if self.gamma is not None else 1.0 / X.shape[1])
self.affinity_matrix_ = rbf_kernel(X, gamma=self.gamma_)
return self.affinity_matrix_
self.affinity_matrix_ = self.affinity(X)
return self.affinity_matrix_
def fit(self, X, y=None):
"""Fit the model from data in X.
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Training vector, where n_samples is the number of samples
and n_features is the number of features.
If affinity is "precomputed"
X : {array-like, sparse matrix}, shape (n_samples, n_samples),
Interpret X as precomputed adjacency graph computed from
samples.
Returns
-------
self : object
Returns the instance itself.
"""
X = self._validate_data(X, accept_sparse='csr', ensure_min_samples=2,
estimator=self)
random_state = check_random_state(self.random_state)
if isinstance(self.affinity, str):
if self.affinity not in {"nearest_neighbors", "rbf", "precomputed",
"precomputed_nearest_neighbors"}:
raise ValueError(("%s is not a valid affinity. Expected "
"'precomputed', 'rbf', 'nearest_neighbors' "
"or a callable.") % self.affinity)
elif not callable(self.affinity):
raise ValueError(("'affinity' is expected to be an affinity "
"name or a callable. Got: %s") % self.affinity)
affinity_matrix = self._get_affinity_matrix(X)
self.embedding_ = spectral_embedding(affinity_matrix,
n_components=self.n_components,
eigen_solver=self.eigen_solver,
random_state=random_state)
return self
def fit_transform(self, X, y=None):
"""Fit the model from data in X and transform X.
Parameters
----------
X : {array-like, sparse matrix}, shape (n_samples, n_features)
Training vector, where n_samples is the number of samples
and n_features is the number of features.
If affinity is "precomputed"
X : {array-like, sparse matrix}, shape (n_samples, n_samples),
Interpret X as precomputed adjacency graph computed from
samples.
Returns
-------
X_new : array-like, shape (n_samples, n_components)
"""
self.fit(X)
return self.embedding_
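# Illustrative sketch (not part of the library API): the
# 'precomputed_nearest_neighbors' affinity expects a sparse k-nearest-neighbors
# graph rather than raw features. The digits dataset and the parameter values
# below are arbitrary choices for the example.
def _demo_precomputed_nearest_neighbors_affinity():
    from sklearn.datasets import load_digits
    X, _ = load_digits(return_X_y=True)
    nn = NearestNeighbors(n_neighbors=10).fit(X)
    graph = nn.kneighbors_graph(X, mode='connectivity')
    se = SpectralEmbedding(n_components=2,
                           affinity='precomputed_nearest_neighbors',
                           n_neighbors=5, random_state=0)
    X_embedded = se.fit_transform(graph)
    print(X_embedded.shape)  # (1797, 2)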

View file

@ -0,0 +1,910 @@
# Author: Alexander Fabisch -- <afabisch@informatik.uni-bremen.de>
# Author: Christopher Moody <chrisemoody@gmail.com>
# Author: Nick Travers <nickt@squareup.com>
# License: BSD 3 clause (C) 2014
# This is the exact and Barnes-Hut t-SNE implementation. There are other
# modifications of the algorithm:
# * Fast Optimization for t-SNE:
# https://cseweb.ucsd.edu/~lvdmaaten/workshops/nips2010/papers/vandermaaten.pdf
from time import time
import numpy as np
from scipy import linalg
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
from scipy.sparse import csr_matrix, issparse
from ..neighbors import NearestNeighbors
from ..base import BaseEstimator
from ..utils import check_random_state
from ..utils._openmp_helpers import _openmp_effective_n_threads
from ..utils.validation import check_non_negative
from ..utils.validation import _deprecate_positional_args
from ..decomposition import PCA
from ..metrics.pairwise import pairwise_distances
from . import _utils
# mypy error: Module 'sklearn.manifold' has no attribute '_barnes_hut_tsne'
from . import _barnes_hut_tsne # type: ignore
MACHINE_EPSILON = np.finfo(np.double).eps
def _joint_probabilities(distances, desired_perplexity, verbose):
"""Compute joint probabilities p_ij from distances.
Parameters
----------
distances : array, shape (n_samples * (n_samples-1) / 2,)
Distances of samples are stored as condensed matrices, i.e.
we omit the diagonal and duplicate entries and store everything
in a one-dimensional array.
desired_perplexity : float
Desired perplexity of the joint probability distributions.
verbose : int
Verbosity level.
Returns
-------
P : array, shape (n_samples * (n_samples-1) / 2,)
Condensed joint probability matrix.
"""
# Compute conditional probabilities such that they approximately match
# the desired perplexity
distances = distances.astype(np.float32, copy=False)
conditional_P = _utils._binary_search_perplexity(
distances, desired_perplexity, verbose)
P = conditional_P + conditional_P.T
sum_P = np.maximum(np.sum(P), MACHINE_EPSILON)
P = np.maximum(squareform(P) / sum_P, MACHINE_EPSILON)
return P
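# Illustrative sketch (not part of the library API): the `distances` argument
# is a condensed pairwise distance matrix, i.e. exactly what
# scipy.spatial.distance.pdist returns. Sizes and the perplexity value are
# arbitrary; the call assumes the compiled `_utils` extension is built.
def _demo_joint_probabilities():
    rng = np.random.RandomState(0)
    X = rng.randn(20, 5)
    distances = pdist(X, "sqeuclidean")    # shape (20 * 19 / 2,)
    P = _joint_probabilities(distances, desired_perplexity=5.0, verbose=0)
    print(P.shape, P.sum())                # condensed vector, sums to ~1.0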
def _joint_probabilities_nn(distances, desired_perplexity, verbose):
"""Compute joint probabilities p_ij from distances using just nearest
neighbors.
This method is approximately equal to _joint_probabilities. The latter
is O(N^2), but limiting the joint probability to nearest neighbors improves
this substantially to O(uN), where u is the number of nearest neighbors.
Parameters
----------
distances : CSR sparse matrix, shape (n_samples, n_samples)
Distances of each sample to its n_neighbors nearest neighbors. All other
distances are left at zero (and are not materialized in memory).
desired_perplexity : float
Desired perplexity of the joint probability distributions.
verbose : int
Verbosity level.
Returns
-------
P : csr sparse matrix, shape (n_samples, n_samples)
Sparse joint probability matrix with only nearest neighbors.
"""
t0 = time()
# Compute conditional probabilities such that they approximately match
# the desired perplexity
distances.sort_indices()
n_samples = distances.shape[0]
distances_data = distances.data.reshape(n_samples, -1)
distances_data = distances_data.astype(np.float32, copy=False)
conditional_P = _utils._binary_search_perplexity(
distances_data, desired_perplexity, verbose)
assert np.all(np.isfinite(conditional_P)), \
"All probabilities should be finite"
# Symmetrize the joint probability distribution using sparse operations
P = csr_matrix((conditional_P.ravel(), distances.indices,
distances.indptr),
shape=(n_samples, n_samples))
P = P + P.T
# Normalize the joint probability distribution
sum_P = np.maximum(P.sum(), MACHINE_EPSILON)
P /= sum_P
assert np.all(np.abs(P.data) <= 1.0)
if verbose >= 2:
duration = time() - t0
print("[t-SNE] Computed conditional probabilities in {:.3f}s"
.format(duration))
return P
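# Illustrative sketch (not part of the library API): the input is a CSR matrix
# of distances to the k nearest neighbors, as produced by
# NearestNeighbors.kneighbors_graph(mode='distance'). Sizes, the number of
# neighbors and the perplexity are arbitrary choices for the example.
def _demo_joint_probabilities_nn():
    rng = np.random.RandomState(0)
    X = rng.randn(50, 4)
    nn = NearestNeighbors(n_neighbors=15).fit(X)
    distances_nn = nn.kneighbors_graph(mode='distance')
    P = _joint_probabilities_nn(distances_nn, desired_perplexity=5.0,
                                verbose=0)
    print(P.shape, P.sum())                # (50, 50) sparse, sums to ~1.0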
def _kl_divergence(params, P, degrees_of_freedom, n_samples, n_components,
skip_num_points=0, compute_error=True):
"""t-SNE objective function: gradient of the KL divergence
of p_ijs and q_ijs and the absolute error.
Parameters
----------
params : array, shape (n_params,)
Unraveled embedding.
P : array, shape (n_samples * (n_samples-1) / 2,)
Condensed joint probability matrix.
degrees_of_freedom : int
Degrees of freedom of the Student's-t distribution.
n_samples : int
Number of samples.
n_components : int
Dimension of the embedded space.
skip_num_points : int (optional, default:0)
This does not compute the gradient for points with indices below
`skip_num_points`. This is useful when computing transforms of new
data where you'd like to keep the old data fixed.
compute_error: bool (optional, default:True)
If False, the kl_divergence is not computed and returns NaN.
Returns
-------
kl_divergence : float
Kullback-Leibler divergence of p_ij and q_ij.
grad : array, shape (n_params,)
Unraveled gradient of the Kullback-Leibler divergence with respect to
the embedding.
"""
X_embedded = params.reshape(n_samples, n_components)
# Q is a heavy-tailed distribution: Student's t-distribution
dist = pdist(X_embedded, "sqeuclidean")
dist /= degrees_of_freedom
dist += 1.
dist **= (degrees_of_freedom + 1.0) / -2.0
Q = np.maximum(dist / (2.0 * np.sum(dist)), MACHINE_EPSILON)
# Optimization trick below: np.dot(x, y) is faster than
# np.sum(x * y) because it calls BLAS
# Objective: C (Kullback-Leibler divergence of P and Q)
if compute_error:
kl_divergence = 2.0 * np.dot(
P, np.log(np.maximum(P, MACHINE_EPSILON) / Q))
else:
kl_divergence = np.nan
# Gradient: dC/dY
# pdist always returns double precision distances. Thus we need to take
# care to allocate the gradient with the dtype of the (possibly float32)
# embedding parameters.
grad = np.ndarray((n_samples, n_components), dtype=params.dtype)
PQd = squareform((P - Q) * dist)
for i in range(skip_num_points, n_samples):
grad[i] = np.dot(np.ravel(PQd[i], order='K'),
X_embedded[i] - X_embedded)
grad = grad.ravel()
c = 2.0 * (degrees_of_freedom + 1.0) / degrees_of_freedom
grad *= c
return kl_divergence, grad
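# Illustrative sketch (not part of the library API): checking the analytic
# gradient returned by _kl_divergence against scipy's finite-difference
# approximation on a tiny problem. The toy condensed P is just a normalized
# random vector; degrees_of_freedom=1 matches a 2-D embedding.
def _demo_check_kl_divergence_gradient():
    from scipy.optimize import check_grad
    rng = np.random.RandomState(0)
    n_samples, n_components = 6, 2
    P = rng.rand(n_samples * (n_samples - 1) // 2)
    P /= P.sum()
    x0 = rng.randn(n_samples * n_components)
    err = check_grad(
        lambda p: _kl_divergence(p, P, 1, n_samples, n_components)[0],
        lambda p: _kl_divergence(p, P, 1, n_samples, n_components)[1],
        x0)
    print("finite-difference gradient error: %.2e" % err)  # should be small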
def _kl_divergence_bh(params, P, degrees_of_freedom, n_samples, n_components,
angle=0.5, skip_num_points=0, verbose=False,
compute_error=True, num_threads=1):
"""t-SNE objective function: KL divergence of p_ijs and q_ijs.
Uses Barnes-Hut tree methods to calculate the gradient that
runs in O(NlogN) instead of O(N^2)
Parameters
----------
params : array, shape (n_params,)
Unraveled embedding.
P : csr sparse matrix, shape (n_samples, n_samples)
Sparse approximate joint probability matrix, computed only for the
k nearest-neighbors and symmetrized.
degrees_of_freedom : int
Degrees of freedom of the Student's-t distribution.
n_samples : int
Number of samples.
n_components : int
Dimension of the embedded space.
angle : float (default: 0.5)
This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.
'angle' is the angular size (referred to as theta in [3]) of a distant
node as measured from a point. If this size is below 'angle' then it is
used as a summary node of all points contained within it.
This method is not very sensitive to changes in this parameter
in the range of 0.2 - 0.8. An angle less than 0.2 quickly increases
computation time and an angle greater than 0.8 quickly increases error.
skip_num_points : int (optional, default:0)
This does not compute the gradient for points with indices below
`skip_num_points`. This is useful when computing transforms of new
data where you'd like to keep the old data fixed.
verbose : int
Verbosity level.
compute_error: bool (optional, default:True)
If False, the kl_divergence is not computed and returns NaN.
num_threads : int (optional, default:1)
Number of threads used to compute the gradient. This is set here to
avoid calling _openmp_effective_n_threads for each gradient step.
Returns
-------
kl_divergence : float
Kullback-Leibler divergence of p_ij and q_ij.
grad : array, shape (n_params,)
Unraveled gradient of the Kullback-Leibler divergence with respect to
the embedding.
"""
params = params.astype(np.float32, copy=False)
X_embedded = params.reshape(n_samples, n_components)
val_P = P.data.astype(np.float32, copy=False)
neighbors = P.indices.astype(np.int64, copy=False)
indptr = P.indptr.astype(np.int64, copy=False)
grad = np.zeros(X_embedded.shape, dtype=np.float32)
error = _barnes_hut_tsne.gradient(val_P, X_embedded, neighbors, indptr,
grad, angle, n_components, verbose,
dof=degrees_of_freedom,
compute_error=compute_error,
num_threads=num_threads)
c = 2.0 * (degrees_of_freedom + 1.0) / degrees_of_freedom
grad = grad.ravel()
grad *= c
return error, grad
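# Illustrative sketch (not part of the library API): a single evaluation of the
# Barnes-Hut objective on a sparse joint-probability matrix built from a
# k-nearest-neighbors graph. Sizes, the number of neighbors and the perplexity
# are arbitrary; the compiled `_barnes_hut_tsne` extension is assumed built.
def _demo_kl_divergence_bh():
    rng = np.random.RandomState(0)
    X = rng.randn(100, 5)
    nn = NearestNeighbors(n_neighbors=30).fit(X)
    distances_nn = nn.kneighbors_graph(mode='distance')
    P = _joint_probabilities_nn(distances_nn, desired_perplexity=10.0,
                                verbose=0)
    params = 1e-4 * rng.randn(100, 2).astype(np.float32).ravel()
    error, grad = _kl_divergence_bh(params, P, degrees_of_freedom=1,
                                    n_samples=100, n_components=2)
    print(error, grad.shape)               # scalar KL estimate, (200,)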
def _gradient_descent(objective, p0, it, n_iter,
n_iter_check=1, n_iter_without_progress=300,
momentum=0.8, learning_rate=200.0, min_gain=0.01,
min_grad_norm=1e-7, verbose=0, args=None, kwargs=None):
"""Batch gradient descent with momentum and individual gains.
Parameters
----------
objective : function or callable
Should return a tuple of cost and gradient for a given parameter
vector. When expensive to compute, the cost can optionally
be None and can be computed every n_iter_check steps using
the objective_error function.
p0 : array-like, shape (n_params,)
Initial parameter vector.
it : int
Current number of iterations (this function will be called more than
once during the optimization).
n_iter : int
Maximum number of gradient descent iterations.
n_iter_check : int
Number of iterations before evaluating the global error. If the error
is sufficiently low, we abort the optimization.
n_iter_without_progress : int, optional (default: 300)
Maximum number of iterations without progress before we abort the
optimization.
momentum : float, within (0.0, 1.0), optional (default: 0.8)
The momentum generates a weight for previous gradients that decays
exponentially.
learning_rate : float, optional (default: 200.0)
The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If
the learning rate is too high, the data may look like a 'ball' with any
point approximately equidistant from its nearest neighbours. If the
learning rate is too low, most points may look compressed in a dense
cloud with few outliers.
min_gain : float, optional (default: 0.01)
Minimum individual gain for each parameter.
min_grad_norm : float, optional (default: 1e-7)
If the gradient norm is below this threshold, the optimization will
be aborted.
verbose : int, optional (default: 0)
Verbosity level.
args : sequence
Arguments to pass to objective function.
kwargs : dict
Keyword arguments to pass to objective function.
Returns
-------
p : array, shape (n_params,)
Optimum parameters.
error : float
Optimum.
i : int
Last iteration.
"""
if args is None:
args = []
if kwargs is None:
kwargs = {}
p = p0.copy().ravel()
update = np.zeros_like(p)
gains = np.ones_like(p)
error = np.finfo(np.float).max
best_error = np.finfo(np.float).max
best_iter = i = it
tic = time()
for i in range(it, n_iter):
check_convergence = (i + 1) % n_iter_check == 0
# only compute the error when needed
kwargs['compute_error'] = check_convergence or i == n_iter - 1
error, grad = objective(p, *args, **kwargs)
grad_norm = linalg.norm(grad)
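        # Adaptive per-parameter gains: the gain grows when the current
        # gradient has the opposite sign of the previous update (the parameter
        # keeps moving in the same direction) and shrinks otherwise, bounded
        # below by min_gain.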
inc = update * grad < 0.0
dec = np.invert(inc)
gains[inc] += 0.2
gains[dec] *= 0.8
np.clip(gains, min_gain, np.inf, out=gains)
grad *= gains
update = momentum * update - learning_rate * grad
p += update
if check_convergence:
toc = time()
duration = toc - tic
tic = toc
if verbose >= 2:
print("[t-SNE] Iteration %d: error = %.7f,"
" gradient norm = %.7f"
" (%s iterations in %0.3fs)"
% (i + 1, error, grad_norm, n_iter_check, duration))
if error < best_error:
best_error = error
best_iter = i
elif i - best_iter > n_iter_without_progress:
if verbose >= 2:
print("[t-SNE] Iteration %d: did not make any progress "
"during the last %d episodes. Finished."
% (i + 1, n_iter_without_progress))
break
if grad_norm <= min_grad_norm:
if verbose >= 2:
print("[t-SNE] Iteration %d: gradient norm %f. Finished."
% (i + 1, grad_norm))
break
return p, error, i
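# Illustrative sketch (not part of the library API): the optimizer expects an
# objective returning (error, gradient) that also accepts a `compute_error`
# keyword, which _gradient_descent toggles between progress checks. The
# quadratic below and its learning rate are arbitrary choices.
def _demo_gradient_descent_on_quadratic():
    def quadratic(p, compute_error=True):
        error = 0.5 * np.dot(p, p) if compute_error else np.nan
        return error, p                    # gradient of 0.5 * ||p||^2 is p
    p0 = np.array([5.0, -3.0])
    p, error, it = _gradient_descent(quadratic, p0, it=0, n_iter=100,
                                     learning_rate=0.5, verbose=0)
    print(p, error, it)                    # p approaches the origin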
@_deprecate_positional_args
def trustworthiness(X, X_embedded, *, n_neighbors=5, metric='euclidean'):
r"""Expresses to what extent the local structure is retained.
The trustworthiness is within [0, 1]. It is defined as
.. math::
T(k) = 1 - \frac{2}{nk (2n - 3k - 1)} \sum^n_{i=1}
\sum_{j \in \mathcal{N}_{i}^{k}} \max(0, (r(i, j) - k))
where for each sample i, :math:`\mathcal{N}_{i}^{k}` are its k nearest
neighbors in the output space, and every sample j is its :math:`r(i, j)`-th
nearest neighbor in the input space. In other words, any unexpected nearest
neighbors in the output space are penalised in proportion to their rank in
the input space.
* "Neighborhood Preservation in Nonlinear Projection Methods: An
Experimental Study"
J. Venna, S. Kaski
* "Learning a Parametric Embedding by Preserving Local Structure"
L.J.P. van der Maaten
Parameters
----------
X : array, shape (n_samples, n_features) or (n_samples, n_samples)
If the metric is 'precomputed' X must be a square distance
matrix. Otherwise it contains a sample per row.
X_embedded : array, shape (n_samples, n_components)
Embedding of the training data in low-dimensional space.
n_neighbors : int, optional (default: 5)
Number of neighbors k that will be considered.
metric : string, or callable, optional, default 'euclidean'
Which metric to use for computing pairwise distances between samples
from the original input space. If metric is 'precomputed', X must be a
matrix of pairwise distances or squared distances. Otherwise, see the
documentation of the metric argument in
sklearn.metrics.pairwise.pairwise_distances
for a list of available metrics.
.. versionadded:: 0.20
Returns
-------
trustworthiness : float
Trustworthiness of the low-dimensional embedding.
"""
dist_X = pairwise_distances(X, metric=metric)
if metric == 'precomputed':
dist_X = dist_X.copy()
# we set the diagonal to np.inf to exclude the points themselves from
# their own neighborhood
np.fill_diagonal(dist_X, np.inf)
ind_X = np.argsort(dist_X, axis=1)
# `ind_X[i]` is the index of sorted distances between i and other samples
ind_X_embedded = NearestNeighbors(n_neighbors=n_neighbors).fit(
X_embedded).kneighbors(return_distance=False)
# We build an inverted index of neighbors in the input space: For sample i,
# we define `inverted_index[i]` as the inverted index of sorted distances:
# inverted_index[i][ind_X[i]] = np.arange(1, n_samples + 1)
n_samples = X.shape[0]
inverted_index = np.zeros((n_samples, n_samples), dtype=int)
ordered_indices = np.arange(n_samples + 1)
inverted_index[ordered_indices[:-1, np.newaxis],
ind_X] = ordered_indices[1:]
ranks = inverted_index[ordered_indices[:-1, np.newaxis],
ind_X_embedded] - n_neighbors
t = np.sum(ranks[ranks > 0])
t = 1.0 - t * (2.0 / (n_samples * n_neighbors *
(2.0 * n_samples - 3.0 * n_neighbors - 1.0)))
return t
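# Illustrative sketch (not part of the library API): trustworthiness of a 2-D
# PCA projection of the iris dataset. Values close to 1.0 indicate that local
# neighborhoods are well preserved; the dataset and n_neighbors are arbitrary
# choices for the example.
def _demo_trustworthiness():
    from sklearn.datasets import load_iris
    X = load_iris().data
    X_2d = PCA(n_components=2).fit_transform(X)
    print(trustworthiness(X, X_2d, n_neighbors=5))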
class TSNE(BaseEstimator):
"""t-distributed Stochastic Neighbor Embedding.
t-SNE [1] is a tool to visualize high-dimensional data. It converts
similarities between data points to joint probabilities and tries
to minimize the Kullback-Leibler divergence between the joint
probabilities of the low-dimensional embedding and the
high-dimensional data. t-SNE has a cost function that is not convex,
i.e. with different initializations we can get different results.
It is highly recommended to use another dimensionality reduction
method (e.g. PCA for dense data or TruncatedSVD for sparse data)
to reduce the number of dimensions to a reasonable amount (e.g. 50)
if the number of features is very high. This will suppress some
noise and speed up the computation of pairwise distances between
samples. For more tips see Laurens van der Maaten's FAQ [2].
Read more in the :ref:`User Guide <t_sne>`.
Parameters
----------
n_components : int, optional (default: 2)
Dimension of the embedded space.
perplexity : float, optional (default: 30)
The perplexity is related to the number of nearest neighbors that
is used in other manifold learning algorithms. Larger datasets
usually require a larger perplexity. Consider selecting a value
between 5 and 50. Different values can result in significantly
different results.
early_exaggeration : float, optional (default: 12.0)
Controls how tight natural clusters in the original space are in
the embedded space and how much space will be between them. For
larger values, the space between natural clusters will be larger
in the embedded space. Again, the choice of this parameter is not
very critical. If the cost function increases during initial
optimization, the early exaggeration factor or the learning rate
might be too high.
learning_rate : float, optional (default: 200.0)
The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If
the learning rate is too high, the data may look like a 'ball' with any
point approximately equidistant from its nearest neighbours. If the
learning rate is too low, most points may look compressed in a dense
cloud with few outliers. If the cost function gets stuck in a bad local
minimum increasing the learning rate may help.
n_iter : int, optional (default: 1000)
Maximum number of iterations for the optimization. Should be at
least 250.
n_iter_without_progress : int, optional (default: 300)
Maximum number of iterations without progress before we abort the
optimization, used after 250 initial iterations with early
exaggeration. Note that progress is only checked every 50 iterations so
this value is rounded to the next multiple of 50.
.. versionadded:: 0.17
parameter *n_iter_without_progress* to control stopping criteria.
min_grad_norm : float, optional (default: 1e-7)
If the gradient norm is below this threshold, the optimization will
be stopped.
metric : string or callable, optional
The metric to use when calculating distance between instances in a
feature array. If metric is a string, it must be one of the options
allowed by scipy.spatial.distance.pdist for its metric parameter, or
a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.
If metric is "precomputed", X is assumed to be a distance matrix.
Alternatively, if metric is a callable function, it is called on each
pair of instances (rows) and the resulting value recorded. The callable
should take two arrays from X as input and return a value indicating
the distance between them. The default is "euclidean" which is
interpreted as squared euclidean distance.
init : string or numpy array, optional (default: "random")
Initialization of embedding. Possible options are 'random', 'pca',
and a numpy array of shape (n_samples, n_components).
PCA initialization cannot be used with precomputed distances and is
usually more globally stable than random initialization.
verbose : int, optional (default: 0)
Verbosity level.
random_state : int, RandomState instance, default=None
Determines the random number generator. Pass an int for reproducible
results across multiple function calls. Note that different
initializations might result in different local minima of the cost
function. See :term:`Glossary <random_state>`.
method : string (default: 'barnes_hut')
By default the gradient calculation algorithm uses Barnes-Hut
approximation running in O(NlogN) time. method='exact'
will run on the slower, but exact, algorithm in O(N^2) time. The
exact algorithm should be used when nearest-neighbor errors need
to be better than 3%. However, the exact method cannot scale to
millions of examples.
.. versionadded:: 0.17
Approximate optimization *method* via the Barnes-Hut.
angle : float (default: 0.5)
Only used if method='barnes_hut'
This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.
'angle' is the angular size (referred to as theta in [3]) of a distant
node as measured from a point. If this size is below 'angle' then it is
used as a summary node of all points contained within it.
This method is not very sensitive to changes in this parameter
in the range of 0.2 - 0.8. An angle less than 0.2 quickly increases
computation time and an angle greater than 0.8 quickly increases error.
n_jobs : int or None, optional (default=None)
The number of parallel jobs to run for neighbors search. This parameter
has no impact when ``metric="precomputed"`` or
(``metric="euclidean"`` and ``method="exact"``).
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
.. versionadded:: 0.22
Attributes
----------
embedding_ : array-like, shape (n_samples, n_components)
Stores the embedding vectors.
kl_divergence_ : float
Kullback-Leibler divergence after optimization.
n_iter_ : int
Number of iterations run.
Examples
--------
>>> import numpy as np
>>> from sklearn.manifold import TSNE
>>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
>>> X_embedded = TSNE(n_components=2).fit_transform(X)
>>> X_embedded.shape
(4, 2)
References
----------
[1] van der Maaten, L.J.P.; Hinton, G.E. Visualizing High-Dimensional Data
Using t-SNE. Journal of Machine Learning Research 9:2579-2605, 2008.
[2] van der Maaten, L.J.P. t-Distributed Stochastic Neighbor Embedding
https://lvdmaaten.github.io/tsne/
[3] L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms.
Journal of Machine Learning Research 15(Oct):3221-3245, 2014.
https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf
"""
# Control the number of exploration iterations with early_exaggeration on
_EXPLORATION_N_ITER = 250
# Control the number of iterations between progress checks
_N_ITER_CHECK = 50
@_deprecate_positional_args
def __init__(self, n_components=2, *, perplexity=30.0,
early_exaggeration=12.0, learning_rate=200.0, n_iter=1000,
n_iter_without_progress=300, min_grad_norm=1e-7,
metric="euclidean", init="random", verbose=0,
random_state=None, method='barnes_hut', angle=0.5,
n_jobs=None):
self.n_components = n_components
self.perplexity = perplexity
self.early_exaggeration = early_exaggeration
self.learning_rate = learning_rate
self.n_iter = n_iter
self.n_iter_without_progress = n_iter_without_progress
self.min_grad_norm = min_grad_norm
self.metric = metric
self.init = init
self.verbose = verbose
self.random_state = random_state
self.method = method
self.angle = angle
self.n_jobs = n_jobs
def _fit(self, X, skip_num_points=0):
"""Private function to fit the model using X as training data."""
if self.method not in ['barnes_hut', 'exact']:
raise ValueError("'method' must be 'barnes_hut' or 'exact'")
if self.angle < 0.0 or self.angle > 1.0:
raise ValueError("'angle' must be between 0.0 - 1.0")
if self.method == 'barnes_hut':
X = self._validate_data(X, accept_sparse=['csr'],
ensure_min_samples=2,
dtype=[np.float32, np.float64])
else:
X = self._validate_data(X, accept_sparse=['csr', 'csc', 'coo'],
dtype=[np.float32, np.float64])
if self.metric == "precomputed":
if isinstance(self.init, str) and self.init == 'pca':
raise ValueError("The parameter init=\"pca\" cannot be "
"used with metric=\"precomputed\".")
if X.shape[0] != X.shape[1]:
raise ValueError("X should be a square distance matrix")
check_non_negative(X, "TSNE.fit(). With metric='precomputed', X "
"should contain positive distances.")
if self.method == "exact" and issparse(X):
raise TypeError(
'TSNE with method="exact" does not accept sparse '
'precomputed distance matrix. Use method="barnes_hut" '
'or provide the dense distance matrix.')
if self.method == 'barnes_hut' and self.n_components > 3:
raise ValueError("'n_components' should be inferior to 4 for the "
"barnes_hut algorithm as it relies on "
"quad-tree or oct-tree.")
random_state = check_random_state(self.random_state)
if self.early_exaggeration < 1.0:
raise ValueError("early_exaggeration must be at least 1, but is {}"
.format(self.early_exaggeration))
if self.n_iter < 250:
raise ValueError("n_iter should be at least 250")
n_samples = X.shape[0]
neighbors_nn = None
if self.method == "exact":
# Retrieve the distance matrix, either using the precomputed one or
# computing it.
if self.metric == "precomputed":
distances = X
else:
if self.verbose:
print("[t-SNE] Computing pairwise distances...")
if self.metric == "euclidean":
distances = pairwise_distances(X, metric=self.metric,
squared=True)
else:
distances = pairwise_distances(X, metric=self.metric,
n_jobs=self.n_jobs)
if np.any(distances < 0):
raise ValueError("All distances should be positive, the "
"metric given is not correct")
# compute the joint probability distribution for the input space
P = _joint_probabilities(distances, self.perplexity, self.verbose)
assert np.all(np.isfinite(P)), "All probabilities should be finite"
assert np.all(P >= 0), "All probabilities should be non-negative"
assert np.all(P <= 1), ("All probabilities should be less "
"or then equal to one")
else:
# Compute the number of nearest neighbors to find.
# LvdM uses 3 * perplexity as the number of neighbors.
# In the event that we have a very small number of points,
# set the number of neighbors to n - 1.
n_neighbors = min(n_samples - 1, int(3. * self.perplexity + 1))
if self.verbose:
print("[t-SNE] Computing {} nearest neighbors..."
.format(n_neighbors))
# Find the nearest neighbors for every point
knn = NearestNeighbors(algorithm='auto',
n_jobs=self.n_jobs,
n_neighbors=n_neighbors,
metric=self.metric)
t0 = time()
knn.fit(X)
duration = time() - t0
if self.verbose:
print("[t-SNE] Indexed {} samples in {:.3f}s...".format(
n_samples, duration))
t0 = time()
distances_nn = knn.kneighbors_graph(mode='distance')
duration = time() - t0
if self.verbose:
print("[t-SNE] Computed neighbors for {} samples "
"in {:.3f}s...".format(n_samples, duration))
# Free the memory used by the ball_tree
del knn
if self.metric == "euclidean":
# knn return the euclidean distance but we need it squared
# to be consistent with the 'exact' method. Note that the
# method was derived using the Euclidean metric in the input
# space. Not sure of the implication of using a different
# metric.
distances_nn.data **= 2
# compute the joint probability distribution for the input space
P = _joint_probabilities_nn(distances_nn, self.perplexity,
self.verbose)
if isinstance(self.init, np.ndarray):
X_embedded = self.init
elif self.init == 'pca':
pca = PCA(n_components=self.n_components, svd_solver='randomized',
random_state=random_state)
X_embedded = pca.fit_transform(X).astype(np.float32, copy=False)
elif self.init == 'random':
# The embedding is initialized with iid samples from Gaussians with
# standard deviation 1e-4.
X_embedded = 1e-4 * random_state.randn(
n_samples, self.n_components).astype(np.float32)
else:
raise ValueError("'init' must be 'pca', 'random', or "
"a numpy array")
# Degrees of freedom of the Student's t-distribution. The suggestion
# degrees_of_freedom = n_components - 1 comes from
# "Learning a Parametric Embedding by Preserving Local Structure"
# Laurens van der Maaten, 2009.
degrees_of_freedom = max(self.n_components - 1, 1)
return self._tsne(P, degrees_of_freedom, n_samples,
X_embedded=X_embedded,
neighbors=neighbors_nn,
skip_num_points=skip_num_points)
def _tsne(self, P, degrees_of_freedom, n_samples, X_embedded,
neighbors=None, skip_num_points=0):
"""Runs t-SNE."""
# t-SNE minimizes the Kullback-Leibler divergence of the Gaussians P
# and the Student's t-distributions Q. The optimization algorithm that
# we use is batch gradient descent with two stages:
# * initial optimization with early exaggeration and momentum at 0.5
# * final optimization with momentum at 0.8
params = X_embedded.ravel()
opt_args = {
"it": 0,
"n_iter_check": self._N_ITER_CHECK,
"min_grad_norm": self.min_grad_norm,
"learning_rate": self.learning_rate,
"verbose": self.verbose,
"kwargs": dict(skip_num_points=skip_num_points),
"args": [P, degrees_of_freedom, n_samples, self.n_components],
"n_iter_without_progress": self._EXPLORATION_N_ITER,
"n_iter": self._EXPLORATION_N_ITER,
"momentum": 0.5,
}
if self.method == 'barnes_hut':
obj_func = _kl_divergence_bh
opt_args['kwargs']['angle'] = self.angle
# Repeat verbose argument for _kl_divergence_bh
opt_args['kwargs']['verbose'] = self.verbose
# Get the number of threads for gradient computation here to
# avoid recomputing it at each iteration.
opt_args['kwargs']['num_threads'] = _openmp_effective_n_threads()
else:
obj_func = _kl_divergence
# Learning schedule (part 1): do 250 iteration with lower momentum but
# higher learning rate controlled via the early exaggeration parameter
P *= self.early_exaggeration
params, kl_divergence, it = _gradient_descent(obj_func, params,
**opt_args)
if self.verbose:
print("[t-SNE] KL divergence after %d iterations with early "
"exaggeration: %f" % (it + 1, kl_divergence))
# Learning schedule (part 2): disable early exaggeration and finish
# optimization with a higher momentum at 0.8
P /= self.early_exaggeration
remaining = self.n_iter - self._EXPLORATION_N_ITER
if it < self._EXPLORATION_N_ITER or remaining > 0:
opt_args['n_iter'] = self.n_iter
opt_args['it'] = it + 1
opt_args['momentum'] = 0.8
opt_args['n_iter_without_progress'] = self.n_iter_without_progress
params, kl_divergence, it = _gradient_descent(obj_func, params,
**opt_args)
# Save the final number of iterations
self.n_iter_ = it
if self.verbose:
print("[t-SNE] KL divergence after %d iterations: %f"
% (it + 1, kl_divergence))
X_embedded = params.reshape(n_samples, self.n_components)
self.kl_divergence_ = kl_divergence
return X_embedded
def fit_transform(self, X, y=None):
"""Fit X into an embedded space and return that transformed
output.
Parameters
----------
X : array, shape (n_samples, n_features) or (n_samples, n_samples)
If the metric is 'precomputed' X must be a square distance
matrix. Otherwise it contains a sample per row. If the method
is 'exact', X may be a sparse matrix of type 'csr', 'csc'
or 'coo'. If the method is 'barnes_hut' and the metric is
'precomputed', X may be a precomputed sparse graph.
y : Ignored
Returns
-------
X_new : array, shape (n_samples, n_components)
Embedding of the training data in low-dimensional space.
"""
embedding = self._fit(X)
self.embedding_ = embedding
return self.embedding_
def fit(self, X, y=None):
"""Fit X into an embedded space.
Parameters
----------
X : array, shape (n_samples, n_features) or (n_samples, n_samples)
If the metric is 'precomputed' X must be a square distance
matrix. Otherwise it contains a sample per row. If the method
is 'exact', X may be a sparse matrix of type 'csr', 'csc'
or 'coo'. If the method is 'barnes_hut' and the metric is
'precomputed', X may be a precomputed sparse graph.
y : Ignored
"""
self.fit_transform(X)
return self
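# Illustrative sketch (not part of the library API): the PCA-then-t-SNE
# workflow recommended in the class docstring for high-dimensional dense data.
# The digits dataset and the parameter values are arbitrary choices.
def _demo_pca_then_tsne():
    from sklearn.datasets import load_digits
    X, _ = load_digits(return_X_y=True)
    X_reduced = PCA(n_components=50, random_state=0).fit_transform(X)
    X_2d = TSNE(n_components=2, init='pca',
                random_state=0).fit_transform(X_reduced)
    print(X_2d.shape)                      # (1797, 2)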

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _isomap # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.manifold.isomap'
correct_import_path = 'sklearn.manifold'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_isomap, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _locally_linear # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.manifold.locally_linear'
correct_import_path = 'sklearn.manifold'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_locally_linear, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _mds # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.manifold.mds'
correct_import_path = 'sklearn.manifold'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_mds, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,34 @@
import os
import numpy
def configuration(parent_package="", top_path=None):
from numpy.distutils.misc_util import Configuration
config = Configuration("manifold", parent_package, top_path)
libraries = []
if os.name == 'posix':
libraries.append('m')
config.add_extension("_utils",
sources=["_utils.pyx"],
include_dirs=[numpy.get_include()],
libraries=libraries,
extra_compile_args=["-O3"])
config.add_extension("_barnes_hut_tsne",
sources=["_barnes_hut_tsne.pyx"],
include_dirs=[numpy.get_include()],
libraries=libraries,
extra_compile_args=['-O3'])
config.add_subpackage('tests')
return config
if __name__ == "__main__":
from numpy.distutils.core import setup
setup(**configuration().todict())

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _spectral_embedding # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.manifold.spectral_embedding_'
correct_import_path = 'sklearn.manifold'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_spectral_embedding, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,18 @@
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _t_sne # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
deprecated_path = 'sklearn.manifold.t_sne'
correct_import_path = 'sklearn.manifold'
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
def __getattr__(name):
return getattr(_t_sne, name)
if not sys.version_info >= (3, 7):
Pep562(__name__)

View file

@ -0,0 +1,188 @@
from itertools import product
import numpy as np
from numpy.testing import assert_almost_equal, assert_array_almost_equal
import pytest
from sklearn import datasets
from sklearn import manifold
from sklearn import neighbors
from sklearn import pipeline
from sklearn import preprocessing
from scipy.sparse import rand as sparse_rand
eigen_solvers = ['auto', 'dense', 'arpack']
path_methods = ['auto', 'FW', 'D']
def test_isomap_simple_grid():
# Isomap should preserve distances when all neighbors are used
N_per_side = 5
Npts = N_per_side ** 2
n_neighbors = Npts - 1
# grid of equidistant points in 2D, n_components = n_dim
X = np.array(list(product(range(N_per_side), repeat=2)))
# distances from each point to all others
G = neighbors.kneighbors_graph(X, n_neighbors,
mode='distance').toarray()
for eigen_solver in eigen_solvers:
for path_method in path_methods:
clf = manifold.Isomap(n_neighbors=n_neighbors, n_components=2,
eigen_solver=eigen_solver,
path_method=path_method)
clf.fit(X)
G_iso = neighbors.kneighbors_graph(clf.embedding_,
n_neighbors,
mode='distance').toarray()
assert_array_almost_equal(G, G_iso)
def test_isomap_reconstruction_error():
# Same setup as in test_isomap_simple_grid, with an added dimension
N_per_side = 5
Npts = N_per_side ** 2
n_neighbors = Npts - 1
# grid of equidistant points in 2D, n_components = n_dim
X = np.array(list(product(range(N_per_side), repeat=2)))
# add noise in a third dimension
rng = np.random.RandomState(0)
noise = 0.1 * rng.randn(Npts, 1)
X = np.concatenate((X, noise), 1)
# compute input kernel
G = neighbors.kneighbors_graph(X, n_neighbors,
mode='distance').toarray()
centerer = preprocessing.KernelCenterer()
K = centerer.fit_transform(-0.5 * G ** 2)
for eigen_solver in eigen_solvers:
for path_method in path_methods:
clf = manifold.Isomap(n_neighbors=n_neighbors, n_components=2,
eigen_solver=eigen_solver,
path_method=path_method)
clf.fit(X)
# compute output kernel
G_iso = neighbors.kneighbors_graph(clf.embedding_,
n_neighbors,
mode='distance').toarray()
K_iso = centerer.fit_transform(-0.5 * G_iso ** 2)
# make sure error agrees
reconstruction_error = np.linalg.norm(K - K_iso) / Npts
assert_almost_equal(reconstruction_error,
clf.reconstruction_error())
def test_transform():
n_samples = 200
n_components = 10
noise_scale = 0.01
# Create S-curve dataset
X, y = datasets.make_s_curve(n_samples, random_state=0)
# Compute isomap embedding
iso = manifold.Isomap(n_components=n_components, n_neighbors=2)
X_iso = iso.fit_transform(X)
# Re-embed a noisy version of the points
rng = np.random.RandomState(0)
noise = noise_scale * rng.randn(*X.shape)
X_iso2 = iso.transform(X + noise)
# Make sure the rms error on re-embedding is comparable to noise_scale
assert np.sqrt(np.mean((X_iso - X_iso2) ** 2)) < 2 * noise_scale
def test_pipeline():
# check that Isomap works fine as a transformer in a Pipeline
# only checks that no error is raised.
# TODO check that it actually does something useful
X, y = datasets.make_blobs(random_state=0)
clf = pipeline.Pipeline(
[('isomap', manifold.Isomap()),
('clf', neighbors.KNeighborsClassifier())])
clf.fit(X, y)
assert .9 < clf.score(X, y)
def test_pipeline_with_nearest_neighbors_transformer():
# Test chaining NearestNeighborsTransformer and Isomap with
# neighbors_algorithm='precomputed'
algorithm = 'auto'
n_neighbors = 10
X, _ = datasets.make_blobs(random_state=0)
X2, _ = datasets.make_blobs(random_state=1)
# compare the chained version and the compact version
est_chain = pipeline.make_pipeline(
neighbors.KNeighborsTransformer(
n_neighbors=n_neighbors, algorithm=algorithm, mode='distance'),
manifold.Isomap(n_neighbors=n_neighbors, metric='precomputed'))
est_compact = manifold.Isomap(n_neighbors=n_neighbors,
neighbors_algorithm=algorithm)
Xt_chain = est_chain.fit_transform(X)
Xt_compact = est_compact.fit_transform(X)
assert_array_almost_equal(Xt_chain, Xt_compact)
Xt_chain = est_chain.transform(X2)
Xt_compact = est_compact.transform(X2)
assert_array_almost_equal(Xt_chain, Xt_compact)
def test_different_metric():
# Test that the metric parameters work correctly, and default to euclidean
def custom_metric(x1, x2):
return np.sqrt(np.sum(x1 ** 2 + x2 ** 2))
# metric, p, is_euclidean
metrics = [('euclidean', 2, True),
('manhattan', 1, False),
('minkowski', 1, False),
('minkowski', 2, True),
(custom_metric, 2, False)]
X, _ = datasets.make_blobs(random_state=0)
reference = manifold.Isomap().fit_transform(X)
for metric, p, is_euclidean in metrics:
embedding = manifold.Isomap(metric=metric, p=p).fit_transform(X)
if is_euclidean:
assert_array_almost_equal(embedding, reference)
else:
with pytest.raises(AssertionError, match='not almost equal'):
assert_array_almost_equal(embedding, reference)
def test_isomap_clone_bug():
# regression test for bug reported in #6062
model = manifold.Isomap()
for n_neighbors in [10, 15, 20]:
model.set_params(n_neighbors=n_neighbors)
model.fit(np.random.rand(50, 2))
assert (model.nbrs_.n_neighbors ==
n_neighbors)
def test_sparse_input():
X = sparse_rand(100, 3, density=0.1, format='csr')
# Should not error
for eigen_solver in eigen_solvers:
for path_method in path_methods:
clf = manifold.Isomap(n_components=2,
eigen_solver=eigen_solver,
path_method=path_method)
clf.fit(X)

View file

@ -0,0 +1,146 @@
from itertools import product
import numpy as np
from numpy.testing import assert_almost_equal, assert_array_almost_equal
from scipy import linalg
import pytest
from sklearn import neighbors, manifold
from sklearn.manifold._locally_linear import barycenter_kneighbors_graph
from sklearn.utils._testing import ignore_warnings
from sklearn.utils._testing import assert_raise_message
eigen_solvers = ['dense', 'arpack']
# ----------------------------------------------------------------------
# Test utility routines
def test_barycenter_kneighbors_graph():
X = np.array([[0, 1], [1.01, 1.], [2, 0]])
A = barycenter_kneighbors_graph(X, 1)
assert_array_almost_equal(
A.toarray(),
[[0., 1., 0.],
[1., 0., 0.],
[0., 1., 0.]])
A = barycenter_kneighbors_graph(X, 2)
# check that columns sum to one
assert_array_almost_equal(np.sum(A.toarray(), 1), np.ones(3))
pred = np.dot(A.toarray(), X)
assert linalg.norm(pred - X) / X.shape[0] < 1
# ----------------------------------------------------------------------
# Test LLE by computing the reconstruction error on some manifolds.
def test_lle_simple_grid():
# note: ARPACK is numerically unstable, so this test will fail for
# some random seeds. We choose 2 because the tests pass.
rng = np.random.RandomState(2)
# grid of equidistant points in 2D, n_components = n_dim
X = np.array(list(product(range(5), repeat=2)))
X = X + 1e-10 * rng.uniform(size=X.shape)
n_components = 2
clf = manifold.LocallyLinearEmbedding(n_neighbors=5,
n_components=n_components,
random_state=rng)
tol = 0.1
N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray()
reconstruction_error = linalg.norm(np.dot(N, X) - X, 'fro')
assert reconstruction_error < tol
for solver in eigen_solvers:
clf.set_params(eigen_solver=solver)
clf.fit(X)
assert clf.embedding_.shape[1] == n_components
reconstruction_error = linalg.norm(
np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2
assert reconstruction_error < tol
assert_almost_equal(clf.reconstruction_error_,
reconstruction_error, decimal=1)
# re-embed a noisy version of X using the transform method
noise = rng.randn(*X.shape) / 100
X_reembedded = clf.transform(X + noise)
assert linalg.norm(X_reembedded - clf.embedding_) < tol
def test_lle_manifold():
rng = np.random.RandomState(0)
# similar test on a slightly more complex manifold
X = np.array(list(product(np.arange(18), repeat=2)))
X = np.c_[X, X[:, 0] ** 2 / 18]
X = X + 1e-10 * rng.uniform(size=X.shape)
n_components = 2
for method in ["standard", "hessian", "modified", "ltsa"]:
clf = manifold.LocallyLinearEmbedding(n_neighbors=6,
n_components=n_components,
method=method, random_state=0)
tol = 1.5 if method == "standard" else 3
N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray()
reconstruction_error = linalg.norm(np.dot(N, X) - X)
assert reconstruction_error < tol
for solver in eigen_solvers:
clf.set_params(eigen_solver=solver)
clf.fit(X)
assert clf.embedding_.shape[1] == n_components
reconstruction_error = linalg.norm(
np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2
details = ("solver: %s, method: %s" % (solver, method))
assert reconstruction_error < tol, details
assert (np.abs(clf.reconstruction_error_ -
reconstruction_error) <
tol * reconstruction_error), details
# Test the error raised when parameter passed to lle is invalid
def test_lle_init_parameters():
X = np.random.rand(5, 3)
clf = manifold.LocallyLinearEmbedding(eigen_solver="error")
msg = "unrecognized eigen_solver 'error'"
assert_raise_message(ValueError, msg, clf.fit, X)
clf = manifold.LocallyLinearEmbedding(method="error")
msg = "unrecognized method 'error'"
assert_raise_message(ValueError, msg, clf.fit, X)
def test_pipeline():
# check that LocallyLinearEmbedding works fine as a Pipeline
# only checks that no error is raised.
# TODO check that it actually does something useful
from sklearn import pipeline, datasets
X, y = datasets.make_blobs(random_state=0)
clf = pipeline.Pipeline(
[('filter', manifold.LocallyLinearEmbedding(random_state=0)),
('clf', neighbors.KNeighborsClassifier())])
clf.fit(X, y)
assert .9 < clf.score(X, y)
# Test the error raised when the weight matrix is singular
def test_singular_matrix():
M = np.ones((10, 3))
f = ignore_warnings
with pytest.raises(ValueError):
f(manifold.locally_linear_embedding(M, n_neighbors=2, n_components=1,
method='standard',
eigen_solver='arpack'))
# regression test for #6033
def test_integer_input():
rand = np.random.RandomState(0)
X = rand.randint(0, 100, size=(20, 3))
for method in ["standard", "hessian", "modified", "ltsa"]:
clf = manifold.LocallyLinearEmbedding(method=method, n_neighbors=10)
clf.fit(X) # this previously raised a TypeError

View file

@ -0,0 +1,64 @@
import numpy as np
from numpy.testing import assert_array_almost_equal
import pytest
from sklearn.manifold import _mds as mds
def test_smacof():
# test metric smacof using the data of "Modern Multidimensional Scaling",
# Borg & Groenen, p 154
sim = np.array([[0, 5, 3, 4],
[5, 0, 2, 2],
[3, 2, 0, 1],
[4, 2, 1, 0]])
Z = np.array([[-.266, -.539],
[.451, .252],
[.016, -.238],
[-.200, .524]])
X, _ = mds.smacof(sim, init=Z, n_components=2, max_iter=1, n_init=1)
X_true = np.array([[-1.415, -2.471],
[1.633, 1.107],
[.249, -.067],
[-.468, 1.431]])
assert_array_almost_equal(X, X_true, decimal=3)
def test_smacof_error():
# Not symmetric similarity matrix:
sim = np.array([[0, 5, 9, 4],
[5, 0, 2, 2],
[3, 2, 0, 1],
[4, 2, 1, 0]])
with pytest.raises(ValueError):
mds.smacof(sim)
# Not squared similarity matrix:
sim = np.array([[0, 5, 9, 4],
[5, 0, 2, 2],
[4, 2, 1, 0]])
with pytest.raises(ValueError):
mds.smacof(sim)
# init not None and not correct format:
sim = np.array([[0, 5, 3, 4],
[5, 0, 2, 2],
[3, 2, 0, 1],
[4, 2, 1, 0]])
Z = np.array([[-.266, -.539],
[.016, -.238],
[-.200, .524]])
with pytest.raises(ValueError):
mds.smacof(sim, init=Z, n_init=1)
def test_MDS():
sim = np.array([[0, 5, 3, 4],
[5, 0, 2, 2],
[3, 2, 0, 1],
[4, 2, 1, 0]])
mds_clf = mds.MDS(metric=False, n_jobs=3, dissimilarity="precomputed")
mds_clf.fit(sim)

View file

@ -0,0 +1,347 @@
import pytest
import numpy as np
from scipy import sparse
from scipy.sparse import csgraph
from scipy.linalg import eigh
from sklearn.manifold import SpectralEmbedding
from sklearn.manifold._spectral_embedding import _graph_is_connected
from sklearn.manifold._spectral_embedding import _graph_connected_component
from sklearn.manifold import spectral_embedding
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.metrics import normalized_mutual_info_score
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.utils.extmath import _deterministic_vector_sign_flip
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import assert_array_equal
# non centered, sparse centers to check the
centers = np.array([
[0.0, 5.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 4.0, 0.0, 0.0],
[1.0, 0.0, 0.0, 5.0, 1.0],
])
n_samples = 1000
n_clusters, n_features = centers.shape
S, true_labels = make_blobs(n_samples=n_samples, centers=centers,
cluster_std=1., random_state=42)
def _assert_equal_with_sign_flipping(A, B, tol=0.0):
""" Check array A and B are equal with possible sign flipping on
each columns"""
tol_squared = tol ** 2
for A_col, B_col in zip(A.T, B.T):
assert (np.max((A_col - B_col) ** 2) <= tol_squared or
np.max((A_col + B_col) ** 2) <= tol_squared)
def test_sparse_graph_connected_component():
rng = np.random.RandomState(42)
n_samples = 300
boundaries = [0, 42, 121, 200, n_samples]
p = rng.permutation(n_samples)
connections = []
for start, stop in zip(boundaries[:-1], boundaries[1:]):
group = p[start:stop]
# Connect all elements within the group at least once via an
# arbitrary path that spans the group.
for i in range(len(group) - 1):
connections.append((group[i], group[i + 1]))
# Add some more random connections within the group
min_idx, max_idx = 0, len(group) - 1
n_random_connections = 1000
source = rng.randint(min_idx, max_idx, size=n_random_connections)
target = rng.randint(min_idx, max_idx, size=n_random_connections)
connections.extend(zip(group[source], group[target]))
# Build a symmetric affinity matrix
row_idx, column_idx = tuple(np.array(connections).T)
data = rng.uniform(.1, 42, size=len(connections))
affinity = sparse.coo_matrix((data, (row_idx, column_idx)))
affinity = 0.5 * (affinity + affinity.T)
for start, stop in zip(boundaries[:-1], boundaries[1:]):
component_1 = _graph_connected_component(affinity, p[start])
component_size = stop - start
assert component_1.sum() == component_size
# We should retrieve the same component mask by starting by both ends
# of the group
component_2 = _graph_connected_component(affinity, p[stop - 1])
assert component_2.sum() == component_size
assert_array_equal(component_1, component_2)
def test_spectral_embedding_two_components(seed=36):
# Test spectral embedding with two components
random_state = np.random.RandomState(seed)
n_sample = 100
affinity = np.zeros(shape=[n_sample * 2, n_sample * 2])
# first component
affinity[0:n_sample,
0:n_sample] = np.abs(random_state.randn(n_sample, n_sample)) + 2
# second component
affinity[n_sample::,
n_sample::] = np.abs(random_state.randn(n_sample, n_sample)) + 2
# Test of internal _graph_connected_component before connection
component = _graph_connected_component(affinity, 0)
assert component[:n_sample].all()
assert not component[n_sample:].any()
component = _graph_connected_component(affinity, -1)
assert not component[:n_sample].any()
assert component[n_sample:].all()
# connection
affinity[0, n_sample + 1] = 1
affinity[n_sample + 1, 0] = 1
affinity.flat[::2 * n_sample + 1] = 0
affinity = 0.5 * (affinity + affinity.T)
true_label = np.zeros(shape=2 * n_sample)
true_label[0:n_sample] = 1
se_precomp = SpectralEmbedding(n_components=1, affinity="precomputed",
random_state=np.random.RandomState(seed))
embedded_coordinate = se_precomp.fit_transform(affinity)
# Some numpy versions are touchy with types
embedded_coordinate = \
se_precomp.fit_transform(affinity.astype(np.float32))
# thresholding on the first components using 0.
label_ = np.array(embedded_coordinate.ravel() < 0, dtype="float")
assert normalized_mutual_info_score(
true_label, label_) == pytest.approx(1.0)
@pytest.mark.parametrize("X", [S, sparse.csr_matrix(S)],
ids=["dense", "sparse"])
def test_spectral_embedding_precomputed_affinity(X, seed=36):
# Test spectral embedding with precomputed kernel
gamma = 1.0
se_precomp = SpectralEmbedding(n_components=2, affinity="precomputed",
random_state=np.random.RandomState(seed))
se_rbf = SpectralEmbedding(n_components=2, affinity="rbf",
gamma=gamma,
random_state=np.random.RandomState(seed))
embed_precomp = se_precomp.fit_transform(rbf_kernel(X, gamma=gamma))
embed_rbf = se_rbf.fit_transform(X)
assert_array_almost_equal(
se_precomp.affinity_matrix_, se_rbf.affinity_matrix_)
_assert_equal_with_sign_flipping(embed_precomp, embed_rbf, 0.05)
def test_precomputed_nearest_neighbors_filtering():
# Test precomputed graph filtering when containing too many neighbors
n_neighbors = 2
results = []
for additional_neighbors in [0, 10]:
nn = NearestNeighbors(
n_neighbors=n_neighbors + additional_neighbors).fit(S)
graph = nn.kneighbors_graph(S, mode='connectivity')
embedding = SpectralEmbedding(random_state=0, n_components=2,
affinity='precomputed_nearest_neighbors',
n_neighbors=n_neighbors
).fit(graph).embedding_
results.append(embedding)
assert_array_equal(results[0], results[1])
@pytest.mark.parametrize("X", [S, sparse.csr_matrix(S)],
ids=["dense", "sparse"])
def test_spectral_embedding_callable_affinity(X, seed=36):
# Test spectral embedding with callable affinity
gamma = 0.9
kern = rbf_kernel(S, gamma=gamma)
se_callable = SpectralEmbedding(n_components=2,
affinity=(
lambda x: rbf_kernel(x, gamma=gamma)),
gamma=gamma,
random_state=np.random.RandomState(seed))
se_rbf = SpectralEmbedding(n_components=2, affinity="rbf",
gamma=gamma,
random_state=np.random.RandomState(seed))
embed_rbf = se_rbf.fit_transform(X)
embed_callable = se_callable.fit_transform(X)
assert_array_almost_equal(
se_callable.affinity_matrix_, se_rbf.affinity_matrix_)
assert_array_almost_equal(kern, se_rbf.affinity_matrix_)
_assert_equal_with_sign_flipping(embed_rbf, embed_callable, 0.05)
# TODO: Remove when pyamg replaces sp.rand calls with np.random.rand
# https://github.com/scikit-learn/scikit-learn/issues/15913
@pytest.mark.filterwarnings(
"ignore:scipy.rand is deprecated:DeprecationWarning:pyamg.*")
def test_spectral_embedding_amg_solver(seed=36):
# Test spectral embedding with amg solver
pytest.importorskip('pyamg')
se_amg = SpectralEmbedding(n_components=2, affinity="nearest_neighbors",
eigen_solver="amg", n_neighbors=5,
random_state=np.random.RandomState(seed))
se_arpack = SpectralEmbedding(n_components=2, affinity="nearest_neighbors",
eigen_solver="arpack", n_neighbors=5,
random_state=np.random.RandomState(seed))
embed_amg = se_amg.fit_transform(S)
embed_arpack = se_arpack.fit_transform(S)
_assert_equal_with_sign_flipping(embed_amg, embed_arpack, 1e-5)
# same with special case in which amg is not actually used
# regression test for #10715
# affinity between nodes
row = [0, 0, 1, 2, 3, 3, 4]
col = [1, 2, 2, 3, 4, 5, 5]
val = [100, 100, 100, 1, 100, 100, 100]
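# concatenating (row, col) with (col, row) stores every edge in both
# directions, so the resulting affinity matrix is symmetric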
affinity = sparse.coo_matrix((val + val, (row + col, col + row)),
shape=(6, 6)).toarray()
se_amg.affinity = "precomputed"
se_arpack.affinity = "precomputed"
embed_amg = se_amg.fit_transform(affinity)
embed_arpack = se_arpack.fit_transform(affinity)
_assert_equal_with_sign_flipping(embed_amg, embed_arpack, 1e-5)
# TODO: Remove filterwarnings when pyamg replaces sp.rand calls with
# np.random.rand:
# https://github.com/scikit-learn/scikit-learn/issues/15913
@pytest.mark.filterwarnings(
"ignore:scipy.rand is deprecated:DeprecationWarning:pyamg.*")
def test_spectral_embedding_amg_solver_failure():
# Non-regression test for amg solver failure (issue #13393 on github)
pytest.importorskip('pyamg')
seed = 36
num_nodes = 100
X = sparse.rand(num_nodes, num_nodes, density=0.1, random_state=seed)
upper = sparse.triu(X) - sparse.diags(X.diagonal())
sym_matrix = upper + upper.T
embedding = spectral_embedding(sym_matrix,
n_components=10,
eigen_solver='amg',
random_state=0)
# Check that the learned embedding is stable w.r.t. random solver init:
for i in range(3):
new_embedding = spectral_embedding(sym_matrix,
n_components=10,
eigen_solver='amg',
random_state=i + 1)
_assert_equal_with_sign_flipping(embedding, new_embedding, tol=0.05)
@pytest.mark.filterwarnings("ignore:the behavior of nmi will "
"change in version 0.22")
def test_pipeline_spectral_clustering(seed=36):
# Test using pipeline to do spectral clustering
random_state = np.random.RandomState(seed)
se_rbf = SpectralEmbedding(n_components=n_clusters,
affinity="rbf",
random_state=random_state)
se_knn = SpectralEmbedding(n_components=n_clusters,
affinity="nearest_neighbors",
n_neighbors=5,
random_state=random_state)
for se in [se_rbf, se_knn]:
km = KMeans(n_clusters=n_clusters, random_state=random_state)
km.fit(se.fit_transform(S))
assert_array_almost_equal(
normalized_mutual_info_score(
km.labels_,
true_labels), 1.0, 2)
def test_spectral_embedding_unknown_eigensolver(seed=36):
# Test that SpectralEmbedding fails with an unknown eigen_solver
se = SpectralEmbedding(n_components=1, affinity="precomputed",
random_state=np.random.RandomState(seed),
eigen_solver="<unknown>")
with pytest.raises(ValueError):
se.fit(S)
def test_spectral_embedding_unknown_affinity(seed=36):
# Test that SpectralEmbedding fails with an unknown affinity type
se = SpectralEmbedding(n_components=1, affinity="<unknown>",
random_state=np.random.RandomState(seed))
with pytest.raises(ValueError):
se.fit(S)
def test_connectivity(seed=36):
# Test that graph connectivity test works as expected
graph = np.array([[1, 0, 0, 0, 0],
[0, 1, 1, 0, 0],
[0, 1, 1, 1, 0],
[0, 0, 1, 1, 1],
[0, 0, 0, 1, 1]])
assert not _graph_is_connected(graph)
assert not _graph_is_connected(sparse.csr_matrix(graph))
assert not _graph_is_connected(sparse.csc_matrix(graph))
graph = np.array([[1, 1, 0, 0, 0],
[1, 1, 1, 0, 0],
[0, 1, 1, 1, 0],
[0, 0, 1, 1, 1],
[0, 0, 0, 1, 1]])
assert _graph_is_connected(graph)
assert _graph_is_connected(sparse.csr_matrix(graph))
assert _graph_is_connected(sparse.csc_matrix(graph))
def test_spectral_embedding_deterministic():
# Test that Spectral Embedding is deterministic
random_state = np.random.RandomState(36)
data = random_state.randn(10, 30)
sims = rbf_kernel(data)
embedding_1 = spectral_embedding(sims)
embedding_2 = spectral_embedding(sims)
assert_array_almost_equal(embedding_1, embedding_2)
def test_spectral_embedding_unnormalized():
# Test that spectral_embedding also handles the unnormalized Laplacian
# correctly
random_state = np.random.RandomState(36)
data = random_state.randn(10, 30)
sims = rbf_kernel(data)
n_components = 8
embedding_1 = spectral_embedding(sims,
norm_laplacian=False,
n_components=n_components,
drop_first=False)
# Verify using manual computation with dense eigh
laplacian, dd = csgraph.laplacian(sims, normed=False,
return_diag=True)
_, diffusion_map = eigh(laplacian)
embedding_2 = diffusion_map.T[:n_components]
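# spectral_embedding applies the same deterministic sign convention
# internally, so flip the signs here before comparing the two embeddings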
embedding_2 = _deterministic_vector_sign_flip(embedding_2).T
assert_array_almost_equal(embedding_1, embedding_2)
def test_spectral_embedding_first_eigen_vector():
# Test that the first eigenvector of spectral_embedding
# is constant and that the second is not (for a connected graph)
random_state = np.random.RandomState(36)
data = random_state.randn(10, 30)
sims = rbf_kernel(data)
n_components = 2
for seed in range(10):
embedding = spectral_embedding(sims,
norm_laplacian=False,
n_components=n_components,
drop_first=False,
random_state=seed)
assert np.std(embedding[:, 0]) == pytest.approx(0)
assert np.std(embedding[:, 1]) > 1e-3

View file

@@ -0,0 +1,893 @@
import sys
from io import StringIO
import numpy as np
from numpy.testing import assert_allclose
import scipy.sparse as sp
import pytest
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import kneighbors_graph
from sklearn.exceptions import EfficiencyWarning
from sklearn.utils._testing import ignore_warnings
from sklearn.utils._testing import assert_almost_equal
from sklearn.utils._testing import assert_array_equal
from sklearn.utils._testing import assert_array_almost_equal
from sklearn.utils._testing import skip_if_32bit
from sklearn.utils import check_random_state
from sklearn.manifold._t_sne import _joint_probabilities
from sklearn.manifold._t_sne import _joint_probabilities_nn
from sklearn.manifold._t_sne import _kl_divergence
from sklearn.manifold._t_sne import _kl_divergence_bh
from sklearn.manifold._t_sne import _gradient_descent
from sklearn.manifold._t_sne import trustworthiness
from sklearn.manifold import TSNE
# mypy error: Module 'sklearn.manifold' has no attribute '_barnes_hut_tsne'
from sklearn.manifold import _barnes_hut_tsne # type: ignore
from sklearn.manifold._utils import _binary_search_perplexity
from sklearn.datasets import make_blobs
from scipy.optimize import check_grad
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics.pairwise import manhattan_distances
from sklearn.metrics.pairwise import cosine_distances
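# 10 x 10 regular grid of 2D points (100 samples) shared by the
# uniform-grid t-SNE tests below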
x = np.linspace(0, 1, 10)
xx, yy = np.meshgrid(x, x)
X_2d_grid = np.hstack([
xx.ravel().reshape(-1, 1),
yy.ravel().reshape(-1, 1),
])
def test_gradient_descent_stops():
# Test stopping conditions of gradient descent.
class ObjectiveSmallGradient:
def __init__(self):
self.it = -1
def __call__(self, _, compute_error=True):
self.it += 1
return (10 - self.it) / 10.0, np.array([1e-5])
def flat_function(_, compute_error=True):
return 0.0, np.ones(1)
# Gradient norm
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
_, error, it = _gradient_descent(
ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=100,
n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
min_gain=0.0, min_grad_norm=1e-5, verbose=2)
finally:
out = sys.stdout.getvalue()
sys.stdout.close()
sys.stdout = old_stdout
assert error == 1.0
assert it == 0
assert("gradient norm" in out)
# Maximum number of iterations without improvement
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
_, error, it = _gradient_descent(
flat_function, np.zeros(1), 0, n_iter=100,
n_iter_without_progress=10, momentum=0.0, learning_rate=0.0,
min_gain=0.0, min_grad_norm=0.0, verbose=2)
finally:
out = sys.stdout.getvalue()
sys.stdout.close()
sys.stdout = old_stdout
assert error == 0.0
assert it == 11
assert("did not make any progress" in out)
# Maximum number of iterations
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
_, error, it = _gradient_descent(
ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=11,
n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
min_gain=0.0, min_grad_norm=0.0, verbose=2)
finally:
out = sys.stdout.getvalue()
sys.stdout.close()
sys.stdout = old_stdout
assert error == 0.0
assert it == 10
assert("Iteration 10" in out)
def test_binary_search():
# Test if the binary search finds Gaussians with desired perplexity.
random_state = check_random_state(0)
data = random_state.randn(50, 5)
distances = pairwise_distances(data).astype(np.float32)
desired_perplexity = 25.0
P = _binary_search_perplexity(distances, desired_perplexity, verbose=0)
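# clamp the probabilities away from zero so that the log in the
# perplexity computation below stays finite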
P = np.maximum(P, np.finfo(np.double).eps)
mean_perplexity = np.mean([np.exp(-np.sum(P[i] * np.log(P[i])))
for i in range(P.shape[0])])
assert_almost_equal(mean_perplexity, desired_perplexity, decimal=3)
def test_binary_search_neighbors():
# Binary perplexity search approximation.
# Should be approximately equal to the slow method when we use
# all points as neighbors.
n_samples = 200
desired_perplexity = 25.0
random_state = check_random_state(0)
data = random_state.randn(n_samples, 2).astype(np.float32, copy=False)
distances = pairwise_distances(data)
P1 = _binary_search_perplexity(distances, desired_perplexity, verbose=0)
# Test that when we use all the neighbors the results are identical
n_neighbors = n_samples - 1
nn = NearestNeighbors().fit(data)
distance_graph = nn.kneighbors_graph(n_neighbors=n_neighbors,
mode='distance')
distances_nn = distance_graph.data.astype(np.float32, copy=False)
distances_nn = distances_nn.reshape(n_samples, n_neighbors)
P2 = _binary_search_perplexity(distances_nn, desired_perplexity, verbose=0)
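# restrict the dense-matrix probabilities P1 to the neighbor indices of the
# sparse graph so they can be compared entry-wise with P2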
indptr = distance_graph.indptr
P1_nn = np.array([P1[k, distance_graph.indices[indptr[k]:indptr[k + 1]]]
for k in range(n_samples)])
assert_array_almost_equal(P1_nn, P2, decimal=4)
# Test that the highest P_ij are the same when fewer neighbors are used
for k in np.linspace(150, n_samples - 1, 5):
k = int(k)
topn = k * 10 # check the top 10 * k entries out of k * k entries
distance_graph = nn.kneighbors_graph(n_neighbors=k, mode='distance')
distances_nn = distance_graph.data.astype(np.float32, copy=False)
distances_nn = distances_nn.reshape(n_samples, k)
P2k = _binary_search_perplexity(distances_nn, desired_perplexity,
verbose=0)
assert_array_almost_equal(P1_nn, P2, decimal=2)
idx = np.argsort(P1.ravel())[::-1]
P1top = P1.ravel()[idx][:topn]
idx = np.argsort(P2k.ravel())[::-1]
P2top = P2k.ravel()[idx][:topn]
assert_array_almost_equal(P1top, P2top, decimal=2)
def test_binary_perplexity_stability():
# Binary perplexity search should be stable.
# The binary_search_perplexity had a bug wherein the P array
# was uninitialized, leading to sporadically failing tests.
n_neighbors = 10
n_samples = 100
random_state = check_random_state(0)
data = random_state.randn(n_samples, 5)
nn = NearestNeighbors().fit(data)
distance_graph = nn.kneighbors_graph(n_neighbors=n_neighbors,
mode='distance')
distances = distance_graph.data.astype(np.float32, copy=False)
distances = distances.reshape(n_samples, n_neighbors)
last_P = None
desired_perplexity = 3
for _ in range(100):
P = _binary_search_perplexity(distances.copy(), desired_perplexity,
verbose=0)
P1 = _joint_probabilities_nn(distance_graph, desired_perplexity,
verbose=0)
# Convert the sparse matrix to a dense one for testing
P1 = P1.toarray()
if last_P is None:
last_P = P
last_P1 = P1
else:
assert_array_almost_equal(P, last_P, decimal=4)
assert_array_almost_equal(P1, last_P1, decimal=4)
def test_gradient():
# Test gradient of Kullback-Leibler divergence.
random_state = check_random_state(0)
n_samples = 50
n_features = 2
n_components = 2
alpha = 1.0
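# build a random symmetric, non-negative matrix with a zero diagonal to
# play the role of a distance matrix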
distances = random_state.randn(n_samples, n_features).astype(np.float32)
distances = np.abs(distances.dot(distances.T))
np.fill_diagonal(distances, 0.0)
X_embedded = random_state.randn(n_samples, n_components).astype(np.float32)
P = _joint_probabilities(distances, desired_perplexity=25.0,
verbose=0)
def fun(params):
return _kl_divergence(params, P, alpha, n_samples, n_components)[0]
def grad(params):
return _kl_divergence(params, P, alpha, n_samples, n_components)[1]
assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0,
decimal=5)
def test_trustworthiness():
# Test trustworthiness score.
random_state = check_random_state(0)
# Affine transformation
X = random_state.randn(100, 2)
assert trustworthiness(X, 5.0 + X / 10.0) == 1.0
# Randomly shuffled
X = np.arange(100).reshape(-1, 1)
X_embedded = X.copy()
random_state.shuffle(X_embedded)
assert trustworthiness(X, X_embedded) < 0.6
# Completely different
X = np.arange(5).reshape(-1, 1)
X_embedded = np.array([[0], [2], [4], [1], [3]])
assert_almost_equal(trustworthiness(X, X_embedded, n_neighbors=1), 0.2)
@pytest.mark.parametrize("method", ['exact', 'barnes_hut'])
@pytest.mark.parametrize("init", ('random', 'pca'))
def test_preserve_trustworthiness_approximately(method, init):
# Nearest neighbors should be preserved approximately.
random_state = check_random_state(0)
n_components = 2
X = random_state.randn(50, n_components).astype(np.float32)
tsne = TSNE(n_components=n_components, init=init, random_state=0,
method=method, n_iter=700)
X_embedded = tsne.fit_transform(X)
t = trustworthiness(X, X_embedded, n_neighbors=1)
assert t > 0.85
def test_optimization_minimizes_kl_divergence():
"""t-SNE should give a lower KL divergence with more iterations."""
random_state = check_random_state(0)
X, _ = make_blobs(n_features=3, random_state=random_state)
kl_divergences = []
for n_iter in [250, 300, 350]:
tsne = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
n_iter=n_iter, random_state=0)
tsne.fit_transform(X)
kl_divergences.append(tsne.kl_divergence_)
assert kl_divergences[1] <= kl_divergences[0]
assert kl_divergences[2] <= kl_divergences[1]
@pytest.mark.parametrize('method', ['exact', 'barnes_hut'])
def test_fit_csr_matrix(method):
# X can be a sparse matrix.
rng = check_random_state(0)
X = rng.randn(50, 2)
X[(rng.randint(0, 50, 25), rng.randint(0, 2, 25))] = 0.0
X_csr = sp.csr_matrix(X)
tsne = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
random_state=0, method=method, n_iter=750)
X_embedded = tsne.fit_transform(X_csr)
assert_allclose(trustworthiness(X_csr, X_embedded, n_neighbors=1),
1.0, rtol=1.1e-1)
def test_preserve_trustworthiness_approximately_with_precomputed_distances():
# Nearest neighbors should be preserved approximately.
random_state = check_random_state(0)
for i in range(3):
X = random_state.randn(80, 2)
D = squareform(pdist(X), "sqeuclidean")
tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
early_exaggeration=2.0, metric="precomputed",
random_state=i, verbose=0, n_iter=500)
X_embedded = tsne.fit_transform(D)
t = trustworthiness(D, X_embedded, n_neighbors=1, metric="precomputed")
assert t > .95
def test_trustworthiness_not_euclidean_metric():
# Test trustworthiness with a metric different from 'euclidean' and
# 'precomputed'
random_state = check_random_state(0)
X = random_state.randn(100, 2)
assert (trustworthiness(X, X, metric='cosine') ==
trustworthiness(pairwise_distances(X, metric='cosine'), X,
metric='precomputed'))
def test_early_exaggeration_too_small():
# Early exaggeration factor must be >= 1.
tsne = TSNE(early_exaggeration=0.99)
with pytest.raises(ValueError, match="early_exaggeration .*"):
tsne.fit_transform(np.array([[0.0], [0.0]]))
def test_too_few_iterations():
# Number of gradient descent iterations must be at least 200.
tsne = TSNE(n_iter=199)
with pytest.raises(ValueError, match="n_iter .*"):
tsne.fit_transform(np.array([[0.0], [0.0]]))
@pytest.mark.parametrize('method, retype', [
('exact', np.asarray),
('barnes_hut', np.asarray),
('barnes_hut', sp.csr_matrix),
])
@pytest.mark.parametrize('D, message_regex', [
([[0.0], [1.0]], ".* square distance matrix"),
([[0., -1.], [1., 0.]], ".* positive.*"),
])
def test_bad_precomputed_distances(method, D, retype, message_regex):
tsne = TSNE(metric="precomputed", method=method)
with pytest.raises(ValueError, match=message_regex):
tsne.fit_transform(retype(D))
def test_exact_no_precomputed_sparse():
tsne = TSNE(metric='precomputed', method='exact')
with pytest.raises(TypeError, match='sparse'):
tsne.fit_transform(sp.csr_matrix([[0, 5], [5, 0]]))
def test_high_perplexity_precomputed_sparse_distances():
# Perplexity should be less than 50
dist = np.array([[1., 0., 0.], [0., 1., 0.], [1., 0., 0.]])
bad_dist = sp.csr_matrix(dist)
tsne = TSNE(metric="precomputed")
msg = "3 neighbors per samples are required, but some samples have only 1"
with pytest.raises(ValueError, match=msg):
tsne.fit_transform(bad_dist)
@ignore_warnings(category=EfficiencyWarning)
def test_sparse_precomputed_distance():
"""Make sure that TSNE works identically for sparse and dense matrix"""
random_state = check_random_state(0)
X = random_state.randn(100, 2)
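# with n_neighbors equal to n_samples and include_self=True, the sparse
# graph contains every pairwise distance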
D_sparse = kneighbors_graph(X, n_neighbors=100, mode='distance',
include_self=True)
D = pairwise_distances(X)
assert sp.issparse(D_sparse)
assert_almost_equal(D_sparse.A, D)
tsne = TSNE(metric="precomputed", random_state=0)
Xt_dense = tsne.fit_transform(D)
for fmt in ['csr', 'lil']:
Xt_sparse = tsne.fit_transform(D_sparse.asformat(fmt))
assert_almost_equal(Xt_dense, Xt_sparse)
def test_non_positive_computed_distances():
# Computed distance matrices must be positive.
def metric(x, y):
return -1
tsne = TSNE(metric=metric, method='exact')
X = np.array([[0.0, 0.0], [1.0, 1.0]])
with pytest.raises(ValueError, match="All distances .*metric given.*"):
tsne.fit_transform(X)
def test_init_not_available():
# 'init' must be 'pca', 'random', or numpy array.
tsne = TSNE(init="not available")
m = "'init' must be 'pca', 'random', or a numpy array"
with pytest.raises(ValueError, match=m):
tsne.fit_transform(np.array([[0.0], [1.0]]))
def test_init_ndarray():
# Initialize TSNE with ndarray and test fit
tsne = TSNE(init=np.zeros((100, 2)))
X_embedded = tsne.fit_transform(np.ones((100, 5)))
assert_array_equal(np.zeros((100, 2)), X_embedded)
def test_init_ndarray_precomputed():
# Initialize TSNE with ndarray and metric 'precomputed'
# Make sure no FutureWarning is thrown from _fit
tsne = TSNE(init=np.zeros((100, 2)), metric="precomputed")
tsne.fit(np.zeros((100, 100)))
def test_distance_not_available():
# 'metric' must be valid.
tsne = TSNE(metric="not available", method='exact')
with pytest.raises(ValueError, match="Unknown metric not available.*"):
tsne.fit_transform(np.array([[0.0], [1.0]]))
tsne = TSNE(metric="not available", method='barnes_hut')
with pytest.raises(ValueError, match="Metric 'not available' not valid.*"):
tsne.fit_transform(np.array([[0.0], [1.0]]))
def test_method_not_available():
# 'method' must be 'barnes_hut' or 'exact'
tsne = TSNE(method='not available')
with pytest.raises(ValueError, match="'method' must be 'barnes_hut' or "):
tsne.fit_transform(np.array([[0.0], [1.0]]))
def test_angle_out_of_range_checks():
# check the angle parameter range
for angle in [-1, -1e-6, 1 + 1e-6, 2]:
tsne = TSNE(angle=angle)
with pytest.raises(ValueError, match="'angle' must be between "
"0.0 - 1.0"):
tsne.fit_transform(np.array([[0.0], [1.0]]))
def test_pca_initialization_not_compatible_with_precomputed_kernel():
# The "pca" initialization cannot be used with a precomputed distance matrix.
tsne = TSNE(metric="precomputed", init="pca")
with pytest.raises(ValueError, match="The parameter init=\"pca\" cannot"
" be used with"
" metric=\"precomputed\"."):
tsne.fit_transform(np.array([[0.0], [1.0]]))
def test_n_components_range():
# barnes_hut method should only be used with n_components <= 3
tsne = TSNE(n_components=4, method="barnes_hut")
with pytest.raises(ValueError, match="'n_components' should be .*"):
tsne.fit_transform(np.array([[0.0], [1.0]]))
def test_early_exaggeration_used():
# check that the ``early_exaggeration`` parameter has an effect
random_state = check_random_state(0)
n_components = 2
methods = ['exact', 'barnes_hut']
X = random_state.randn(25, n_components).astype(np.float32)
for method in methods:
tsne = TSNE(n_components=n_components, perplexity=1,
learning_rate=100.0, init="pca", random_state=0,
method=method, early_exaggeration=1.0, n_iter=250)
X_embedded1 = tsne.fit_transform(X)
tsne = TSNE(n_components=n_components, perplexity=1,
learning_rate=100.0, init="pca", random_state=0,
method=method, early_exaggeration=10.0, n_iter=250)
X_embedded2 = tsne.fit_transform(X)
assert not np.allclose(X_embedded1, X_embedded2)
def test_n_iter_used():
# check that the ``n_iter`` parameter has an effect
random_state = check_random_state(0)
n_components = 2
methods = ['exact', 'barnes_hut']
X = random_state.randn(25, n_components).astype(np.float32)
for method in methods:
for n_iter in [251, 500]:
tsne = TSNE(n_components=n_components, perplexity=1,
learning_rate=0.5, init="random", random_state=0,
method=method, early_exaggeration=1.0, n_iter=n_iter)
tsne.fit_transform(X)
assert tsne.n_iter_ == n_iter - 1
def test_answer_gradient_two_points():
# Test the tree with only a single set of children.
#
# These tests & answers have been checked against the reference
# implementation by LvdM.
pos_input = np.array([[1.0, 0.0], [0.0, 1.0]])
pos_output = np.array([[-4.961291e-05, -1.072243e-04],
[9.259460e-05, 2.702024e-04]])
neighbors = np.array([[1],
[0]])
grad_output = np.array([[-2.37012478e-05, -6.29044398e-05],
[2.37012478e-05, 6.29044398e-05]])
_run_answer_test(pos_input, pos_output, neighbors, grad_output)
def test_answer_gradient_four_points():
# Four points tests the tree with multiple levels of children.
#
# These tests & answers have been checked against the reference
# implementation by LvdM.
pos_input = np.array([[1.0, 0.0], [0.0, 1.0],
[5.0, 2.0], [7.3, 2.2]])
pos_output = np.array([[6.080564e-05, -7.120823e-05],
[-1.718945e-04, -4.000536e-05],
[-2.271720e-04, 8.663310e-05],
[-1.032577e-04, -3.582033e-05]])
neighbors = np.array([[1, 2, 3],
[0, 2, 3],
[1, 0, 3],
[1, 2, 0]])
grad_output = np.array([[5.81128448e-05, -7.78033454e-06],
[-5.81526851e-05, 7.80976444e-06],
[4.24275173e-08, -3.69569698e-08],
[-2.58720939e-09, 7.52706374e-09]])
_run_answer_test(pos_input, pos_output, neighbors, grad_output)
def test_skip_num_points_gradient():
# Test the kwargs option skip_num_points.
#
# skip_num_points should make it such that the Barnes-Hut gradient
# is not calculated for indices below skip_num_points.
# Aside from skip_num_points=2 and the first two gradient rows
# being set to zero, these data points are the same as in
# test_answer_gradient_four_points()
pos_input = np.array([[1.0, 0.0], [0.0, 1.0],
[5.0, 2.0], [7.3, 2.2]])
pos_output = np.array([[6.080564e-05, -7.120823e-05],
[-1.718945e-04, -4.000536e-05],
[-2.271720e-04, 8.663310e-05],
[-1.032577e-04, -3.582033e-05]])
neighbors = np.array([[1, 2, 3],
[0, 2, 3],
[1, 0, 3],
[1, 2, 0]])
grad_output = np.array([[0.0, 0.0],
[0.0, 0.0],
[4.24275173e-08, -3.69569698e-08],
[-2.58720939e-09, 7.52706374e-09]])
_run_answer_test(pos_input, pos_output, neighbors, grad_output,
False, 0.1, 2)
def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
verbose=False, perplexity=0.1, skip_num_points=0):
distances = pairwise_distances(pos_input).astype(np.float32)
args = distances, perplexity, verbose
pos_output = pos_output.astype(np.float32)
neighbors = neighbors.astype(np.int64, copy=False)
pij_input = _joint_probabilities(*args)
pij_input = squareform(pij_input).astype(np.float32)
grad_bh = np.zeros(pos_output.shape, dtype=np.float32)
from scipy.sparse import csr_matrix
P = csr_matrix(pij_input)
neighbors = P.indices.astype(np.int64)
indptr = P.indptr.astype(np.int64)
# forward skip_num_points so the gradient of the leading samples stays zero
_barnes_hut_tsne.gradient(P.data, pos_output, neighbors, indptr, grad_bh,
                          0.5, 2, 1, skip_num_points=skip_num_points)
assert_array_almost_equal(grad_bh, grad_output, decimal=4)
def test_verbose():
# Verbose options write to stdout.
random_state = check_random_state(0)
tsne = TSNE(verbose=2)
X = random_state.randn(5, 2)
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
tsne.fit_transform(X)
finally:
out = sys.stdout.getvalue()
sys.stdout.close()
sys.stdout = old_stdout
assert("[t-SNE]" in out)
assert("nearest neighbors..." in out)
assert("Computed conditional probabilities" in out)
assert("Mean sigma" in out)
assert("early exaggeration" in out)
def test_chebyshev_metric():
# t-SNE should allow metrics that cannot be squared (issue #3526).
random_state = check_random_state(0)
tsne = TSNE(metric="chebyshev")
X = random_state.randn(5, 2)
tsne.fit_transform(X)
def test_reduction_to_one_component():
# t-SNE should allow reduction to one component (issue #4154).
random_state = check_random_state(0)
tsne = TSNE(n_components=1)
X = random_state.randn(5, 2)
X_embedded = tsne.fit(X).embedding_
assert(np.all(np.isfinite(X_embedded)))
@pytest.mark.parametrize('method', ['barnes_hut', 'exact'])
@pytest.mark.parametrize('dt', [np.float32, np.float64])
def test_64bit(method, dt):
# Ensure 64bit arrays are handled correctly.
random_state = check_random_state(0)
X = random_state.randn(10, 2).astype(dt, copy=False)
tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
random_state=0, method=method, verbose=0,
n_iter=300)
X_embedded = tsne.fit_transform(X)
effective_type = X_embedded.dtype
# the t-SNE Cython code is single precision only, so the output will
# always be single precision, irrespective of the input dtype
assert effective_type == np.float32
@pytest.mark.parametrize('method', ['barnes_hut', 'exact'])
def test_kl_divergence_not_nan(method):
# Ensure kl_divergence_ is computed at the last iteration
# even though n_iter % n_iter_check != 0, i.e. 503 % 50 != 0
random_state = check_random_state(0)
X = random_state.randn(50, 2)
tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
random_state=0, method=method, verbose=0, n_iter=503)
tsne.fit_transform(X)
assert not np.isnan(tsne.kl_divergence_)
def test_barnes_hut_angle():
# When Barnes-Hut's angle=0 this corresponds to the exact method.
angle = 0.0
perplexity = 10
n_samples = 100
for n_components in [2, 3]:
n_features = 5
degrees_of_freedom = float(n_components - 1.0)
random_state = check_random_state(0)
data = random_state.randn(n_samples, n_features)
distances = pairwise_distances(data)
params = random_state.randn(n_samples, n_components)
P = _joint_probabilities(distances, perplexity, verbose=0)
kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
n_samples, n_components)
n_neighbors = n_samples - 1
distances_csr = NearestNeighbors().fit(data).kneighbors_graph(
n_neighbors=n_neighbors, mode='distance')
P_bh = _joint_probabilities_nn(distances_csr, perplexity, verbose=0)
kl_bh, grad_bh = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
n_samples, n_components,
angle=angle, skip_num_points=0,
verbose=0)
P = squareform(P)
P_bh = P_bh.toarray()
assert_array_almost_equal(P_bh, P, decimal=5)
assert_almost_equal(kl_exact, kl_bh, decimal=3)
@skip_if_32bit
def test_n_iter_without_progress():
# Use a dummy negative n_iter_without_progress and check output on stdout
random_state = check_random_state(0)
X = random_state.randn(100, 10)
for method in ["barnes_hut", "exact"]:
tsne = TSNE(n_iter_without_progress=-1, verbose=2, learning_rate=1e8,
random_state=0, method=method, n_iter=351, init="random")
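# private attributes: evaluate progress at every iteration and skip the
# early exaggeration phase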
tsne._N_ITER_CHECK = 1
tsne._EXPLORATION_N_ITER = 0
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
tsne.fit_transform(X)
finally:
out = sys.stdout.getvalue()
sys.stdout.close()
sys.stdout = old_stdout
# The output needs to contain the value of n_iter_without_progress
assert ("did not make any progress during the "
"last -1 episodes. Finished." in out)
def test_min_grad_norm():
# Make sure that the parameter min_grad_norm is used correctly
random_state = check_random_state(0)
X = random_state.randn(100, 2)
min_grad_norm = 0.002
tsne = TSNE(min_grad_norm=min_grad_norm, verbose=2,
random_state=0, method='exact')
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
tsne.fit_transform(X)
finally:
out = sys.stdout.getvalue()
sys.stdout.close()
sys.stdout = old_stdout
lines_out = out.split('\n')
# extract the gradient norm from the verbose output
gradient_norm_values = []
for line in lines_out:
# Once the computation is finished, only an old gradient norm value
# is repeated, which we do not need to store
if 'Finished' in line:
break
start_grad_norm = line.find('gradient norm')
if start_grad_norm >= 0:
line = line[start_grad_norm:]
line = line.replace('gradient norm = ', '').split(' ')[0]
gradient_norm_values.append(float(line))
# Compute how often the gradient norm is smaller than min_grad_norm
gradient_norm_values = np.array(gradient_norm_values)
n_smaller_gradient_norms = \
len(gradient_norm_values[gradient_norm_values <= min_grad_norm])
# The gradient norm can be smaller than min_grad_norm at most once,
# because the optimization stops as soon as it drops below min_grad_norm
assert n_smaller_gradient_norms <= 1
def test_accessible_kl_divergence():
# Ensures that the accessible kl_divergence matches the computed value
random_state = check_random_state(0)
X = random_state.randn(50, 2)
tsne = TSNE(n_iter_without_progress=2, verbose=2,
random_state=0, method='exact',
n_iter=500)
old_stdout = sys.stdout
sys.stdout = StringIO()
try:
tsne.fit_transform(X)
finally:
out = sys.stdout.getvalue()
sys.stdout.close()
sys.stdout = old_stdout
# The output needs to contain the accessible kl_divergence as the error at
# the last iteration
for line in out.split('\n')[::-1]:
if 'Iteration' in line:
_, _, error = line.partition('error = ')
if error:
error, _, _ = error.partition(',')
break
assert_almost_equal(tsne.kl_divergence_, float(error), decimal=5)
@pytest.mark.parametrize('method', ['barnes_hut', 'exact'])
def test_uniform_grid(method):
"""Make sure that TSNE can approximately recover a uniform 2D grid
Due to ties in distances between points in X_2d_grid, this test is platform
dependent for ``method='barnes_hut'`` because of numerical imprecision.
Also, t-SNE is not guaranteed to converge to the right solution because a bad
initialization can lead to convergence to a bad local minimum (the
optimization problem is non-convex). To avoid breaking the test too often,
we re-run t-SNE from the final point when the convergence is not good
enough.
"""
seeds = range(3)
n_iter = 500
for seed in seeds:
tsne = TSNE(n_components=2, init='random', random_state=seed,
perplexity=50, n_iter=n_iter, method=method)
Y = tsne.fit_transform(X_2d_grid)
try_name = "{}_{}".format(method, seed)
try:
assert_uniform_grid(Y, try_name)
except AssertionError:
# If the test fails a first time, re-run with init=Y to see if
# this was caused by a bad initialization. Note that this will
# also run an early_exaggeration step.
try_name += ":rerun"
tsne.init = Y
Y = tsne.fit_transform(X_2d_grid)
assert_uniform_grid(Y, try_name)
def assert_uniform_grid(Y, try_name=None):
# Ensure that the resulting embedding leads to approximately
# uniformly spaced points: the distance to the closest neighbors
# should be non-zero and approximately constant.
nn = NearestNeighbors(n_neighbors=1).fit(Y)
dist_to_nn = nn.kneighbors(return_distance=True)[0].ravel()
assert dist_to_nn.min() > 0.1
smallest_to_mean = dist_to_nn.min() / np.mean(dist_to_nn)
largest_to_mean = dist_to_nn.max() / np.mean(dist_to_nn)
assert smallest_to_mean > .5, try_name
assert largest_to_mean < 2, try_name
def test_bh_match_exact():
# check that the ``barnes_hut`` method matches the exact one when
# ``angle = 0`` and ``perplexity > n_samples / 3``
random_state = check_random_state(0)
n_features = 10
X = random_state.randn(30, n_features).astype(np.float32)
X_embeddeds = {}
n_iter = {}
for method in ['exact', 'barnes_hut']:
tsne = TSNE(n_components=2, method=method, learning_rate=1.0,
init="random", random_state=0, n_iter=251,
perplexity=30.0, angle=0)
# Kill the early_exaggeration
tsne._EXPLORATION_N_ITER = 0
X_embeddeds[method] = tsne.fit_transform(X)
n_iter[method] = tsne.n_iter_
assert n_iter['exact'] == n_iter['barnes_hut']
assert_allclose(X_embeddeds['exact'], X_embeddeds['barnes_hut'], rtol=1e-4)
def test_gradient_bh_multithread_match_sequential():
# check that the bh gradient with different num_threads gives the same
# results
n_features = 10
n_samples = 30
n_components = 2
degrees_of_freedom = 1
angle = 3
perplexity = 5
random_state = check_random_state(0)
data = random_state.randn(n_samples, n_features).astype(np.float32)
params = random_state.randn(n_samples, n_components)
n_neighbors = n_samples - 1
distances_csr = NearestNeighbors().fit(data).kneighbors_graph(
n_neighbors=n_neighbors, mode='distance')
P_bh = _joint_probabilities_nn(distances_csr, perplexity, verbose=0)
kl_sequential, grad_sequential = _kl_divergence_bh(
params, P_bh, degrees_of_freedom, n_samples, n_components,
angle=angle, skip_num_points=0, verbose=0, num_threads=1)
for num_threads in [2, 4]:
kl_multithread, grad_multithread = _kl_divergence_bh(
params, P_bh, degrees_of_freedom, n_samples, n_components,
angle=angle, skip_num_points=0, verbose=0, num_threads=num_threads)
assert_allclose(kl_multithread, kl_sequential, rtol=1e-6)
assert_allclose(grad_multithread, grad_sequential, rtol=1e-6)
def test_tsne_with_different_distance_metrics():
"""Make sure that TSNE works for different distance metrics"""
random_state = check_random_state(0)
n_components_original = 3
n_components_embedding = 2
X = random_state.randn(50, n_components_original).astype(np.float32)
metrics = ['manhattan', 'cosine']
dist_funcs = [manhattan_distances, cosine_distances]
for metric, dist_func in zip(metrics, dist_funcs):
X_transformed_tsne = TSNE(
metric=metric, n_components=n_components_embedding,
random_state=0, n_iter=300).fit_transform(X)
X_transformed_tsne_precomputed = TSNE(
metric='precomputed', n_components=n_components_embedding,
random_state=0, n_iter=300).fit_transform(dist_func(X))
assert_array_equal(X_transformed_tsne, X_transformed_tsne_precomputed)
@pytest.mark.parametrize('method', ['exact', 'barnes_hut'])
def test_tsne_n_jobs(method):
"""Make sure that the n_jobs parameter doesn't impact the output"""
random_state = check_random_state(0)
n_features = 10
X = random_state.randn(30, n_features)
X_tr_ref = TSNE(n_components=2, method=method, perplexity=30.0,
angle=0, n_jobs=1, random_state=0).fit_transform(X)
X_tr = TSNE(n_components=2, method=method, perplexity=30.0,
angle=0, n_jobs=2, random_state=0).fit_transform(X)
assert_allclose(X_tr_ref, X_tr)