Uploaded Test files
parent f584ad9d97 · commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
venv/Lib/site-packages/sklearn/manifold/__init__.py (new file, 13 lines)
@@ -0,0 +1,13 @@
"""
|
||||
The :mod:`sklearn.manifold` module implements data embedding techniques.
|
||||
"""
|
||||
|
||||
from ._locally_linear import locally_linear_embedding, LocallyLinearEmbedding
|
||||
from ._isomap import Isomap
|
||||
from ._mds import MDS, smacof
|
||||
from ._spectral_embedding import SpectralEmbedding, spectral_embedding
|
||||
from ._t_sne import TSNE, trustworthiness
|
||||
|
||||
__all__ = ['locally_linear_embedding', 'LocallyLinearEmbedding', 'Isomap',
|
||||
'MDS', 'smacof', 'SpectralEmbedding', 'spectral_embedding', "TSNE",
|
||||
'trustworthiness']
13 binary files not shown.
venv/Lib/site-packages/sklearn/manifold/_isomap.py (new file, 282 lines)
@@ -0,0 +1,282 @@
"""Isomap for manifold learning"""
|
||||
|
||||
# Author: Jake Vanderplas -- <vanderplas@astro.washington.edu>
|
||||
# License: BSD 3 clause (C) 2011
|
||||
|
||||
import numpy as np
|
||||
from ..base import BaseEstimator, TransformerMixin
|
||||
from ..neighbors import NearestNeighbors, kneighbors_graph
|
||||
from ..utils.deprecation import deprecated
|
||||
from ..utils.validation import check_is_fitted
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ..utils.graph import graph_shortest_path
|
||||
from ..decomposition import KernelPCA
|
||||
from ..preprocessing import KernelCenterer
|
||||
|
||||
|
||||
class Isomap(TransformerMixin, BaseEstimator):
|
||||
"""Isomap Embedding
|
||||
|
||||
Non-linear dimensionality reduction through Isometric Mapping
|
||||
|
||||
Read more in the :ref:`User Guide <isomap>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_neighbors : integer
|
||||
number of neighbors to consider for each point.
|
||||
|
||||
n_components : integer
|
||||
number of coordinates for the manifold
|
||||
|
||||
eigen_solver : ['auto'|'arpack'|'dense']
|
||||
'auto' : Attempt to choose the most efficient solver
|
||||
for the given problem.
|
||||
|
||||
'arpack' : Use Arnoldi decomposition to find the eigenvalues
|
||||
and eigenvectors.
|
||||
|
||||
'dense' : Use a direct solver (i.e. LAPACK)
|
||||
for the eigenvalue decomposition.
|
||||
|
||||
tol : float
|
||||
Convergence tolerance passed to arpack or lobpcg.
|
||||
not used if eigen_solver == 'dense'.
|
||||
|
||||
max_iter : integer
|
||||
Maximum number of iterations for the arpack solver.
|
||||
not used if eigen_solver == 'dense'.
|
||||
|
||||
path_method : string ['auto'|'FW'|'D']
|
||||
Method to use in finding shortest path.
|
||||
|
||||
'auto' : attempt to choose the best algorithm automatically.
|
||||
|
||||
'FW' : Floyd-Warshall algorithm.
|
||||
|
||||
'D' : Dijkstra's algorithm.
|
||||
|
||||
neighbors_algorithm : string ['auto'|'brute'|'kd_tree'|'ball_tree']
|
||||
Algorithm to use for nearest neighbors search,
|
||||
passed to neighbors.NearestNeighbors instance.
|
||||
|
||||
n_jobs : int or None, default=None
|
||||
The number of parallel jobs to run.
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
metric : string, or callable, default="minkowski"
|
||||
The metric to use when calculating distance between instances in a
|
||||
feature array. If metric is a string or callable, it must be one of
|
||||
the options allowed by :func:`sklearn.metrics.pairwise_distances` for
|
||||
its metric parameter.
|
||||
If metric is "precomputed", X is assumed to be a distance matrix and
|
||||
must be square. X may be a :term:`sparse graph`.
|
||||
|
||||
.. versionadded:: 0.22
|
||||
|
||||
p : int, default=2
|
||||
Parameter for the Minkowski metric from
|
||||
sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is
|
||||
equivalent to using manhattan_distance (l1), and euclidean_distance
|
||||
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
|
||||
|
||||
.. versionadded:: 0.22
|
||||
|
||||
metric_params : dict, default=None
|
||||
Additional keyword arguments for the metric function.
|
||||
|
||||
.. versionadded:: 0.22
|
||||
|
||||
Attributes
|
||||
----------
|
||||
embedding_ : array-like, shape (n_samples, n_components)
|
||||
Stores the embedding vectors.
|
||||
|
||||
kernel_pca_ : object
|
||||
:class:`~sklearn.decomposition.KernelPCA` object used to implement the
|
||||
embedding.
|
||||
|
||||
nbrs_ : sklearn.neighbors.NearestNeighbors instance
|
||||
Stores nearest neighbors instance, including BallTree or KDTree
|
||||
if applicable.
|
||||
|
||||
dist_matrix_ : array-like, shape (n_samples, n_samples)
|
||||
Stores the geodesic distance matrix of training data.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.datasets import load_digits
|
||||
>>> from sklearn.manifold import Isomap
|
||||
>>> X, _ = load_digits(return_X_y=True)
|
||||
>>> X.shape
|
||||
(1797, 64)
|
||||
>>> embedding = Isomap(n_components=2)
|
||||
>>> X_transformed = embedding.fit_transform(X[:100])
|
||||
>>> X_transformed.shape
|
||||
(100, 2)
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
.. [1] Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric
|
||||
framework for nonlinear dimensionality reduction. Science 290 (5500)
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, *, n_neighbors=5, n_components=2, eigen_solver='auto',
|
||||
tol=0, max_iter=None, path_method='auto',
|
||||
neighbors_algorithm='auto', n_jobs=None, metric='minkowski',
|
||||
p=2, metric_params=None):
|
||||
self.n_neighbors = n_neighbors
|
||||
self.n_components = n_components
|
||||
self.eigen_solver = eigen_solver
|
||||
self.tol = tol
|
||||
self.max_iter = max_iter
|
||||
self.path_method = path_method
|
||||
self.neighbors_algorithm = neighbors_algorithm
|
||||
self.n_jobs = n_jobs
|
||||
self.metric = metric
|
||||
self.p = p
|
||||
self.metric_params = metric_params
|
||||
|
||||
def _fit_transform(self, X):
|
||||
self.nbrs_ = NearestNeighbors(n_neighbors=self.n_neighbors,
|
||||
algorithm=self.neighbors_algorithm,
|
||||
metric=self.metric, p=self.p,
|
||||
metric_params=self.metric_params,
|
||||
n_jobs=self.n_jobs)
|
||||
self.nbrs_.fit(X)
|
||||
self.n_features_in_ = self.nbrs_.n_features_in_
|
||||
|
||||
self.kernel_pca_ = KernelPCA(n_components=self.n_components,
|
||||
kernel="precomputed",
|
||||
eigen_solver=self.eigen_solver,
|
||||
tol=self.tol, max_iter=self.max_iter,
|
||||
n_jobs=self.n_jobs)
|
||||
|
||||
kng = kneighbors_graph(self.nbrs_, self.n_neighbors,
|
||||
metric=self.metric, p=self.p,
|
||||
metric_params=self.metric_params,
|
||||
mode='distance', n_jobs=self.n_jobs)
|
||||
|
||||
self.dist_matrix_ = graph_shortest_path(kng,
|
||||
method=self.path_method,
|
||||
directed=False)
|
||||
G = self.dist_matrix_ ** 2
|
||||
G *= -0.5
|
||||
|
||||
self.embedding_ = self.kernel_pca_.fit_transform(G)
|
||||
|
||||
# mypy error: Decorated property not supported
|
||||
@deprecated( # type: ignore
|
||||
"Attribute `training_data_` was deprecated in version 0.22 and"
|
||||
" will be removed in 0.24."
|
||||
)
|
||||
@property
|
||||
def training_data_(self):
|
||||
check_is_fitted(self)
|
||||
return self.nbrs_._fit_X
|
||||
|
||||
def reconstruction_error(self):
|
||||
"""Compute the reconstruction error for the embedding.
|
||||
|
||||
Returns
|
||||
-------
|
||||
reconstruction_error : float
|
||||
|
||||
Notes
|
||||
-----
|
||||
The cost function of an isomap embedding is
|
||||
|
||||
``E = frobenius_norm[K(D) - K(D_fit)] / n_samples``
|
||||
|
||||
Where D is the matrix of distances for the input data X,
|
||||
D_fit is the matrix of distances for the output embedding X_fit,
|
||||
and K is the isomap kernel:
|
||||
|
||||
``K(D) = -0.5 * (I - 1/n_samples) * D^2 * (I - 1/n_samples)``
|
||||
"""
|
||||
G = -0.5 * self.dist_matrix_ ** 2
|
||||
G_center = KernelCenterer().fit_transform(G)
|
||||
evals = self.kernel_pca_.lambdas_
|
||||
return np.sqrt(np.sum(G_center ** 2) - np.sum(evals ** 2)) / G.shape[0]
|
||||
|
||||
def fit(self, X, y=None):
|
||||
"""Compute the embedding vectors for data X
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse graph, BallTree, KDTree, NearestNeighbors}
|
||||
Sample data, shape = (n_samples, n_features), in the form of a
|
||||
numpy array, sparse graph, precomputed tree, or NearestNeighbors
|
||||
object.
|
||||
|
||||
y : Ignored
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : returns an instance of self.
|
||||
"""
|
||||
self._fit_transform(X)
|
||||
return self
|
||||
|
||||
def fit_transform(self, X, y=None):
|
||||
"""Fit the model from data in X and transform X.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse graph, BallTree, KDTree}
|
||||
Training vector, where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
|
||||
y : Ignored
|
||||
|
||||
Returns
|
||||
-------
|
||||
X_new : array-like, shape (n_samples, n_components)
|
||||
"""
|
||||
self._fit_transform(X)
|
||||
return self.embedding_
|
||||
|
||||
def transform(self, X):
|
||||
"""Transform X.
|
||||
|
||||
This is implemented by linking the points X into the graph of geodesic
|
||||
distances of the training data. First the `n_neighbors` nearest
|
||||
neighbors of X are found in the training data, and from these the
|
||||
shortest geodesic distances from each point in X to each point in
|
||||
the training data are computed in order to construct the kernel.
|
||||
The embedding of X is the projection of this kernel onto the
|
||||
embedding vectors of the training set.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like, shape (n_queries, n_features)
|
||||
If neighbors_algorithm='precomputed', X is assumed to be a
|
||||
distance matrix or a sparse graph of shape
|
||||
(n_queries, n_samples_fit).
|
||||
|
||||
Returns
|
||||
-------
|
||||
X_new : array-like, shape (n_queries, n_components)
|
||||
"""
|
||||
check_is_fitted(self)
|
||||
distances, indices = self.nbrs_.kneighbors(X, return_distance=True)
|
||||
|
||||
# Create the graph of shortest distances from X to
|
||||
# training data via the nearest neighbors of X.
|
||||
# This can be done as a single array operation, but it potentially
|
||||
# takes a lot of memory. To avoid that, use a loop:
|
||||
|
||||
n_samples_fit = self.nbrs_.n_samples_fit_
|
||||
n_queries = distances.shape[0]
|
||||
G_X = np.zeros((n_queries, n_samples_fit))
|
||||
for i in range(n_queries):
|
||||
G_X[i] = np.min(self.dist_matrix_[indices[i]] +
|
||||
distances[i][:, None], 0)
|
||||
|
||||
G_X **= 2
|
||||
G_X *= -0.5
|
||||
|
||||
return self.kernel_pca_.transform(G_X)
|
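A minimal usage sketch (an illustrative addition, not part of the committed file) for the out-of-sample path described in the transform() docstring above: fit Isomap on a training subset, then embed held-out points by linking them into the training geodesic graph. A standard scikit-learn installation is assumed.

from sklearn.datasets import load_digits
from sklearn.manifold import Isomap

X, _ = load_digits(return_X_y=True)
iso = Isomap(n_neighbors=5, n_components=2).fit(X[:100])  # train on 100 digits
X_new = iso.transform(X[100:110])                         # embed 10 unseen digits
print(X_new.shape)                                        # (10, 2)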
venv/Lib/site-packages/sklearn/manifold/_locally_linear.py (new file, 729 lines)
@@ -0,0 +1,729 @@
"""Locally Linear Embedding"""
|
||||
|
||||
# Author: Fabian Pedregosa -- <fabian.pedregosa@inria.fr>
|
||||
# Jake Vanderplas -- <vanderplas@astro.washington.edu>
|
||||
# License: BSD 3 clause (C) INRIA 2011
|
||||
|
||||
import numpy as np
|
||||
from scipy.linalg import eigh, svd, qr, solve
|
||||
from scipy.sparse import eye, csr_matrix
|
||||
from scipy.sparse.linalg import eigsh
|
||||
|
||||
from ..base import BaseEstimator, TransformerMixin, _UnstableArchMixin
|
||||
from ..utils import check_random_state, check_array
|
||||
from ..utils.extmath import stable_cumsum
|
||||
from ..utils.validation import check_is_fitted
|
||||
from ..utils.validation import FLOAT_DTYPES
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ..neighbors import NearestNeighbors
|
||||
|
||||
|
||||
def barycenter_weights(X, Z, reg=1e-3):
|
||||
"""Compute barycenter weights of X from Y along the first axis
|
||||
|
||||
We estimate the weights to assign to each point in Y[i] to recover
|
||||
the point X[i]. The barycenter weights sum to 1.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like, shape (n_samples, n_dim)
|
||||
|
||||
Z : array-like, shape (n_samples, n_neighbors, n_dim)
|
||||
|
||||
reg : float, optional
|
||||
amount of regularization to add for the problem to be
|
||||
well-posed in the case of n_neighbors > n_dim
|
||||
|
||||
Returns
|
||||
-------
|
||||
B : array-like, shape (n_samples, n_neighbors)
|
||||
|
||||
Notes
|
||||
-----
|
||||
See the developer notes for more information.
|
||||
"""
|
||||
X = check_array(X, dtype=FLOAT_DTYPES)
|
||||
Z = check_array(Z, dtype=FLOAT_DTYPES, allow_nd=True)
|
||||
|
||||
n_samples, n_neighbors = X.shape[0], Z.shape[1]
|
||||
B = np.empty((n_samples, n_neighbors), dtype=X.dtype)
|
||||
v = np.ones(n_neighbors, dtype=X.dtype)
|
||||
|
||||
# this might raise a LinalgError if G is singular and has trace
|
||||
# zero
|
||||
for i, A in enumerate(Z.transpose(0, 2, 1)):
|
||||
C = A.T - X[i] # broadcasting
|
||||
G = np.dot(C, C.T)
|
||||
trace = np.trace(G)
|
||||
if trace > 0:
|
||||
R = reg * trace
|
||||
else:
|
||||
R = reg
|
||||
G.flat[::Z.shape[1] + 1] += R
|
||||
w = solve(G, v, sym_pos=True)
|
||||
B[i, :] = w / np.sum(w)
|
||||
return B
|
||||
|
||||
|
||||
def barycenter_kneighbors_graph(X, n_neighbors, reg=1e-3, n_jobs=None):
|
||||
"""Computes the barycenter weighted graph of k-Neighbors for points in X
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, NearestNeighbors}
|
||||
Sample data, shape = (n_samples, n_features), in the form of a
|
||||
numpy array or a NearestNeighbors object.
|
||||
|
||||
n_neighbors : int
|
||||
Number of neighbors for each sample.
|
||||
|
||||
reg : float, optional
|
||||
Amount of regularization when solving the least-squares
|
||||
problem. Only relevant if mode='barycenter'. If None, use the
|
||||
default.
|
||||
|
||||
n_jobs : int or None, optional (default=None)
|
||||
The number of parallel jobs to run for neighbors search.
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
Returns
|
||||
-------
|
||||
A : sparse matrix in CSR format, shape = [n_samples, n_samples]
|
||||
A[i, j] is assigned the weight of edge that connects i to j.
|
||||
|
||||
See also
|
||||
--------
|
||||
sklearn.neighbors.kneighbors_graph
|
||||
sklearn.neighbors.radius_neighbors_graph
|
||||
"""
|
||||
knn = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs).fit(X)
|
||||
X = knn._fit_X
|
||||
n_samples = knn.n_samples_fit_
|
||||
ind = knn.kneighbors(X, return_distance=False)[:, 1:]
|
||||
data = barycenter_weights(X, X[ind], reg=reg)
|
||||
indptr = np.arange(0, n_samples * n_neighbors + 1, n_neighbors)
|
||||
return csr_matrix((data.ravel(), ind.ravel(), indptr),
|
||||
shape=(n_samples, n_samples))
|
||||
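# Illustrative sketch (not part of the upstream file): as the docstrings above
# note, the barycenter weights of each sample over its k nearest neighbors sum
# to one, so every row of the CSR matrix returned by barycenter_kneighbors_graph
# sums to (approximately) 1. Assuming scikit-learn is installed and using the
# private module path as laid out in this tree:
#
#     import numpy as np
#     from sklearn.manifold._locally_linear import barycenter_kneighbors_graph
#
#     X = np.random.RandomState(0).rand(20, 3)
#     W = barycenter_kneighbors_graph(X, n_neighbors=5)
#     np.allclose(W.sum(axis=1), 1.0)   # -> True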
|
||||
|
||||
def null_space(M, k, k_skip=1, eigen_solver='arpack', tol=1E-6, max_iter=100,
|
||||
random_state=None):
|
||||
"""
|
||||
Find the null space of a matrix M.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
M : {array, matrix, sparse matrix, LinearOperator}
|
||||
Input covariance matrix: should be symmetric positive semi-definite
|
||||
|
||||
k : integer
|
||||
Number of eigenvalues/vectors to return
|
||||
|
||||
k_skip : integer, optional
|
||||
Number of low eigenvalues to skip.
|
||||
|
||||
eigen_solver : string, {'auto', 'arpack', 'dense'}
|
||||
auto : algorithm will attempt to choose the best method for input data
|
||||
arpack : use arnoldi iteration in shift-invert mode.
|
||||
For this method, M may be a dense matrix, sparse matrix,
|
||||
or general linear operator.
|
||||
Warning: ARPACK can be unstable for some problems. It is
|
||||
best to try several random seeds in order to check results.
|
||||
dense : use standard dense matrix operations for the eigenvalue
|
||||
decomposition. For this method, M must be an array
|
||||
or matrix type. This method should be avoided for
|
||||
large problems.
|
||||
|
||||
tol : float, optional
|
||||
Tolerance for 'arpack' method.
|
||||
Not used if eigen_solver=='dense'.
|
||||
|
||||
max_iter : int
|
||||
Maximum number of iterations for 'arpack' method.
|
||||
Not used if eigen_solver=='dense'
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
Determines the random number generator when ``solver`` == 'arpack'.
|
||||
Pass an int for reproducible results across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
"""
|
||||
if eigen_solver == 'auto':
|
||||
if M.shape[0] > 200 and k + k_skip < 10:
|
||||
eigen_solver = 'arpack'
|
||||
else:
|
||||
eigen_solver = 'dense'
|
||||
|
||||
if eigen_solver == 'arpack':
|
||||
random_state = check_random_state(random_state)
|
||||
# initialize with [-1,1] as in ARPACK
|
||||
v0 = random_state.uniform(-1, 1, M.shape[0])
|
||||
try:
|
||||
eigen_values, eigen_vectors = eigsh(M, k + k_skip, sigma=0.0,
|
||||
tol=tol, maxiter=max_iter,
|
||||
v0=v0)
|
||||
except RuntimeError as msg:
|
||||
raise ValueError("Error in determining null-space with ARPACK. "
|
||||
"Error message: '%s'. "
|
||||
"Note that method='arpack' can fail when the "
|
||||
"weight matrix is singular or otherwise "
|
||||
"ill-behaved. method='dense' is recommended. "
|
||||
"See online documentation for more information."
|
||||
% msg)
|
||||
|
||||
return eigen_vectors[:, k_skip:], np.sum(eigen_values[k_skip:])
|
||||
elif eigen_solver == 'dense':
|
||||
if hasattr(M, 'toarray'):
|
||||
M = M.toarray()
|
||||
eigen_values, eigen_vectors = eigh(
|
||||
M, eigvals=(k_skip, k + k_skip - 1), overwrite_a=True)
|
||||
index = np.argsort(np.abs(eigen_values))
|
||||
return eigen_vectors[:, index], np.sum(eigen_values)
|
||||
else:
|
||||
raise ValueError("Unrecognized eigen_solver '%s'" % eigen_solver)
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def locally_linear_embedding(
|
||||
X, *, n_neighbors, n_components, reg=1e-3, eigen_solver='auto',
|
||||
tol=1e-6, max_iter=100, method='standard', hessian_tol=1E-4,
|
||||
modified_tol=1E-12, random_state=None, n_jobs=None):
|
||||
"""Perform a Locally Linear Embedding analysis on the data.
|
||||
|
||||
Read more in the :ref:`User Guide <locally_linear_embedding>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, NearestNeighbors}
|
||||
Sample data, shape = (n_samples, n_features), in the form of a
|
||||
numpy array or a NearestNeighbors object.
|
||||
|
||||
n_neighbors : integer
|
||||
number of neighbors to consider for each point.
|
||||
|
||||
n_components : integer
|
||||
number of coordinates for the manifold.
|
||||
|
||||
reg : float
|
||||
regularization constant, multiplies the trace of the local covariance
|
||||
matrix of the distances.
|
||||
|
||||
eigen_solver : string, {'auto', 'arpack', 'dense'}
|
||||
auto : algorithm will attempt to choose the best method for input data
|
||||
|
||||
arpack : use arnoldi iteration in shift-invert mode.
|
||||
For this method, M may be a dense matrix, sparse matrix,
|
||||
or general linear operator.
|
||||
Warning: ARPACK can be unstable for some problems. It is
|
||||
best to try several random seeds in order to check results.
|
||||
|
||||
dense : use standard dense matrix operations for the eigenvalue
|
||||
decomposition. For this method, M must be an array
|
||||
or matrix type. This method should be avoided for
|
||||
large problems.
|
||||
|
||||
tol : float, optional
|
||||
Tolerance for 'arpack' method
|
||||
Not used if eigen_solver=='dense'.
|
||||
|
||||
max_iter : integer
|
||||
maximum number of iterations for the arpack solver.
|
||||
|
||||
method : {'standard', 'hessian', 'modified', 'ltsa'}
|
||||
standard : use the standard locally linear embedding algorithm.
|
||||
see reference [1]_
|
||||
hessian : use the Hessian eigenmap method. This method requires
|
||||
n_neighbors > n_components * (1 + (n_components + 1) / 2).
|
||||
see reference [2]_
|
||||
modified : use the modified locally linear embedding algorithm.
|
||||
see reference [3]_
|
||||
ltsa : use local tangent space alignment algorithm
|
||||
see reference [4]_
|
||||
|
||||
hessian_tol : float, optional
|
||||
Tolerance for Hessian eigenmapping method.
|
||||
Only used if method == 'hessian'
|
||||
|
||||
modified_tol : float, optional
|
||||
Tolerance for modified LLE method.
|
||||
Only used if method == 'modified'
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
Determines the random number generator when ``solver`` == 'arpack'.
|
||||
Pass an int for reproducible results across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
n_jobs : int or None, optional (default=None)
|
||||
The number of parallel jobs to run for neighbors search.
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Y : array-like, shape [n_samples, n_components]
|
||||
Embedding vectors.
|
||||
|
||||
squared_error : float
|
||||
Reconstruction error for the embedding vectors. Equivalent to
|
||||
``norm(Y - W Y, 'fro')**2``, where W are the reconstruction weights.
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
.. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction
|
||||
by locally linear embedding. Science 290:2323 (2000).
|
||||
.. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally
|
||||
linear embedding techniques for high-dimensional data.
|
||||
Proc Natl Acad Sci U S A. 100:5591 (2003).
|
||||
.. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear
|
||||
Embedding Using Multiple Weights.
|
||||
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382
|
||||
.. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear
|
||||
dimensionality reduction via tangent space alignment.
|
||||
Journal of Shanghai Univ. 8:406 (2004)
|
||||
"""
|
||||
if eigen_solver not in ('auto', 'arpack', 'dense'):
|
||||
raise ValueError("unrecognized eigen_solver '%s'" % eigen_solver)
|
||||
|
||||
if method not in ('standard', 'hessian', 'modified', 'ltsa'):
|
||||
raise ValueError("unrecognized method '%s'" % method)
|
||||
|
||||
nbrs = NearestNeighbors(n_neighbors=n_neighbors + 1, n_jobs=n_jobs)
|
||||
nbrs.fit(X)
|
||||
X = nbrs._fit_X
|
||||
|
||||
N, d_in = X.shape
|
||||
|
||||
if n_components > d_in:
|
||||
raise ValueError("output dimension must be less than or equal "
|
||||
"to input dimension")
|
||||
if n_neighbors >= N:
|
||||
raise ValueError(
|
||||
"Expected n_neighbors <= n_samples, "
|
||||
" but n_samples = %d, n_neighbors = %d" %
|
||||
(N, n_neighbors)
|
||||
)
|
||||
|
||||
if n_neighbors <= 0:
|
||||
raise ValueError("n_neighbors must be positive")
|
||||
|
||||
M_sparse = (eigen_solver != 'dense')
|
||||
|
||||
if method == 'standard':
|
||||
W = barycenter_kneighbors_graph(
|
||||
nbrs, n_neighbors=n_neighbors, reg=reg, n_jobs=n_jobs)
|
||||
|
||||
# we'll compute M = (I-W)'(I-W)
|
||||
# depending on the solver, we'll do this differently
|
||||
if M_sparse:
|
||||
M = eye(*W.shape, format=W.format) - W
|
||||
M = (M.T * M).tocsr()
|
||||
else:
|
||||
M = (W.T * W - W.T - W).toarray()
|
||||
M.flat[::M.shape[0] + 1] += 1  # add the identity: M = (W - I).T @ (W - I)
|
||||
|
||||
elif method == 'hessian':
|
||||
dp = n_components * (n_components + 1) // 2
|
||||
|
||||
if n_neighbors <= n_components + dp:
|
||||
raise ValueError("for method='hessian', n_neighbors must be "
|
||||
"greater than "
|
||||
"[n_components * (n_components + 3) / 2]")
|
||||
|
||||
neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
|
||||
return_distance=False)
|
||||
neighbors = neighbors[:, 1:]
|
||||
|
||||
Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64)
|
||||
Yi[:, 0] = 1
|
||||
|
||||
M = np.zeros((N, N), dtype=np.float64)
|
||||
|
||||
use_svd = (n_neighbors > d_in)
|
||||
|
||||
for i in range(N):
|
||||
Gi = X[neighbors[i]]
|
||||
Gi -= Gi.mean(0)
|
||||
|
||||
# build Hessian estimator
|
||||
if use_svd:
|
||||
U = svd(Gi, full_matrices=0)[0]
|
||||
else:
|
||||
Ci = np.dot(Gi, Gi.T)
|
||||
U = eigh(Ci)[1][:, ::-1]
|
||||
|
||||
Yi[:, 1:1 + n_components] = U[:, :n_components]
|
||||
|
||||
j = 1 + n_components
|
||||
for k in range(n_components):
|
||||
Yi[:, j:j + n_components - k] = (U[:, k:k + 1] *
|
||||
U[:, k:n_components])
|
||||
j += n_components - k
|
||||
|
||||
Q, R = qr(Yi)
|
||||
|
||||
w = Q[:, n_components + 1:]
|
||||
S = w.sum(0)
|
||||
|
||||
S[np.where(abs(S) < hessian_tol)] = 1
|
||||
w /= S
|
||||
|
||||
nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
|
||||
M[nbrs_x, nbrs_y] += np.dot(w, w.T)
|
||||
|
||||
if M_sparse:
|
||||
M = csr_matrix(M)
|
||||
|
||||
elif method == 'modified':
|
||||
if n_neighbors < n_components:
|
||||
raise ValueError("modified LLE requires "
|
||||
"n_neighbors >= n_components")
|
||||
|
||||
neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
|
||||
return_distance=False)
|
||||
neighbors = neighbors[:, 1:]
|
||||
|
||||
# find the eigenvectors and eigenvalues of each local covariance
|
||||
# matrix. We want V[i] to be a [n_neighbors x n_neighbors] matrix,
|
||||
# where the columns are eigenvectors
|
||||
V = np.zeros((N, n_neighbors, n_neighbors))
|
||||
nev = min(d_in, n_neighbors)
|
||||
evals = np.zeros([N, nev])
|
||||
|
||||
# choose the most efficient way to find the eigenvectors
|
||||
use_svd = (n_neighbors > d_in)
|
||||
|
||||
if use_svd:
|
||||
for i in range(N):
|
||||
X_nbrs = X[neighbors[i]] - X[i]
|
||||
V[i], evals[i], _ = svd(X_nbrs,
|
||||
full_matrices=True)
|
||||
evals **= 2
|
||||
else:
|
||||
for i in range(N):
|
||||
X_nbrs = X[neighbors[i]] - X[i]
|
||||
C_nbrs = np.dot(X_nbrs, X_nbrs.T)
|
||||
evi, vi = eigh(C_nbrs)
|
||||
evals[i] = evi[::-1]
|
||||
V[i] = vi[:, ::-1]
|
||||
|
||||
# find regularized weights: this is like normal LLE.
|
||||
# because we've already computed the SVD of each covariance matrix,
|
||||
# it's faster to use this rather than np.linalg.solve
|
||||
reg = 1E-3 * evals.sum(1)
|
||||
|
||||
tmp = np.dot(V.transpose(0, 2, 1), np.ones(n_neighbors))
|
||||
tmp[:, :nev] /= evals + reg[:, None]
|
||||
tmp[:, nev:] /= reg[:, None]
|
||||
|
||||
w_reg = np.zeros((N, n_neighbors))
|
||||
for i in range(N):
|
||||
w_reg[i] = np.dot(V[i], tmp[i])
|
||||
w_reg /= w_reg.sum(1)[:, None]
|
||||
|
||||
# calculate eta: the median of the ratio of small to large eigenvalues
|
||||
# across the points. This is used to determine s_i, below
|
||||
rho = evals[:, n_components:].sum(1) / evals[:, :n_components].sum(1)
|
||||
eta = np.median(rho)
|
||||
|
||||
# find s_i, the size of the "almost null space" for each point:
|
||||
# this is the size of the largest set of eigenvalues
|
||||
# such that Sum[v; v in set]/Sum[v; v not in set] < eta
|
||||
s_range = np.zeros(N, dtype=int)
|
||||
evals_cumsum = stable_cumsum(evals, 1)
|
||||
eta_range = evals_cumsum[:, -1:] / evals_cumsum[:, :-1] - 1
|
||||
for i in range(N):
|
||||
s_range[i] = np.searchsorted(eta_range[i, ::-1], eta)
|
||||
s_range += n_neighbors - nev # number of zero eigenvalues
|
||||
|
||||
# Now calculate M.
|
||||
# This is the [N x N] matrix whose null space is the desired embedding
|
||||
M = np.zeros((N, N), dtype=np.float64)
|
||||
for i in range(N):
|
||||
s_i = s_range[i]
|
||||
|
||||
# select bottom s_i eigenvectors and calculate alpha
|
||||
Vi = V[i, :, n_neighbors - s_i:]
|
||||
alpha_i = np.linalg.norm(Vi.sum(0)) / np.sqrt(s_i)
|
||||
|
||||
# compute Householder matrix which satisfies
|
||||
# Hi*Vi.T*ones(n_neighbors) = alpha_i*ones(s)
|
||||
# using prescription from paper
|
||||
h = np.full(s_i, alpha_i) - np.dot(Vi.T, np.ones(n_neighbors))
|
||||
|
||||
norm_h = np.linalg.norm(h)
|
||||
if norm_h < modified_tol:
|
||||
h *= 0
|
||||
else:
|
||||
h /= norm_h
|
||||
|
||||
# Householder matrix is
|
||||
# >> Hi = np.identity(s_i) - 2*np.outer(h,h)
|
||||
# Then the weight matrix is
|
||||
# >> Wi = np.dot(Vi,Hi) + (1-alpha_i) * w_reg[i,:,None]
|
||||
# We do this much more efficiently:
|
||||
Wi = (Vi - 2 * np.outer(np.dot(Vi, h), h) +
|
||||
(1 - alpha_i) * w_reg[i, :, None])
|
||||
|
||||
# Update M as follows:
|
||||
# >> W_hat = np.zeros( (N,s_i) )
|
||||
# >> W_hat[neighbors[i],:] = Wi
|
||||
# >> W_hat[i] -= 1
|
||||
# >> M += np.dot(W_hat,W_hat.T)
|
||||
# We can do this much more efficiently:
|
||||
nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
|
||||
M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T)
|
||||
Wi_sum1 = Wi.sum(1)
|
||||
M[i, neighbors[i]] -= Wi_sum1
|
||||
M[neighbors[i], i] -= Wi_sum1
|
||||
M[i, i] += s_i
|
||||
|
||||
if M_sparse:
|
||||
M = csr_matrix(M)
|
||||
|
||||
elif method == 'ltsa':
|
||||
neighbors = nbrs.kneighbors(X, n_neighbors=n_neighbors + 1,
|
||||
return_distance=False)
|
||||
neighbors = neighbors[:, 1:]
|
||||
|
||||
M = np.zeros((N, N))
|
||||
|
||||
use_svd = (n_neighbors > d_in)
|
||||
|
||||
for i in range(N):
|
||||
Xi = X[neighbors[i]]
|
||||
Xi -= Xi.mean(0)
|
||||
|
||||
# compute n_components largest eigenvalues of Xi * Xi^T
|
||||
if use_svd:
|
||||
v = svd(Xi, full_matrices=True)[0]
|
||||
else:
|
||||
Ci = np.dot(Xi, Xi.T)
|
||||
v = eigh(Ci)[1][:, ::-1]
|
||||
|
||||
Gi = np.zeros((n_neighbors, n_components + 1))
|
||||
Gi[:, 1:] = v[:, :n_components]
|
||||
Gi[:, 0] = 1. / np.sqrt(n_neighbors)
|
||||
|
||||
GiGiT = np.dot(Gi, Gi.T)
|
||||
|
||||
nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
|
||||
M[nbrs_x, nbrs_y] -= GiGiT
|
||||
M[neighbors[i], neighbors[i]] += 1
|
||||
|
||||
return null_space(M, n_components, k_skip=1, eigen_solver=eigen_solver,
|
||||
tol=tol, max_iter=max_iter, random_state=random_state)
|
||||
|
||||
|
||||
class LocallyLinearEmbedding(TransformerMixin,
|
||||
_UnstableArchMixin, BaseEstimator):
|
||||
"""Locally Linear Embedding
|
||||
|
||||
Read more in the :ref:`User Guide <locally_linear_embedding>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_neighbors : integer
|
||||
number of neighbors to consider for each point.
|
||||
|
||||
n_components : integer
|
||||
number of coordinates for the manifold
|
||||
|
||||
reg : float
|
||||
regularization constant, multiplies the trace of the local covariance
|
||||
matrix of the distances.
|
||||
|
||||
eigen_solver : string, {'auto', 'arpack', 'dense'}
|
||||
auto : algorithm will attempt to choose the best method for input data
|
||||
|
||||
arpack : use arnoldi iteration in shift-invert mode.
|
||||
For this method, M may be a dense matrix, sparse matrix,
|
||||
or general linear operator.
|
||||
Warning: ARPACK can be unstable for some problems. It is
|
||||
best to try several random seeds in order to check results.
|
||||
|
||||
dense : use standard dense matrix operations for the eigenvalue
|
||||
decomposition. For this method, M must be an array
|
||||
or matrix type. This method should be avoided for
|
||||
large problems.
|
||||
|
||||
tol : float, optional
|
||||
Tolerance for 'arpack' method
|
||||
Not used if eigen_solver=='dense'.
|
||||
|
||||
max_iter : integer
|
||||
maximum number of iterations for the arpack solver.
|
||||
Not used if eigen_solver=='dense'.
|
||||
|
||||
method : string ('standard', 'hessian', 'modified' or 'ltsa')
|
||||
standard : use the standard locally linear embedding algorithm. see
|
||||
reference [1]
|
||||
hessian : use the Hessian eigenmap method. This method requires
|
||||
``n_neighbors > n_components * (1 + (n_components + 1) / 2)``
|
||||
see reference [2]
|
||||
modified : use the modified locally linear embedding algorithm.
|
||||
see reference [3]
|
||||
ltsa : use local tangent space alignment algorithm
|
||||
see reference [4]
|
||||
|
||||
hessian_tol : float, optional
|
||||
Tolerance for Hessian eigenmapping method.
|
||||
Only used if ``method == 'hessian'``
|
||||
|
||||
modified_tol : float, optional
|
||||
Tolerance for modified LLE method.
|
||||
Only used if ``method == 'modified'``
|
||||
|
||||
neighbors_algorithm : string ['auto'|'brute'|'kd_tree'|'ball_tree']
|
||||
algorithm to use for nearest neighbors search,
|
||||
passed to neighbors.NearestNeighbors instance
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
Determines the random number generator when
|
||||
``eigen_solver`` == 'arpack'. Pass an int for reproducible results
|
||||
across multiple function calls. See :term:`Glossary <random_state>`.
|
||||
|
||||
n_jobs : int or None, optional (default=None)
|
||||
The number of parallel jobs to run.
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
embedding_ : array-like, shape [n_samples, n_components]
|
||||
Stores the embedding vectors
|
||||
|
||||
reconstruction_error_ : float
|
||||
Reconstruction error associated with `embedding_`
|
||||
|
||||
nbrs_ : NearestNeighbors object
|
||||
Stores nearest neighbors instance, including BallTree or KDTree
|
||||
if applicable.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.datasets import load_digits
|
||||
>>> from sklearn.manifold import LocallyLinearEmbedding
|
||||
>>> X, _ = load_digits(return_X_y=True)
|
||||
>>> X.shape
|
||||
(1797, 64)
|
||||
>>> embedding = LocallyLinearEmbedding(n_components=2)
|
||||
>>> X_transformed = embedding.fit_transform(X[:100])
|
||||
>>> X_transformed.shape
|
||||
(100, 2)
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
.. [1] Roweis, S. & Saul, L. Nonlinear dimensionality reduction
|
||||
by locally linear embedding. Science 290:2323 (2000).
|
||||
.. [2] Donoho, D. & Grimes, C. Hessian eigenmaps: Locally
|
||||
linear embedding techniques for high-dimensional data.
|
||||
Proc Natl Acad Sci U S A. 100:5591 (2003).
|
||||
.. [3] Zhang, Z. & Wang, J. MLLE: Modified Locally Linear
|
||||
Embedding Using Multiple Weights.
|
||||
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.70.382
|
||||
.. [4] Zhang, Z. & Zha, H. Principal manifolds and nonlinear
|
||||
dimensionality reduction via tangent space alignment.
|
||||
Journal of Shanghai Univ. 8:406 (2004)
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, *, n_neighbors=5, n_components=2, reg=1E-3,
|
||||
eigen_solver='auto', tol=1E-6, max_iter=100,
|
||||
method='standard', hessian_tol=1E-4, modified_tol=1E-12,
|
||||
neighbors_algorithm='auto', random_state=None, n_jobs=None):
|
||||
self.n_neighbors = n_neighbors
|
||||
self.n_components = n_components
|
||||
self.reg = reg
|
||||
self.eigen_solver = eigen_solver
|
||||
self.tol = tol
|
||||
self.max_iter = max_iter
|
||||
self.method = method
|
||||
self.hessian_tol = hessian_tol
|
||||
self.modified_tol = modified_tol
|
||||
self.random_state = random_state
|
||||
self.neighbors_algorithm = neighbors_algorithm
|
||||
self.n_jobs = n_jobs
|
||||
|
||||
def _fit_transform(self, X):
|
||||
self.nbrs_ = NearestNeighbors(n_neighbors=self.n_neighbors,
|
||||
algorithm=self.neighbors_algorithm,
|
||||
n_jobs=self.n_jobs)
|
||||
|
||||
random_state = check_random_state(self.random_state)
|
||||
X = self._validate_data(X, dtype=float)
|
||||
self.nbrs_.fit(X)
|
||||
self.embedding_, self.reconstruction_error_ = \
|
||||
locally_linear_embedding(
|
||||
X=self.nbrs_, n_neighbors=self.n_neighbors,
|
||||
n_components=self.n_components,
|
||||
eigen_solver=self.eigen_solver, tol=self.tol,
|
||||
max_iter=self.max_iter, method=self.method,
|
||||
hessian_tol=self.hessian_tol, modified_tol=self.modified_tol,
|
||||
random_state=random_state, reg=self.reg, n_jobs=self.n_jobs)
|
||||
|
||||
def fit(self, X, y=None):
|
||||
"""Compute the embedding vectors for data X
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape [n_samples, n_features]
|
||||
training set.
|
||||
|
||||
y : Ignored
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : returns an instance of self.
|
||||
"""
|
||||
self._fit_transform(X)
|
||||
return self
|
||||
|
||||
def fit_transform(self, X, y=None):
|
||||
"""Compute the embedding vectors for data X and transform X.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape [n_samples, n_features]
|
||||
training set.
|
||||
|
||||
y : Ignored
|
||||
|
||||
Returns
|
||||
-------
|
||||
X_new : array-like, shape (n_samples, n_components)
|
||||
"""
|
||||
self._fit_transform(X)
|
||||
return self.embedding_
|
||||
|
||||
def transform(self, X):
|
||||
"""
|
||||
Transform new points into embedding space.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
|
||||
Returns
|
||||
-------
|
||||
X_new : array, shape = [n_samples, n_components]
|
||||
|
||||
Notes
|
||||
-----
|
||||
Because of the scaling performed by this method, it is discouraged to use
it together with methods that are not scale-invariant (like SVMs).
|
||||
"""
|
||||
check_is_fitted(self)
|
||||
|
||||
X = check_array(X)
|
||||
ind = self.nbrs_.kneighbors(X, n_neighbors=self.n_neighbors,
|
||||
return_distance=False)
|
||||
weights = barycenter_weights(X, self.nbrs_._fit_X[ind],
|
||||
reg=self.reg)
|
||||
X_new = np.empty((X.shape[0], self.n_components))
|
||||
for i in range(X.shape[0]):
|
||||
X_new[i] = np.dot(self.embedding_[ind[i]].T, weights[i])
|
||||
return X_new
|
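A short usage sketch (an illustrative addition, not part of the committed file) contrasting the standard and Hessian variants documented above. For method='hessian' the docstring requires n_neighbors > n_components * (n_components + 3) / 2, hence the larger neighborhood. A standard scikit-learn installation is assumed.

from sklearn.datasets import load_digits
from sklearn.manifold import LocallyLinearEmbedding

X, _ = load_digits(return_X_y=True)
X = X[:100]
lle_standard = LocallyLinearEmbedding(n_neighbors=5, n_components=2, method='standard')
lle_hessian = LocallyLinearEmbedding(n_neighbors=10, n_components=2, method='hessian')
print(lle_standard.fit_transform(X).shape)  # (100, 2)
print(lle_hessian.fit_transform(X).shape)   # (100, 2)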
venv/Lib/site-packages/sklearn/manifold/_mds.py (new file, 439 lines)
@@ -0,0 +1,439 @@
"""
|
||||
Multi-dimensional Scaling (MDS)
|
||||
"""
|
||||
|
||||
# author: Nelle Varoquaux <nelle.varoquaux@gmail.com>
|
||||
# License: BSD
|
||||
|
||||
import numpy as np
|
||||
from joblib import Parallel, delayed, effective_n_jobs
|
||||
|
||||
import warnings
|
||||
|
||||
from ..base import BaseEstimator
|
||||
from ..metrics import euclidean_distances
|
||||
from ..utils import check_random_state, check_array, check_symmetric
|
||||
from ..isotonic import IsotonicRegression
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
|
||||
|
||||
def _smacof_single(dissimilarities, metric=True, n_components=2, init=None,
|
||||
max_iter=300, verbose=0, eps=1e-3, random_state=None):
|
||||
"""Computes multidimensional scaling using SMACOF algorithm
|
||||
|
||||
Parameters
|
||||
----------
|
||||
dissimilarities : ndarray, shape (n_samples, n_samples)
|
||||
Pairwise dissimilarities between the points. Must be symmetric.
|
||||
|
||||
metric : boolean, optional, default: True
|
||||
Compute metric or nonmetric SMACOF algorithm.
|
||||
|
||||
n_components : int, optional, default: 2
|
||||
Number of dimensions in which to immerse the dissimilarities. If an
|
||||
``init`` array is provided, this option is overridden and the shape of
|
||||
``init`` is used to determine the dimensionality of the embedding
|
||||
space.
|
||||
|
||||
init : ndarray, shape (n_samples, n_components), optional, default: None
|
||||
Starting configuration of the embedding to initialize the algorithm. By
|
||||
default, the algorithm is initialized with a randomly chosen array.
|
||||
|
||||
max_iter : int, optional, default: 300
|
||||
Maximum number of iterations of the SMACOF algorithm for a single run.
|
||||
|
||||
verbose : int, optional, default: 0
|
||||
Level of verbosity.
|
||||
|
||||
eps : float, optional, default: 1e-3
|
||||
Relative tolerance with respect to stress at which to declare
|
||||
convergence.
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
Determines the random number generator used to initialize the centers.
|
||||
Pass an int for reproducible results across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
X : ndarray, shape (n_samples, n_components)
|
||||
Coordinates of the points in a ``n_components``-space.
|
||||
|
||||
stress : float
|
||||
The final value of the stress (sum of squared distance of the
|
||||
disparities and the distances for all constrained points).
|
||||
|
||||
n_iter : int
|
||||
The number of iterations corresponding to the best stress.
|
||||
"""
|
||||
dissimilarities = check_symmetric(dissimilarities, raise_exception=True)
|
||||
|
||||
n_samples = dissimilarities.shape[0]
|
||||
random_state = check_random_state(random_state)
|
||||
|
||||
sim_flat = ((1 - np.tri(n_samples)) * dissimilarities).ravel()
|
||||
sim_flat_w = sim_flat[sim_flat != 0]
|
||||
if init is None:
|
||||
# Randomly choose initial configuration
|
||||
X = random_state.rand(n_samples * n_components)
|
||||
X = X.reshape((n_samples, n_components))
|
||||
else:
|
||||
# init overrides the n_components parameter
|
||||
n_components = init.shape[1]
|
||||
if n_samples != init.shape[0]:
|
||||
raise ValueError("init matrix should be of shape (%d, %d)" %
|
||||
(n_samples, n_components))
|
||||
X = init
|
||||
|
||||
old_stress = None
|
||||
ir = IsotonicRegression()
|
||||
for it in range(max_iter):
|
||||
# Compute distance and monotonic regression
|
||||
dis = euclidean_distances(X)
|
||||
|
||||
if metric:
|
||||
disparities = dissimilarities
|
||||
else:
|
||||
dis_flat = dis.ravel()
|
||||
# dissimilarities with 0 are considered as missing values
|
||||
dis_flat_w = dis_flat[sim_flat != 0]
|
||||
|
||||
# Compute the disparities using a monotonic regression
|
||||
disparities_flat = ir.fit_transform(sim_flat_w, dis_flat_w)
|
||||
disparities = dis_flat.copy()
|
||||
disparities[sim_flat != 0] = disparities_flat
|
||||
disparities = disparities.reshape((n_samples, n_samples))
|
||||
disparities *= np.sqrt((n_samples * (n_samples - 1) / 2) /
|
||||
(disparities ** 2).sum())
|
||||
|
||||
# Compute stress
|
||||
stress = ((dis.ravel() - disparities.ravel()) ** 2).sum() / 2
|
||||
|
||||
# Update X using the Guttman transform
|
||||
dis[dis == 0] = 1e-5
|
||||
ratio = disparities / dis
|
||||
B = - ratio
|
||||
B[np.arange(len(B)), np.arange(len(B))] += ratio.sum(axis=1)
|
||||
X = 1. / n_samples * np.dot(B, X)
|
||||
|
||||
dis = np.sqrt((X ** 2).sum(axis=1)).sum()
|
||||
if verbose >= 2:
|
||||
print('it: %d, stress %s' % (it, stress))
|
||||
if old_stress is not None:
|
||||
if (old_stress - stress / dis) < eps:
|
||||
if verbose:
|
||||
print('breaking at iteration %d with stress %s' % (it,
|
||||
stress))
|
||||
break
|
||||
old_stress = stress / dis
|
||||
|
||||
return X, stress, it + 1
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def smacof(dissimilarities, *, metric=True, n_components=2, init=None,
|
||||
n_init=8, n_jobs=None, max_iter=300, verbose=0, eps=1e-3,
|
||||
random_state=None, return_n_iter=False):
|
||||
"""Computes multidimensional scaling using the SMACOF algorithm.
|
||||
|
||||
The SMACOF (Scaling by MAjorizing a COmplicated Function) algorithm is a
|
||||
multidimensional scaling algorithm which minimizes an objective function
|
||||
(the *stress*) using a majorization technique. Stress majorization, also
|
||||
known as the Guttman Transform, guarantees a monotone convergence of
|
||||
stress, and is more powerful than traditional techniques such as gradient
|
||||
descent.
|
||||
|
||||
The SMACOF algorithm for metric MDS can be summarized by the following steps:
|
||||
|
||||
1. Set an initial start configuration, randomly or not.
|
||||
2. Compute the stress
|
||||
3. Compute the Guttman Transform
|
||||
4. Iterate 2 and 3 until convergence.
|
||||
|
||||
The nonmetric algorithm adds a monotonic regression step before computing
|
||||
the stress.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
dissimilarities : ndarray, shape (n_samples, n_samples)
|
||||
Pairwise dissimilarities between the points. Must be symmetric.
|
||||
|
||||
metric : boolean, optional, default: True
|
||||
Compute metric or nonmetric SMACOF algorithm.
|
||||
|
||||
n_components : int, optional, default: 2
|
||||
Number of dimensions in which to immerse the dissimilarities. If an
|
||||
``init`` array is provided, this option is overridden and the shape of
|
||||
``init`` is used to determine the dimensionality of the embedding
|
||||
space.
|
||||
|
||||
init : ndarray, shape (n_samples, n_components), optional, default: None
|
||||
Starting configuration of the embedding to initialize the algorithm. By
|
||||
default, the algorithm is initialized with a randomly chosen array.
|
||||
|
||||
n_init : int, optional, default: 8
|
||||
Number of times the SMACOF algorithm will be run with different
|
||||
initializations. The final results will be the best output of the runs,
|
||||
determined by the run with the smallest final stress. If ``init`` is
|
||||
provided, this option is overridden and a single run is performed.
|
||||
|
||||
n_jobs : int or None, optional (default=None)
|
||||
The number of jobs to use for the computation. If multiple
|
||||
initializations are used (``n_init``), each run of the algorithm is
|
||||
computed in parallel.
|
||||
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
max_iter : int, optional, default: 300
|
||||
Maximum number of iterations of the SMACOF algorithm for a single run.
|
||||
|
||||
verbose : int, optional, default: 0
|
||||
Level of verbosity.
|
||||
|
||||
eps : float, optional, default: 1e-3
|
||||
Relative tolerance with respect to stress at which to declare
|
||||
convergence.
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
Determines the random number generator used to initialize the centers.
|
||||
Pass an int for reproducible results across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
return_n_iter : bool, optional, default: False
|
||||
Whether or not to return the number of iterations.
|
||||
|
||||
Returns
|
||||
-------
|
||||
X : ndarray, shape (n_samples, n_components)
|
||||
Coordinates of the points in a ``n_components``-space.
|
||||
|
||||
stress : float
|
||||
The final value of the stress (sum of squared distance of the
|
||||
disparities and the distances for all constrained points).
|
||||
|
||||
n_iter : int
|
||||
The number of iterations corresponding to the best stress. Returned
|
||||
only if ``return_n_iter`` is set to ``True``.
|
||||
|
||||
Notes
|
||||
-----
|
||||
"Modern Multidimensional Scaling - Theory and Applications" Borg, I.;
|
||||
Groenen P. Springer Series in Statistics (1997)
|
||||
|
||||
"Nonmetric multidimensional scaling: a numerical method" Kruskal, J.
|
||||
Psychometrika, 29 (1964)
|
||||
|
||||
"Multidimensional scaling by optimizing goodness of fit to a nonmetric
|
||||
hypothesis" Kruskal, J. Psychometrika, 29, (1964)
|
||||
"""
|
||||
|
||||
dissimilarities = check_array(dissimilarities)
|
||||
random_state = check_random_state(random_state)
|
||||
|
||||
if hasattr(init, '__array__'):
|
||||
init = np.asarray(init).copy()
|
||||
if not n_init == 1:
|
||||
warnings.warn(
|
||||
'Explicit initial positions passed: '
|
||||
'performing only one init of the MDS instead of %d'
|
||||
% n_init)
|
||||
n_init = 1
|
||||
|
||||
best_pos, best_stress = None, None
|
||||
|
||||
if effective_n_jobs(n_jobs) == 1:
|
||||
for it in range(n_init):
|
||||
pos, stress, n_iter_ = _smacof_single(
|
||||
dissimilarities, metric=metric,
|
||||
n_components=n_components, init=init,
|
||||
max_iter=max_iter, verbose=verbose,
|
||||
eps=eps, random_state=random_state)
|
||||
if best_stress is None or stress < best_stress:
|
||||
best_stress = stress
|
||||
best_pos = pos.copy()
|
||||
best_iter = n_iter_
|
||||
else:
|
||||
seeds = random_state.randint(np.iinfo(np.int32).max, size=n_init)
|
||||
results = Parallel(n_jobs=n_jobs, verbose=max(verbose - 1, 0))(
|
||||
delayed(_smacof_single)(
|
||||
dissimilarities, metric=metric, n_components=n_components,
|
||||
init=init, max_iter=max_iter, verbose=verbose, eps=eps,
|
||||
random_state=seed)
|
||||
for seed in seeds)
|
||||
positions, stress, n_iters = zip(*results)
|
||||
best = np.argmin(stress)
|
||||
best_stress = stress[best]
|
||||
best_pos = positions[best]
|
||||
best_iter = n_iters[best]
|
||||
|
||||
if return_n_iter:
|
||||
return best_pos, best_stress, best_iter
|
||||
else:
|
||||
return best_pos, best_stress
|
||||
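# Illustrative sketch (not part of the upstream file): smacof() can be called
# directly on a small symmetric dissimilarity matrix; it returns the embedded
# coordinates and the final stress, plus the iteration count when
# return_n_iter=True. Assuming scikit-learn is installed:
#
#     import numpy as np
#     from sklearn.manifold import smacof
#
#     D = np.array([[0., 1., 2.],
#                   [1., 0., 1.],
#                   [2., 1., 0.]])
#     pos, stress, n_iter = smacof(D, n_components=2, random_state=0,
#                                  return_n_iter=True)
#     pos.shape   # (3, 2)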
|
||||
|
||||
class MDS(BaseEstimator):
|
||||
"""Multidimensional scaling
|
||||
|
||||
Read more in the :ref:`User Guide <multidimensional_scaling>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_components : int, optional, default: 2
|
||||
Number of dimensions in which to immerse the dissimilarities.
|
||||
|
||||
metric : boolean, optional, default: True
|
||||
If ``True``, perform metric MDS; otherwise, perform nonmetric MDS.
|
||||
|
||||
n_init : int, optional, default: 4
|
||||
Number of times the SMACOF algorithm will be run with different
|
||||
initializations. The final results will be the best output of the runs,
|
||||
determined by the run with the smallest final stress.
|
||||
|
||||
max_iter : int, optional, default: 300
|
||||
Maximum number of iterations of the SMACOF algorithm for a single run.
|
||||
|
||||
verbose : int, optional, default: 0
|
||||
Level of verbosity.
|
||||
|
||||
eps : float, optional, default: 1e-3
|
||||
Relative tolerance with respect to stress at which to declare
|
||||
convergence.
|
||||
|
||||
n_jobs : int or None, optional (default=None)
|
||||
The number of jobs to use for the computation. If multiple
|
||||
initializations are used (``n_init``), each run of the algorithm is
|
||||
computed in parallel.
|
||||
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
Determines the random number generator used to initialize the centers.
|
||||
Pass an int for reproducible results across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
dissimilarity : 'euclidean' | 'precomputed', optional, default: 'euclidean'
|
||||
Dissimilarity measure to use:
|
||||
|
||||
- 'euclidean':
|
||||
Pairwise Euclidean distances between points in the dataset.
|
||||
|
||||
- 'precomputed':
|
||||
Pre-computed dissimilarities are passed directly to ``fit`` and
|
||||
``fit_transform``.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
embedding_ : array-like, shape (n_samples, n_components)
|
||||
Stores the position of the dataset in the embedding space.
|
||||
|
||||
stress_ : float
|
||||
The final value of the stress (sum of squared distance of the
|
||||
disparities and the distances for all constrained points).
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.datasets import load_digits
|
||||
>>> from sklearn.manifold import MDS
|
||||
>>> X, _ = load_digits(return_X_y=True)
|
||||
>>> X.shape
|
||||
(1797, 64)
|
||||
>>> embedding = MDS(n_components=2)
|
||||
>>> X_transformed = embedding.fit_transform(X[:100])
|
||||
>>> X_transformed.shape
|
||||
(100, 2)
|
||||
|
||||
References
|
||||
----------
|
||||
"Modern Multidimensional Scaling - Theory and Applications" Borg, I.;
|
||||
Groenen P. Springer Series in Statistics (1997)
|
||||
|
||||
"Nonmetric multidimensional scaling: a numerical method" Kruskal, J.
|
||||
Psychometrika, 29 (1964)
|
||||
|
||||
"Multidimensional scaling by optimizing goodness of fit to a nonmetric
|
||||
hypothesis" Kruskal, J. Psychometrika, 29, (1964)
|
||||
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, n_components=2, *, metric=True, n_init=4,
|
||||
max_iter=300, verbose=0, eps=1e-3, n_jobs=None,
|
||||
random_state=None, dissimilarity="euclidean"):
|
||||
self.n_components = n_components
|
||||
self.dissimilarity = dissimilarity
|
||||
self.metric = metric
|
||||
self.n_init = n_init
|
||||
self.max_iter = max_iter
|
||||
self.eps = eps
|
||||
self.verbose = verbose
|
||||
self.n_jobs = n_jobs
|
||||
self.random_state = random_state
|
||||
|
||||
@property
|
||||
def _pairwise(self):
|
||||
return self.dissimilarity == "precomputed"
|
||||
|
||||
def fit(self, X, y=None, init=None):
|
||||
"""
|
||||
Computes the position of the points in the embedding space
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array, shape (n_samples, n_features) or (n_samples, n_samples)
|
||||
Input data. If ``dissimilarity=='precomputed'``, the input should
|
||||
be the dissimilarity matrix.
|
||||
|
||||
y : Ignored
|
||||
|
||||
init : ndarray, shape (n_samples, n_components), optional, default: None
|
||||
Starting configuration of the embedding to initialize the SMACOF
|
||||
algorithm. By default, the algorithm is initialized with a randomly
|
||||
chosen array.
|
||||
"""
|
||||
self.fit_transform(X, init=init)
|
||||
return self
|
||||
|
||||
def fit_transform(self, X, y=None, init=None):
|
||||
"""
|
||||
Fit the data from X, and returns the embedded coordinates
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array, shape (n_samples, n_features) or (n_samples, n_samples)
|
||||
Input data. If ``dissimilarity=='precomputed'``, the input should
|
||||
be the dissimilarity matrix.
|
||||
|
||||
y : Ignored
|
||||
|
||||
init : ndarray, shape (n_samples, n_components), optional, default: None
|
||||
Starting configuration of the embedding to initialize the SMACOF
|
||||
algorithm. By default, the algorithm is initialized with a randomly
|
||||
chosen array.
|
||||
"""
|
||||
X = self._validate_data(X)
|
||||
if X.shape[0] == X.shape[1] and self.dissimilarity != "precomputed":
|
||||
warnings.warn("The MDS API has changed. ``fit`` now constructs an"
|
||||
" dissimilarity matrix from data. To use a custom "
|
||||
"dissimilarity matrix, set "
|
||||
"``dissimilarity='precomputed'``.")
|
||||
|
||||
if self.dissimilarity == "precomputed":
|
||||
self.dissimilarity_matrix_ = X
|
||||
elif self.dissimilarity == "euclidean":
|
||||
self.dissimilarity_matrix_ = euclidean_distances(X)
|
||||
else:
|
||||
raise ValueError("Proximity must be 'precomputed' or 'euclidean'."
|
||||
" Got %s instead" % str(self.dissimilarity))
|
||||
|
||||
self.embedding_, self.stress_, self.n_iter_ = smacof(
|
||||
self.dissimilarity_matrix_, metric=self.metric,
|
||||
n_components=self.n_components, init=init, n_init=self.n_init,
|
||||
n_jobs=self.n_jobs, max_iter=self.max_iter, verbose=self.verbose,
|
||||
eps=self.eps, random_state=self.random_state,
|
||||
return_n_iter=True)
|
||||
|
||||
return self.embedding_
|
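A brief sketch (an illustrative addition, not part of the committed file) of the dissimilarity='precomputed' path documented above: the caller supplies a symmetric dissimilarity matrix instead of raw features. A standard scikit-learn installation is assumed.

from sklearn.datasets import load_digits
from sklearn.manifold import MDS
from sklearn.metrics import euclidean_distances

X, _ = load_digits(return_X_y=True)
D = euclidean_distances(X[:50])  # symmetric (50, 50) dissimilarity matrix
mds = MDS(n_components=2, dissimilarity='precomputed', random_state=0)
X_emb = mds.fit_transform(D)
print(X_emb.shape)               # (50, 2); the final stress is available as mds.stress_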
venv/Lib/site-packages/sklearn/manifold/_spectral_embedding.py (new file, 577 lines)
@@ -0,0 +1,577 @@
"""Spectral Embedding"""
|
||||
|
||||
# Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
|
||||
# Wei LI <kuantkid@gmail.com>
|
||||
# License: BSD 3 clause
|
||||
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
from scipy import sparse
|
||||
from scipy.linalg import eigh
|
||||
from scipy.sparse.linalg import eigsh
|
||||
from scipy.sparse.csgraph import connected_components
|
||||
from scipy.sparse.csgraph import laplacian as csgraph_laplacian
|
||||
|
||||
from ..base import BaseEstimator
|
||||
from ..utils import check_random_state, check_array, check_symmetric
|
||||
from ..utils.extmath import _deterministic_vector_sign_flip
|
||||
from ..utils.fixes import lobpcg
|
||||
from ..metrics.pairwise import rbf_kernel
|
||||
from ..neighbors import kneighbors_graph, NearestNeighbors
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
|
||||
|
||||
def _graph_connected_component(graph, node_id):
|
||||
"""Find the largest graph connected components that contains one
|
||||
given node
|
||||
|
||||
Parameters
|
||||
----------
|
||||
graph : array-like, shape: (n_samples, n_samples)
|
||||
adjacency matrix of the graph, non-zero weight means an edge
|
||||
between the nodes
|
||||
|
||||
node_id : int
|
||||
The index of the query node of the graph
|
||||
|
||||
Returns
|
||||
-------
|
||||
connected_components_matrix : array-like, shape: (n_samples,)
|
||||
An array of bool values indicating the indices of the nodes
belonging to the largest connected component of the given query
node
|
||||
"""
|
||||
n_node = graph.shape[0]
|
||||
if sparse.issparse(graph):
|
||||
# speed up row-wise access to boolean connection mask
|
||||
graph = graph.tocsr()
|
||||
connected_nodes = np.zeros(n_node, dtype=np.bool)
|
||||
nodes_to_explore = np.zeros(n_node, dtype=np.bool)
|
||||
nodes_to_explore[node_id] = True
|
||||
for _ in range(n_node):
|
||||
last_num_component = connected_nodes.sum()
|
||||
np.logical_or(connected_nodes, nodes_to_explore, out=connected_nodes)
|
||||
if last_num_component >= connected_nodes.sum():
|
||||
break
|
||||
indices = np.where(nodes_to_explore)[0]
|
||||
nodes_to_explore.fill(False)
|
||||
for i in indices:
|
||||
if sparse.issparse(graph):
|
||||
neighbors = graph[i].toarray().ravel()
|
||||
else:
|
||||
neighbors = graph[i]
|
||||
np.logical_or(nodes_to_explore, neighbors, out=nodes_to_explore)
|
||||
return connected_nodes
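# --- Illustrative sketch (not part of the original module) ---------------
# How the helper above behaves on a tiny dense adjacency matrix with two
# components, {0, 1} and {2}.  Importing a private helper is done here only
# for illustration; the import path may change between versions.
import numpy as np
from sklearn.manifold._spectral_embedding import _graph_connected_component

adjacency = np.array([[0., 1., 0.],
                      [1., 0., 0.],
                      [0., 0., 0.]])
print(_graph_connected_component(adjacency, node_id=0))   # [ True  True False]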
|
||||
|
||||
|
||||
def _graph_is_connected(graph):
|
||||
""" Return whether the graph is connected (True) or Not (False)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
graph : array-like or sparse matrix, shape: (n_samples, n_samples)
|
||||
adjacency matrix of the graph, non-zero weight means an edge
|
||||
between the nodes
|
||||
|
||||
Returns
|
||||
-------
|
||||
is_connected : bool
|
||||
True means the graph is fully connected and False means not
|
||||
"""
|
||||
if sparse.isspmatrix(graph):
|
||||
# sparse graph, find all the connected components
|
||||
n_connected_components, _ = connected_components(graph)
|
||||
return n_connected_components == 1
|
||||
else:
|
||||
# dense graph, find the connected component containing node 0
|
||||
return _graph_connected_component(graph, 0).sum() == graph.shape[0]
|
||||
|
||||
|
||||
def _set_diag(laplacian, value, norm_laplacian):
|
||||
"""Set the diagonal of the laplacian matrix and convert it to a
|
||||
sparse format well suited for eigenvalue decomposition
|
||||
|
||||
Parameters
|
||||
----------
|
||||
laplacian : array or sparse matrix
|
||||
The graph laplacian
|
||||
value : float
|
||||
The value of the diagonal
|
||||
norm_laplacian : bool
|
||||
Whether the value of the diagonal should be changed or not
|
||||
|
||||
Returns
|
||||
-------
|
||||
laplacian : array or sparse matrix
|
||||
An array or sparse matrix in a form that is well suited to fast
|
||||
eigenvalue decomposition, depending on the band width of the
|
||||
matrix.
|
||||
"""
|
||||
n_nodes = laplacian.shape[0]
|
||||
# We need to set all entries in the diagonal to `value`
|
||||
if not sparse.isspmatrix(laplacian):
|
||||
if norm_laplacian:
|
||||
laplacian.flat[::n_nodes + 1] = value
|
||||
else:
|
||||
laplacian = laplacian.tocoo()
|
||||
if norm_laplacian:
|
||||
diag_idx = (laplacian.row == laplacian.col)
|
||||
laplacian.data[diag_idx] = value
|
||||
# If the matrix has a small number of diagonals (as in the
|
||||
# case of structured matrices coming from images), the
|
||||
# dia format might be best suited for matvec products:
|
||||
n_diags = np.unique(laplacian.row - laplacian.col).size
|
||||
if n_diags <= 7:
|
||||
# 3 or less outer diagonals on each side
|
||||
laplacian = laplacian.todia()
|
||||
else:
|
||||
# csr has the fastest matvec and is thus best suited to
|
||||
# arpack
|
||||
laplacian = laplacian.tocsr()
|
||||
return laplacian
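# --- Illustrative sketch (not part of the original module) ---------------
# Effect of _set_diag on a small sparse Laplacian: the diagonal is set to
# `value` and a format suited to matvec products is returned (dia here,
# because the matrix has few non-zero diagonals).  Private helper, used
# only to illustrate the docstring above.
import numpy as np
from scipy import sparse
from sklearn.manifold._spectral_embedding import _set_diag

L = sparse.csr_matrix(np.array([[ 2., -1., -1.],
                                [-1.,  2., -1.],
                                [-1., -1.,  2.]]))
L = _set_diag(L, 1, norm_laplacian=True)
print(L.format, L.diagonal())                       # dia [1. 1. 1.]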
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def spectral_embedding(adjacency, *, n_components=8, eigen_solver=None,
|
||||
random_state=None, eigen_tol=0.0,
|
||||
norm_laplacian=True, drop_first=True):
|
||||
"""Project the sample on the first eigenvectors of the graph Laplacian.
|
||||
|
||||
The adjacency matrix is used to compute a normalized graph Laplacian
|
||||
whose spectrum (especially the eigenvectors associated to the
|
||||
smallest eigenvalues) has an interpretation in terms of minimal
|
||||
number of cuts necessary to split the graph into comparably sized
|
||||
components.
|
||||
|
||||
This embedding can also 'work' even if the ``adjacency`` variable is
|
||||
not strictly the adjacency matrix of a graph but more generally
|
||||
an affinity or similarity matrix between samples (for instance the
|
||||
heat kernel of a euclidean distance matrix or a k-NN matrix).
|
||||
|
||||
However, care must be taken to always make the affinity matrix symmetric
|
||||
so that the eigenvector decomposition works as expected.
|
||||
|
||||
Note : Laplacian Eigenmaps is the actual algorithm implemented here.
|
||||
|
||||
Read more in the :ref:`User Guide <spectral_embedding>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
adjacency : array-like or sparse graph, shape: (n_samples, n_samples)
|
||||
The adjacency matrix of the graph to embed.
|
||||
|
||||
n_components : integer, optional, default 8
|
||||
The dimension of the projection subspace.
|
||||
|
||||
eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}, default None
|
||||
The eigenvalue decomposition strategy to use. AMG requires pyamg
|
||||
to be installed. It can be faster on very large, sparse problems,
|
||||
but may also lead to instabilities.
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
Determines the random number generator used for the initialization of
|
||||
the lobpcg eigenvectors decomposition when ``solver`` == 'amg'. Pass
|
||||
an int for reproducible results across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
eigen_tol : float, optional, default=0.0
|
||||
Stopping criterion for eigendecomposition of the Laplacian matrix
|
||||
when using arpack eigen_solver.
|
||||
|
||||
norm_laplacian : bool, optional, default=True
|
||||
If True, then compute normalized Laplacian.
|
||||
|
||||
drop_first : bool, optional, default=True
|
||||
Whether to drop the first eigenvector. For spectral embedding, this
|
||||
should be True as the first eigenvector should be a constant vector for a
|
||||
connected graph, but for spectral clustering, this should be kept as
|
||||
False to retain the first eigenvector.
|
||||
|
||||
Returns
|
||||
-------
|
||||
embedding : array, shape=(n_samples, n_components)
|
||||
The reduced samples.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Spectral Embedding (Laplacian Eigenmaps) is most useful when the graph
|
||||
has one connected component. If the graph has many components, the first
|
||||
few eigenvectors will simply uncover the connected components of the graph.
|
||||
|
||||
References
|
||||
----------
|
||||
* https://en.wikipedia.org/wiki/LOBPCG
|
||||
|
||||
* Toward the Optimal Preconditioned Eigensolver: Locally Optimal
|
||||
Block Preconditioned Conjugate Gradient Method
|
||||
Andrew V. Knyazev
|
||||
https://doi.org/10.1137%2FS1064827500366124
|
||||
"""
|
||||
adjacency = check_symmetric(adjacency)
|
||||
|
||||
try:
|
||||
from pyamg import smoothed_aggregation_solver
|
||||
except ImportError:
|
||||
if eigen_solver == "amg":
|
||||
raise ValueError("The eigen_solver was set to 'amg', but pyamg is "
|
||||
"not available.")
|
||||
|
||||
if eigen_solver is None:
|
||||
eigen_solver = 'arpack'
|
||||
elif eigen_solver not in ('arpack', 'lobpcg', 'amg'):
|
||||
raise ValueError("Unknown value for eigen_solver: '%s'."
|
||||
"Should be 'amg', 'arpack', or 'lobpcg'"
|
||||
% eigen_solver)
|
||||
|
||||
random_state = check_random_state(random_state)
|
||||
|
||||
n_nodes = adjacency.shape[0]
|
||||
# Whether to drop the first eigenvector
|
||||
if drop_first:
|
||||
n_components = n_components + 1
|
||||
|
||||
if not _graph_is_connected(adjacency):
|
||||
warnings.warn("Graph is not fully connected, spectral embedding"
|
||||
" may not work as expected.")
|
||||
|
||||
laplacian, dd = csgraph_laplacian(adjacency, normed=norm_laplacian,
|
||||
return_diag=True)
|
||||
if (eigen_solver == 'arpack' or eigen_solver != 'lobpcg' and
|
||||
(not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)):
|
||||
# lobpcg used with eigen_solver='amg' has bugs for low number of nodes
|
||||
# for details see the source code in scipy:
|
||||
# https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen
|
||||
# /lobpcg/lobpcg.py#L237
|
||||
# or matlab:
|
||||
# https://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
|
||||
laplacian = _set_diag(laplacian, 1, norm_laplacian)
|
||||
|
||||
# Here we'll use shift-invert mode for fast eigenvalues
|
||||
# (see https://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
|
||||
# for a short explanation of what this means)
|
||||
# Because the normalized Laplacian has eigenvalues between 0 and 2,
|
||||
# I - L has eigenvalues between -1 and 1. ARPACK is most efficient
|
||||
# when finding eigenvalues of largest magnitude (keyword which='LM')
|
||||
# and when these eigenvalues are very large compared to the rest.
|
||||
# For very large, very sparse graphs, I - L can have many, many
|
||||
# eigenvalues very near 1.0. This leads to slow convergence. So
|
||||
# instead, we'll use ARPACK's shift-invert mode, asking for the
|
||||
# eigenvalues near 1.0. This effectively spreads-out the spectrum
|
||||
# near 1.0 and leads to much faster convergence: potentially an
|
||||
# orders-of-magnitude speedup over simply using keyword which='LA'
|
||||
# in standard mode.
|
||||
try:
|
||||
# We are computing the opposite of the laplacian inplace so as
|
||||
# to spare a memory allocation of a possibly very large array
|
||||
laplacian *= -1
|
||||
v0 = random_state.uniform(-1, 1, laplacian.shape[0])
|
||||
_, diffusion_map = eigsh(
|
||||
laplacian, k=n_components, sigma=1.0, which='LM',
|
||||
tol=eigen_tol, v0=v0)
|
||||
embedding = diffusion_map.T[n_components::-1]
|
||||
if norm_laplacian:
|
||||
embedding = embedding / dd
|
||||
except RuntimeError:
|
||||
# When submatrices are exactly singular, an LU decomposition
|
||||
# in arpack fails. We fallback to lobpcg
|
||||
eigen_solver = "lobpcg"
|
||||
# Revert the laplacian to its opposite to have lobpcg work
|
||||
laplacian *= -1
|
||||
|
||||
elif eigen_solver == 'amg':
|
||||
# Use AMG to get a preconditioner and speed up the eigenvalue
|
||||
# problem.
|
||||
if not sparse.issparse(laplacian):
|
||||
warnings.warn("AMG works better for sparse matrices")
|
||||
# lobpcg needs double precision floats
|
||||
laplacian = check_array(laplacian, dtype=np.float64,
|
||||
accept_sparse=True)
|
||||
laplacian = _set_diag(laplacian, 1, norm_laplacian)
|
||||
|
||||
# The Laplacian matrix is always singular, having at least one zero
|
||||
# eigenvalue, corresponding to the trivial eigenvector, which is a
|
||||
# constant. Using a singular matrix for preconditioning may result in
|
||||
# random failures in LOBPCG and is not supported by the existing
|
||||
# theory:
|
||||
# see https://doi.org/10.1007/s10208-015-9297-1
|
||||
# Shift the Laplacian so its diagonal is not all ones. The shift
|
||||
# does change the eigenpairs however, so we'll feed the shifted
|
||||
# matrix to the solver and afterward set it back to the original.
|
||||
diag_shift = 1e-5 * sparse.eye(laplacian.shape[0])
|
||||
laplacian += diag_shift
|
||||
ml = smoothed_aggregation_solver(check_array(laplacian,
|
||||
accept_sparse='csr'))
|
||||
laplacian -= diag_shift
|
||||
|
||||
M = ml.aspreconditioner()
|
||||
X = random_state.rand(laplacian.shape[0], n_components + 1)
|
||||
X[:, 0] = dd.ravel()
|
||||
_, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-5,
|
||||
largest=False)
|
||||
embedding = diffusion_map.T
|
||||
if norm_laplacian:
|
||||
embedding = embedding / dd
|
||||
if embedding.shape[0] == 1:
|
||||
raise ValueError
|
||||
|
||||
if eigen_solver == "lobpcg":
|
||||
# lobpcg needs double precision floats
|
||||
laplacian = check_array(laplacian, dtype=np.float64,
|
||||
accept_sparse=True)
|
||||
if n_nodes < 5 * n_components + 1:
|
||||
# see note above under arpack why lobpcg has problems with small
|
||||
# number of nodes
|
||||
# lobpcg will fallback to eigh, so we short circuit it
|
||||
if sparse.isspmatrix(laplacian):
|
||||
laplacian = laplacian.toarray()
|
||||
_, diffusion_map = eigh(laplacian)
|
||||
embedding = diffusion_map.T[:n_components]
|
||||
if norm_laplacian:
|
||||
embedding = embedding / dd
|
||||
else:
|
||||
laplacian = _set_diag(laplacian, 1, norm_laplacian)
|
||||
# We increase the number of eigenvectors requested, as lobpcg
|
||||
# doesn't behave well in low dimension
|
||||
X = random_state.rand(laplacian.shape[0], n_components + 1)
|
||||
X[:, 0] = dd.ravel()
|
||||
_, diffusion_map = lobpcg(laplacian, X, tol=1e-15,
|
||||
largest=False, maxiter=2000)
|
||||
embedding = diffusion_map.T[:n_components]
|
||||
if norm_laplacian:
|
||||
embedding = embedding / dd
|
||||
if embedding.shape[0] == 1:
|
||||
raise ValueError
|
||||
|
||||
embedding = _deterministic_vector_sign_flip(embedding)
|
||||
if drop_first:
|
||||
return embedding[1:n_components].T
|
||||
else:
|
||||
return embedding[:n_components].T
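# --- Illustrative sketch (not part of the original module) ---------------
# Calling spectral_embedding directly on a dense RBF affinity matrix, as the
# docstring above allows; the data and gamma are arbitrary.
import numpy as np
from sklearn.manifold import spectral_embedding
from sklearn.metrics.pairwise import rbf_kernel

X = np.random.RandomState(0).rand(60, 3)
affinity = rbf_kernel(X, gamma=1.0)                 # symmetric by construction
emb = spectral_embedding(affinity, n_components=2, random_state=0)
print(emb.shape)                                    # (60, 2)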
|
||||
|
||||
|
||||
class SpectralEmbedding(BaseEstimator):
|
||||
"""Spectral embedding for non-linear dimensionality reduction.
|
||||
|
||||
Forms an affinity matrix given by the specified function and
|
||||
applies spectral decomposition to the corresponding graph laplacian.
|
||||
The resulting transformation is given by the value of the
|
||||
eigenvectors for each data point.
|
||||
|
||||
Note : Laplacian Eigenmaps is the actual algorithm implemented here.
|
||||
|
||||
Read more in the :ref:`User Guide <spectral_embedding>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_components : integer, default: 2
|
||||
The dimension of the projected subspace.
|
||||
|
||||
affinity : string or callable, default : "nearest_neighbors"
|
||||
How to construct the affinity matrix.
|
||||
- 'nearest_neighbors' : construct the affinity matrix by computing a
|
||||
graph of nearest neighbors.
|
||||
- 'rbf' : construct the affinity matrix by computing a radial basis
|
||||
function (RBF) kernel.
|
||||
- 'precomputed' : interpret ``X`` as a precomputed affinity matrix.
|
||||
- 'precomputed_nearest_neighbors' : interpret ``X`` as a sparse graph
|
||||
of precomputed nearest neighbors, and constructs the affinity matrix
|
||||
by selecting the ``n_neighbors`` nearest neighbors.
|
||||
- callable : use passed in function as affinity
|
||||
the function takes in data matrix (n_samples, n_features)
|
||||
and return affinity matrix (n_samples, n_samples).
|
||||
|
||||
gamma : float, optional, default : 1/n_features
|
||||
Kernel coefficient for rbf kernel.
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
Determines the random number generator used for the initialization of
|
||||
the lobpcg eigenvectors when ``solver`` == 'amg'. Pass an int for
|
||||
reproducible results across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}
|
||||
The eigenvalue decomposition strategy to use. AMG requires pyamg
|
||||
to be installed. It can be faster on very large, sparse problems.
|
||||
|
||||
n_neighbors : int, default : max(n_samples/10 , 1)
|
||||
Number of nearest neighbors for nearest_neighbors graph building.
|
||||
|
||||
n_jobs : int or None, optional (default=None)
|
||||
The number of parallel jobs to run.
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
|
||||
embedding_ : array, shape = (n_samples, n_components)
|
||||
Spectral embedding of the training matrix.
|
||||
|
||||
affinity_matrix_ : array, shape = (n_samples, n_samples)
|
||||
Affinity matrix constructed from samples or precomputed.
|
||||
|
||||
n_neighbors_ : int
|
||||
Number of nearest neighbors effectively used.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.datasets import load_digits
|
||||
>>> from sklearn.manifold import SpectralEmbedding
|
||||
>>> X, _ = load_digits(return_X_y=True)
|
||||
>>> X.shape
|
||||
(1797, 64)
|
||||
>>> embedding = SpectralEmbedding(n_components=2)
|
||||
>>> X_transformed = embedding.fit_transform(X[:100])
|
||||
>>> X_transformed.shape
|
||||
(100, 2)
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
- A Tutorial on Spectral Clustering, 2007
|
||||
Ulrike von Luxburg
|
||||
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323
|
||||
|
||||
- On Spectral Clustering: Analysis and an algorithm, 2001
|
||||
Andrew Y. Ng, Michael I. Jordan, Yair Weiss
|
||||
http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.19.8100
|
||||
|
||||
- Normalized cuts and image segmentation, 2000
|
||||
Jianbo Shi, Jitendra Malik
|
||||
http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.160.2324
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, n_components=2, *, affinity="nearest_neighbors",
|
||||
gamma=None, random_state=None, eigen_solver=None,
|
||||
n_neighbors=None, n_jobs=None):
|
||||
self.n_components = n_components
|
||||
self.affinity = affinity
|
||||
self.gamma = gamma
|
||||
self.random_state = random_state
|
||||
self.eigen_solver = eigen_solver
|
||||
self.n_neighbors = n_neighbors
|
||||
self.n_jobs = n_jobs
|
||||
|
||||
@property
|
||||
def _pairwise(self):
|
||||
return self.affinity in ["precomputed",
|
||||
"precomputed_nearest_neighbors"]
|
||||
|
||||
def _get_affinity_matrix(self, X, Y=None):
|
||||
"""Calculate the affinity matrix from data
|
||||
Parameters
|
||||
----------
|
||||
X : array-like, shape (n_samples, n_features)
|
||||
Training vector, where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
|
||||
If affinity is "precomputed"
|
||||
X : array-like, shape (n_samples, n_samples),
|
||||
Interpret X as precomputed adjacency graph computed from
|
||||
samples.
|
||||
|
||||
Y: Ignored
|
||||
|
||||
Returns
|
||||
-------
|
||||
affinity_matrix, shape (n_samples, n_samples)
|
||||
"""
|
||||
if self.affinity == 'precomputed':
|
||||
self.affinity_matrix_ = X
|
||||
return self.affinity_matrix_
|
||||
if self.affinity == 'precomputed_nearest_neighbors':
|
||||
estimator = NearestNeighbors(n_neighbors=self.n_neighbors,
|
||||
n_jobs=self.n_jobs,
|
||||
metric="precomputed").fit(X)
|
||||
connectivity = estimator.kneighbors_graph(X=X, mode='connectivity')
|
||||
self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)
|
||||
return self.affinity_matrix_
|
||||
if self.affinity == 'nearest_neighbors':
|
||||
if sparse.issparse(X):
|
||||
warnings.warn("Nearest neighbors affinity currently does "
|
||||
"not support sparse input, falling back to "
|
||||
"rbf affinity")
|
||||
self.affinity = "rbf"
|
||||
else:
|
||||
self.n_neighbors_ = (self.n_neighbors
|
||||
if self.n_neighbors is not None
|
||||
else max(int(X.shape[0] / 10), 1))
|
||||
self.affinity_matrix_ = kneighbors_graph(X, self.n_neighbors_,
|
||||
include_self=True,
|
||||
n_jobs=self.n_jobs)
|
||||
# currently only symmetric affinity_matrix supported
|
||||
self.affinity_matrix_ = 0.5 * (self.affinity_matrix_ +
|
||||
self.affinity_matrix_.T)
|
||||
return self.affinity_matrix_
|
||||
if self.affinity == 'rbf':
|
||||
self.gamma_ = (self.gamma
|
||||
if self.gamma is not None else 1.0 / X.shape[1])
|
||||
self.affinity_matrix_ = rbf_kernel(X, gamma=self.gamma_)
|
||||
return self.affinity_matrix_
|
||||
self.affinity_matrix_ = self.affinity(X)
|
||||
return self.affinity_matrix_
|
||||
|
||||
def fit(self, X, y=None):
|
||||
"""Fit the model from data in X.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix}, shape (n_samples, n_features)
|
||||
Training vector, where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
|
||||
If affinity is "precomputed"
|
||||
X : {array-like, sparse matrix}, shape (n_samples, n_samples),
|
||||
Interpret X as precomputed adjacency graph computed from
|
||||
samples.
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : object
|
||||
Returns the instance itself.
|
||||
"""
|
||||
|
||||
X = self._validate_data(X, accept_sparse='csr', ensure_min_samples=2,
|
||||
estimator=self)
|
||||
|
||||
random_state = check_random_state(self.random_state)
|
||||
if isinstance(self.affinity, str):
|
||||
if self.affinity not in {"nearest_neighbors", "rbf", "precomputed",
|
||||
"precomputed_nearest_neighbors"}:
|
||||
raise ValueError(("%s is not a valid affinity. Expected "
|
||||
"'precomputed', 'rbf', 'nearest_neighbors' "
|
||||
"or a callable.") % self.affinity)
|
||||
elif not callable(self.affinity):
|
||||
raise ValueError(("'affinity' is expected to be an affinity "
|
||||
"name or a callable. Got: %s") % self.affinity)
|
||||
|
||||
affinity_matrix = self._get_affinity_matrix(X)
|
||||
self.embedding_ = spectral_embedding(affinity_matrix,
|
||||
n_components=self.n_components,
|
||||
eigen_solver=self.eigen_solver,
|
||||
random_state=random_state)
|
||||
return self
|
||||
|
||||
def fit_transform(self, X, y=None):
|
||||
"""Fit the model from data in X and transform X.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix}, shape (n_samples, n_features)
|
||||
Training vector, where n_samples is the number of samples
|
||||
and n_features is the number of features.
|
||||
|
||||
If affinity is "precomputed"
|
||||
X : {array-like, sparse matrix}, shape (n_samples, n_samples),
|
||||
Interpret X as precomputed adjacency graph computed from
|
||||
samples.
|
||||
|
||||
Returns
|
||||
-------
|
||||
X_new : array-like, shape (n_samples, n_components)
|
||||
"""
|
||||
self.fit(X)
|
||||
return self.embedding_
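# --- Illustrative sketch (not part of the original module) ---------------
# Using the estimator above with affinity='precomputed_nearest_neighbors',
# i.e. passing a precomputed distance matrix instead of raw features.
import numpy as np
from sklearn.datasets import load_digits
from sklearn.manifold import SpectralEmbedding
from sklearn.metrics import pairwise_distances

X, _ = load_digits(return_X_y=True)
D = pairwise_distances(X[:100])                     # dense (100, 100) distances
se = SpectralEmbedding(n_components=2,
                       affinity='precomputed_nearest_neighbors',
                       n_neighbors=10, random_state=0)
print(se.fit_transform(D).shape)                    # (100, 2)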
|
910
venv/Lib/site-packages/sklearn/manifold/_t_sne.py
Normal file
@@ -0,0 +1,910 @@
|
|||
# Author: Alexander Fabisch -- <afabisch@informatik.uni-bremen.de>
|
||||
# Author: Christopher Moody <chrisemoody@gmail.com>
|
||||
# Author: Nick Travers <nickt@squareup.com>
|
||||
# License: BSD 3 clause (C) 2014
|
||||
|
||||
# This is the exact and Barnes-Hut t-SNE implementation. There are other
|
||||
# modifications of the algorithm:
|
||||
# * Fast Optimization for t-SNE:
|
||||
# https://cseweb.ucsd.edu/~lvdmaaten/workshops/nips2010/papers/vandermaaten.pdf
|
||||
|
||||
from time import time
|
||||
import numpy as np
|
||||
from scipy import linalg
|
||||
from scipy.spatial.distance import pdist
|
||||
from scipy.spatial.distance import squareform
|
||||
from scipy.sparse import csr_matrix, issparse
|
||||
from ..neighbors import NearestNeighbors
|
||||
from ..base import BaseEstimator
|
||||
from ..utils import check_random_state
|
||||
from ..utils._openmp_helpers import _openmp_effective_n_threads
|
||||
from ..utils.validation import check_non_negative
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ..decomposition import PCA
|
||||
from ..metrics.pairwise import pairwise_distances
|
||||
from . import _utils
|
||||
# mypy error: Module 'sklearn.manifold' has no attribute '_barnes_hut_tsne'
|
||||
from . import _barnes_hut_tsne # type: ignore
|
||||
|
||||
|
||||
MACHINE_EPSILON = np.finfo(np.double).eps
|
||||
|
||||
|
||||
def _joint_probabilities(distances, desired_perplexity, verbose):
|
||||
"""Compute joint probabilities p_ij from distances.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
distances : array, shape (n_samples * (n_samples-1) / 2,)
|
||||
Distances of samples are stored as condensed matrices, i.e.
|
||||
we omit the diagonal and duplicate entries and store everything
|
||||
in a one-dimensional array.
|
||||
|
||||
desired_perplexity : float
|
||||
Desired perplexity of the joint probability distributions.
|
||||
|
||||
verbose : int
|
||||
Verbosity level.
|
||||
|
||||
Returns
|
||||
-------
|
||||
P : array, shape (n_samples * (n_samples-1) / 2,)
|
||||
Condensed joint probability matrix.
|
||||
"""
|
||||
# Compute conditional probabilities such that they approximately match
|
||||
# the desired perplexity
|
||||
distances = distances.astype(np.float32, copy=False)
|
||||
conditional_P = _utils._binary_search_perplexity(
|
||||
distances, desired_perplexity, verbose)
|
||||
P = conditional_P + conditional_P.T
|
||||
sum_P = np.maximum(np.sum(P), MACHINE_EPSILON)
|
||||
P = np.maximum(squareform(P) / sum_P, MACHINE_EPSILON)
|
||||
return P
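# --- Illustrative sketch (not part of the original module) ---------------
# What the binary search over bandwidths does conceptually for one point:
# find beta = 1 / (2 * sigma^2) such that the conditional distribution
# P_{j|i} has the desired perplexity, i.e. 2 ** (Shannon entropy).  The real
# implementation lives in the compiled _utils._binary_search_perplexity;
# this pure-Python version is only meant to clarify the idea.
import numpy as np

def conditional_p(sq_dist_i, beta):
    # P_{j|i} for one row of squared distances (self-distance excluded).
    p = np.exp(-sq_dist_i * beta)
    return p / np.maximum(p.sum(), 1e-12)

def find_beta(sq_dist_i, perplexity, n_steps=100, tol=1e-5):
    target = np.log(perplexity)                     # target entropy in nats
    lo, hi, beta = 0.0, np.inf, 1.0
    for _ in range(n_steps):
        p = conditional_p(sq_dist_i, beta)
        entropy = -np.sum(p * np.log(np.maximum(p, 1e-12)))
        if abs(entropy - target) < tol:
            break
        if entropy > target:                        # too flat -> sharpen
            lo = beta
            beta = beta * 2.0 if hi == np.inf else (beta + hi) / 2.0
        else:                                       # too peaked -> flatten
            hi = beta
            beta = (beta + lo) / 2.0
    return beta, conditional_p(sq_dist_i, beta)

rng = np.random.RandomState(0)
X = rng.randn(50, 3)
sq_d = ((X[0] - X[1:]) ** 2).sum(axis=1)            # squared distances from x_0
beta, p = find_beta(sq_d, perplexity=10.0)
perp = 2.0 ** (-np.sum(p * np.log2(np.maximum(p, 1e-12))))
print(round(perp, 1))                               # ~10.0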
|
||||
|
||||
|
||||
def _joint_probabilities_nn(distances, desired_perplexity, verbose):
|
||||
"""Compute joint probabilities p_ij from distances using just nearest
|
||||
neighbors.
|
||||
|
||||
This method is approximately equal to _joint_probabilities. The latter
|
||||
is O(N^2), but limiting the joint probability to nearest neighbors improves
|
||||
this substantially to O(uN).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
distances : CSR sparse matrix, shape (n_samples, n_samples)
|
||||
Distances of samples to their n_neighbors nearest neighbors. All other
|
||||
distances are left to zero (and are not materialized in memory).
|
||||
|
||||
desired_perplexity : float
|
||||
Desired perplexity of the joint probability distributions.
|
||||
|
||||
verbose : int
|
||||
Verbosity level.
|
||||
|
||||
Returns
|
||||
-------
|
||||
P : csr sparse matrix, shape (n_samples, n_samples)
|
||||
Condensed joint probability matrix with only nearest neighbors.
|
||||
"""
|
||||
t0 = time()
|
||||
# Compute conditional probabilities such that they approximately match
|
||||
# the desired perplexity
|
||||
distances.sort_indices()
|
||||
n_samples = distances.shape[0]
|
||||
distances_data = distances.data.reshape(n_samples, -1)
|
||||
distances_data = distances_data.astype(np.float32, copy=False)
|
||||
conditional_P = _utils._binary_search_perplexity(
|
||||
distances_data, desired_perplexity, verbose)
|
||||
assert np.all(np.isfinite(conditional_P)), \
|
||||
"All probabilities should be finite"
|
||||
|
||||
# Symmetrize the joint probability distribution using sparse operations
|
||||
P = csr_matrix((conditional_P.ravel(), distances.indices,
|
||||
distances.indptr),
|
||||
shape=(n_samples, n_samples))
|
||||
P = P + P.T
|
||||
|
||||
# Normalize the joint probability distribution
|
||||
sum_P = np.maximum(P.sum(), MACHINE_EPSILON)
|
||||
P /= sum_P
|
||||
|
||||
assert np.all(np.abs(P.data) <= 1.0)
|
||||
if verbose >= 2:
|
||||
duration = time() - t0
|
||||
print("[t-SNE] Computed conditional probabilities in {:.3f}s"
|
||||
.format(duration))
|
||||
return P
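# --- Illustrative sketch (not part of the original module) ---------------
# Building the sparse k-NN distance graph that this function expects, the
# same way TSNE._fit does further below.  Private functions are called
# directly, for illustration only.
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.manifold._t_sne import _joint_probabilities_nn

X = np.random.RandomState(0).randn(100, 10)
knn = NearestNeighbors(n_neighbors=30).fit(X)       # ~3 * perplexity neighbors
dist_graph = knn.kneighbors_graph(mode='distance')  # CSR, shape (100, 100)
dist_graph.data **= 2                                # squared, as in _fit
P = _joint_probabilities_nn(dist_graph, desired_perplexity=10.0, verbose=0)
print(P.shape, float(round(P.sum(), 6)))             # (100, 100) 1.0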
|
||||
|
||||
|
||||
def _kl_divergence(params, P, degrees_of_freedom, n_samples, n_components,
|
||||
skip_num_points=0, compute_error=True):
|
||||
"""t-SNE objective function: gradient of the KL divergence
|
||||
of p_ijs and q_ijs and the absolute error.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
params : array, shape (n_params,)
|
||||
Unraveled embedding.
|
||||
|
||||
P : array, shape (n_samples * (n_samples-1) / 2,)
|
||||
Condensed joint probability matrix.
|
||||
|
||||
degrees_of_freedom : int
|
||||
Degrees of freedom of the Student's-t distribution.
|
||||
|
||||
n_samples : int
|
||||
Number of samples.
|
||||
|
||||
n_components : int
|
||||
Dimension of the embedded space.
|
||||
|
||||
skip_num_points : int (optional, default:0)
|
||||
This does not compute the gradient for points with indices below
|
||||
`skip_num_points`. This is useful when computing transforms of new
|
||||
data where you'd like to keep the old data fixed.
|
||||
|
||||
compute_error: bool (optional, default:True)
|
||||
If False, the kl_divergence is not computed and returns NaN.
|
||||
|
||||
Returns
|
||||
-------
|
||||
kl_divergence : float
|
||||
Kullback-Leibler divergence of p_ij and q_ij.
|
||||
|
||||
grad : array, shape (n_params,)
|
||||
Unraveled gradient of the Kullback-Leibler divergence with respect to
|
||||
the embedding.
|
||||
"""
|
||||
X_embedded = params.reshape(n_samples, n_components)
|
||||
|
||||
# Q is a heavy-tailed distribution: Student's t-distribution
|
||||
dist = pdist(X_embedded, "sqeuclidean")
|
||||
dist /= degrees_of_freedom
|
||||
dist += 1.
|
||||
dist **= (degrees_of_freedom + 1.0) / -2.0
|
||||
Q = np.maximum(dist / (2.0 * np.sum(dist)), MACHINE_EPSILON)
|
||||
|
||||
# Optimization trick below: np.dot(x, y) is faster than
|
||||
# np.sum(x * y) because it calls BLAS
|
||||
|
||||
# Objective: C (Kullback-Leibler divergence of P and Q)
|
||||
if compute_error:
|
||||
kl_divergence = 2.0 * np.dot(
|
||||
P, np.log(np.maximum(P, MACHINE_EPSILON) / Q))
|
||||
else:
|
||||
kl_divergence = np.nan
|
||||
|
||||
# Gradient: dC/dY
|
||||
# pdist always returns double precision distances. Thus we allocate the gradient with the dtype of `params`.
|
||||
grad = np.ndarray((n_samples, n_components), dtype=params.dtype)
|
||||
PQd = squareform((P - Q) * dist)
|
||||
for i in range(skip_num_points, n_samples):
|
||||
grad[i] = np.dot(np.ravel(PQd[i], order='K'),
|
||||
X_embedded[i] - X_embedded)
|
||||
grad = grad.ravel()
|
||||
c = 2.0 * (degrees_of_freedom + 1.0) / degrees_of_freedom
|
||||
grad *= c
|
||||
|
||||
return kl_divergence, grad
|
||||
|
||||
|
||||
def _kl_divergence_bh(params, P, degrees_of_freedom, n_samples, n_components,
|
||||
angle=0.5, skip_num_points=0, verbose=False,
|
||||
compute_error=True, num_threads=1):
|
||||
"""t-SNE objective function: KL divergence of p_ijs and q_ijs.
|
||||
|
||||
Uses Barnes-Hut tree methods to calculate the gradient that
|
||||
runs in O(NlogN) instead of O(N^2)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
params : array, shape (n_params,)
|
||||
Unraveled embedding.
|
||||
|
||||
P : csr sparse matrix, shape (n_samples, n_samples)
|
||||
Sparse approximate joint probability matrix, computed only for the
|
||||
k nearest-neighbors and symmetrized.
|
||||
|
||||
degrees_of_freedom : int
|
||||
Degrees of freedom of the Student's-t distribution.
|
||||
|
||||
n_samples : int
|
||||
Number of samples.
|
||||
|
||||
n_components : int
|
||||
Dimension of the embedded space.
|
||||
|
||||
angle : float (default: 0.5)
|
||||
This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.
|
||||
'angle' is the angular size (referred to as theta in [3]) of a distant
|
||||
node as measured from a point. If this size is below 'angle' then it is
|
||||
used as a summary node of all points contained within it.
|
||||
This method is not very sensitive to changes in this parameter
|
||||
in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing
|
||||
computation time and angle greater than 0.8 has quickly increasing error.
|
||||
|
||||
skip_num_points : int (optional, default:0)
|
||||
This does not compute the gradient for points with indices below
|
||||
`skip_num_points`. This is useful when computing transforms of new
|
||||
data where you'd like to keep the old data fixed.
|
||||
|
||||
verbose : int
|
||||
Verbosity level.
|
||||
|
||||
compute_error: bool (optional, default:True)
|
||||
If False, the kl_divergence is not computed and returns NaN.
|
||||
|
||||
num_threads : int (optional, default:1)
|
||||
Number of threads used to compute the gradient. This is set here to
|
||||
avoid calling _openmp_effective_n_threads for each gradient step.
|
||||
|
||||
Returns
|
||||
-------
|
||||
kl_divergence : float
|
||||
Kullback-Leibler divergence of p_ij and q_ij.
|
||||
|
||||
grad : array, shape (n_params,)
|
||||
Unraveled gradient of the Kullback-Leibler divergence with respect to
|
||||
the embedding.
|
||||
"""
|
||||
params = params.astype(np.float32, copy=False)
|
||||
X_embedded = params.reshape(n_samples, n_components)
|
||||
|
||||
val_P = P.data.astype(np.float32, copy=False)
|
||||
neighbors = P.indices.astype(np.int64, copy=False)
|
||||
indptr = P.indptr.astype(np.int64, copy=False)
|
||||
|
||||
grad = np.zeros(X_embedded.shape, dtype=np.float32)
|
||||
error = _barnes_hut_tsne.gradient(val_P, X_embedded, neighbors, indptr,
|
||||
grad, angle, n_components, verbose,
|
||||
dof=degrees_of_freedom,
|
||||
compute_error=compute_error,
|
||||
num_threads=num_threads)
|
||||
c = 2.0 * (degrees_of_freedom + 1.0) / degrees_of_freedom
|
||||
grad = grad.ravel()
|
||||
grad *= c
|
||||
|
||||
return error, grad
|
||||
|
||||
|
||||
def _gradient_descent(objective, p0, it, n_iter,
|
||||
n_iter_check=1, n_iter_without_progress=300,
|
||||
momentum=0.8, learning_rate=200.0, min_gain=0.01,
|
||||
min_grad_norm=1e-7, verbose=0, args=None, kwargs=None):
|
||||
"""Batch gradient descent with momentum and individual gains.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
objective : function or callable
|
||||
Should return a tuple of cost and gradient for a given parameter
|
||||
vector. When expensive to compute, the cost can optionally
|
||||
be None and is then only evaluated every n_iter_check steps using
|
||||
the compute_error keyword argument.
|
||||
|
||||
p0 : array-like, shape (n_params,)
|
||||
Initial parameter vector.
|
||||
|
||||
it : int
|
||||
Current number of iterations (this function will be called more than
|
||||
once during the optimization).
|
||||
|
||||
n_iter : int
|
||||
Maximum number of gradient descent iterations.
|
||||
|
||||
n_iter_check : int
|
||||
Number of iterations before evaluating the global error. If the error
|
||||
is sufficiently low, we abort the optimization.
|
||||
|
||||
n_iter_without_progress : int, optional (default: 300)
|
||||
Maximum number of iterations without progress before we abort the
|
||||
optimization.
|
||||
|
||||
momentum : float, within (0.0, 1.0), optional (default: 0.8)
|
||||
The momentum generates a weight for previous gradients that decays
|
||||
exponentially.
|
||||
|
||||
learning_rate : float, optional (default: 200.0)
|
||||
The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If
|
||||
the learning rate is too high, the data may look like a 'ball' with any
|
||||
point approximately equidistant from its nearest neighbours. If the
|
||||
learning rate is too low, most points may look compressed in a dense
|
||||
cloud with few outliers.
|
||||
|
||||
min_gain : float, optional (default: 0.01)
|
||||
Minimum individual gain for each parameter.
|
||||
|
||||
min_grad_norm : float, optional (default: 1e-7)
|
||||
If the gradient norm is below this threshold, the optimization will
|
||||
be aborted.
|
||||
|
||||
verbose : int, optional (default: 0)
|
||||
Verbosity level.
|
||||
|
||||
args : sequence
|
||||
Arguments to pass to objective function.
|
||||
|
||||
kwargs : dict
|
||||
Keyword arguments to pass to objective function.
|
||||
|
||||
Returns
|
||||
-------
|
||||
p : array, shape (n_params,)
|
||||
Optimum parameters.
|
||||
|
||||
error : float
|
||||
Optimum.
|
||||
|
||||
i : int
|
||||
Last iteration.
|
||||
"""
|
||||
if args is None:
|
||||
args = []
|
||||
if kwargs is None:
|
||||
kwargs = {}
|
||||
|
||||
p = p0.copy().ravel()
|
||||
update = np.zeros_like(p)
|
||||
gains = np.ones_like(p)
|
||||
error = np.finfo(np.float).max
|
||||
best_error = np.finfo(np.float).max
|
||||
best_iter = i = it
|
||||
|
||||
tic = time()
|
||||
for i in range(it, n_iter):
|
||||
check_convergence = (i + 1) % n_iter_check == 0
|
||||
# only compute the error when needed
|
||||
kwargs['compute_error'] = check_convergence or i == n_iter - 1
|
||||
|
||||
error, grad = objective(p, *args, **kwargs)
|
||||
grad_norm = linalg.norm(grad)
|
||||
|
||||
inc = update * grad < 0.0
|
||||
dec = np.invert(inc)
|
||||
gains[inc] += 0.2
|
||||
gains[dec] *= 0.8
|
||||
np.clip(gains, min_gain, np.inf, out=gains)
|
||||
grad *= gains
|
||||
update = momentum * update - learning_rate * grad
|
||||
p += update
|
||||
|
||||
if check_convergence:
|
||||
toc = time()
|
||||
duration = toc - tic
|
||||
tic = toc
|
||||
|
||||
if verbose >= 2:
|
||||
print("[t-SNE] Iteration %d: error = %.7f,"
|
||||
" gradient norm = %.7f"
|
||||
" (%s iterations in %0.3fs)"
|
||||
% (i + 1, error, grad_norm, n_iter_check, duration))
|
||||
|
||||
if error < best_error:
|
||||
best_error = error
|
||||
best_iter = i
|
||||
elif i - best_iter > n_iter_without_progress:
|
||||
if verbose >= 2:
|
||||
print("[t-SNE] Iteration %d: did not make any progress "
|
||||
"during the last %d episodes. Finished."
|
||||
% (i + 1, n_iter_without_progress))
|
||||
break
|
||||
if grad_norm <= min_grad_norm:
|
||||
if verbose >= 2:
|
||||
print("[t-SNE] Iteration %d: gradient norm %f. Finished."
|
||||
% (i + 1, grad_norm))
|
||||
break
|
||||
|
||||
return p, error, i
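# --- Illustrative sketch (not part of the original module) ---------------
# Wiring the private pieces above together by hand: build P for a tiny
# dataset, then minimize the exact KL divergence with the batch gradient
# descent defined above.  For illustration only; TSNE does this internally.
import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn.manifold._t_sne import (_joint_probabilities, _kl_divergence,
                                     _gradient_descent)

rng = np.random.RandomState(0)
X = rng.randn(30, 5)
D = pairwise_distances(X, squared=True)
P = _joint_probabilities(D, desired_perplexity=10.0, verbose=0)
p0 = 1e-4 * rng.randn(30, 2).ravel()                # random initial embedding

params, kl, it = _gradient_descent(
    _kl_divergence, p0, it=0, n_iter=250,
    args=[P, 1, 30, 2])                             # dof=1, n_samples=30, dim=2
print(kl < _kl_divergence(p0, P, 1, 30, 2)[0])      # True: KL decreased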
|
||||
|
||||
|
||||
@_deprecate_positional_args
|
||||
def trustworthiness(X, X_embedded, *, n_neighbors=5, metric='euclidean'):
|
||||
r"""Expresses to what extent the local structure is retained.
|
||||
|
||||
The trustworthiness is within [0, 1]. It is defined as
|
||||
|
||||
.. math::
|
||||
|
||||
T(k) = 1 - \frac{2}{nk (2n - 3k - 1)} \sum^n_{i=1}
|
||||
\sum_{j \in \mathcal{N}_{i}^{k}} \max(0, (r(i, j) - k))
|
||||
|
||||
where for each sample i, :math:`\mathcal{N}_{i}^{k}` are its k nearest
|
||||
neighbors in the output space, and every sample j is its :math:`r(i, j)`-th
|
||||
nearest neighbor in the input space. In other words, any unexpected nearest
|
||||
neighbors in the output space are penalised in proportion to their rank in
|
||||
the input space.
|
||||
|
||||
* "Neighborhood Preservation in Nonlinear Projection Methods: An
|
||||
Experimental Study"
|
||||
J. Venna, S. Kaski
|
||||
* "Learning a Parametric Embedding by Preserving Local Structure"
|
||||
L.J.P. van der Maaten
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array, shape (n_samples, n_features) or (n_samples, n_samples)
|
||||
If the metric is 'precomputed' X must be a square distance
|
||||
matrix. Otherwise it contains a sample per row.
|
||||
|
||||
X_embedded : array, shape (n_samples, n_components)
|
||||
Embedding of the training data in low-dimensional space.
|
||||
|
||||
n_neighbors : int, optional (default: 5)
|
||||
Number of neighbors k that will be considered.
|
||||
|
||||
metric : string, or callable, optional, default 'euclidean'
|
||||
Which metric to use for computing pairwise distances between samples
|
||||
from the original input space. If metric is 'precomputed', X must be a
|
||||
matrix of pairwise distances or squared distances. Otherwise, see the
|
||||
documentation of argument metric in sklearn.metrics.pairwise.pairwise_distances
|
||||
for a list of available metrics.
|
||||
|
||||
.. versionadded:: 0.20
|
||||
|
||||
Returns
|
||||
-------
|
||||
trustworthiness : float
|
||||
Trustworthiness of the low-dimensional embedding.
|
||||
"""
|
||||
dist_X = pairwise_distances(X, metric=metric)
|
||||
if metric == 'precomputed':
|
||||
dist_X = dist_X.copy()
|
||||
# we set the diagonal to np.inf to exclude the points themselves from
|
||||
# their own neighborhood
|
||||
np.fill_diagonal(dist_X, np.inf)
|
||||
ind_X = np.argsort(dist_X, axis=1)
|
||||
# `ind_X[i]` is the index of sorted distances between i and other samples
|
||||
ind_X_embedded = NearestNeighbors(n_neighbors=n_neighbors).fit(
|
||||
X_embedded).kneighbors(return_distance=False)
|
||||
|
||||
# We build an inverted index of neighbors in the input space: For sample i,
|
||||
# we define `inverted_index[i]` as the inverted index of sorted distances:
|
||||
# inverted_index[i][ind_X[i]] = np.arange(1, n_samples + 1)
|
||||
n_samples = X.shape[0]
|
||||
inverted_index = np.zeros((n_samples, n_samples), dtype=int)
|
||||
ordered_indices = np.arange(n_samples + 1)
|
||||
inverted_index[ordered_indices[:-1, np.newaxis],
|
||||
ind_X] = ordered_indices[1:]
|
||||
ranks = inverted_index[ordered_indices[:-1, np.newaxis],
|
||||
ind_X_embedded] - n_neighbors
|
||||
t = np.sum(ranks[ranks > 0])
|
||||
t = 1.0 - t * (2.0 / (n_samples * n_neighbors *
|
||||
(2.0 * n_samples - 3.0 * n_neighbors - 1.0)))
|
||||
return t
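# --- Illustrative sketch (not part of the original module) ---------------
# trustworthiness() applied to a plain PCA projection; a value close to 1.0
# means the local neighborhoods of the input space were preserved.
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.manifold import trustworthiness

X, _ = load_iris(return_X_y=True)
X_2d = PCA(n_components=2).fit_transform(X)
print(round(trustworthiness(X, X_2d, n_neighbors=5), 2))   # close to 1.0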
|
||||
|
||||
|
||||
class TSNE(BaseEstimator):
|
||||
"""t-distributed Stochastic Neighbor Embedding.
|
||||
|
||||
t-SNE [1] is a tool to visualize high-dimensional data. It converts
|
||||
similarities between data points to joint probabilities and tries
|
||||
to minimize the Kullback-Leibler divergence between the joint
|
||||
probabilities of the low-dimensional embedding and the
|
||||
high-dimensional data. t-SNE has a cost function that is not convex,
|
||||
i.e. with different initializations we can get different results.
|
||||
|
||||
It is highly recommended to use another dimensionality reduction
|
||||
method (e.g. PCA for dense data or TruncatedSVD for sparse data)
|
||||
to reduce the number of dimensions to a reasonable amount (e.g. 50)
|
||||
if the number of features is very high. This will suppress some
|
||||
noise and speed up the computation of pairwise distances between
|
||||
samples. For more tips see Laurens van der Maaten's FAQ [2].
|
||||
|
||||
Read more in the :ref:`User Guide <t_sne>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_components : int, optional (default: 2)
|
||||
Dimension of the embedded space.
|
||||
|
||||
perplexity : float, optional (default: 30)
|
||||
The perplexity is related to the number of nearest neighbors that
|
||||
is used in other manifold learning algorithms. Larger datasets
|
||||
usually require a larger perplexity. Consider selecting a value
|
||||
between 5 and 50. Different values can result in significantly
|
||||
different results.
|
||||
|
||||
early_exaggeration : float, optional (default: 12.0)
|
||||
Controls how tight natural clusters in the original space are in
|
||||
the embedded space and how much space will be between them. For
|
||||
larger values, the space between natural clusters will be larger
|
||||
in the embedded space. Again, the choice of this parameter is not
|
||||
very critical. If the cost function increases during initial
|
||||
optimization, the early exaggeration factor or the learning rate
|
||||
might be too high.
|
||||
|
||||
learning_rate : float, optional (default: 200.0)
|
||||
The learning rate for t-SNE is usually in the range [10.0, 1000.0]. If
|
||||
the learning rate is too high, the data may look like a 'ball' with any
|
||||
point approximately equidistant from its nearest neighbours. If the
|
||||
learning rate is too low, most points may look compressed in a dense
|
||||
cloud with few outliers. If the cost function gets stuck in a bad local
|
||||
minimum increasing the learning rate may help.
|
||||
|
||||
n_iter : int, optional (default: 1000)
|
||||
Maximum number of iterations for the optimization. Should be at
|
||||
least 250.
|
||||
|
||||
n_iter_without_progress : int, optional (default: 300)
|
||||
Maximum number of iterations without progress before we abort the
|
||||
optimization, used after 250 initial iterations with early
|
||||
exaggeration. Note that progress is only checked every 50 iterations so
|
||||
this value is rounded to the next multiple of 50.
|
||||
|
||||
.. versionadded:: 0.17
|
||||
parameter *n_iter_without_progress* to control stopping criteria.
|
||||
|
||||
min_grad_norm : float, optional (default: 1e-7)
|
||||
If the gradient norm is below this threshold, the optimization will
|
||||
be stopped.
|
||||
|
||||
metric : string or callable, optional
|
||||
The metric to use when calculating distance between instances in a
|
||||
feature array. If metric is a string, it must be one of the options
|
||||
allowed by scipy.spatial.distance.pdist for its metric parameter, or
|
||||
a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.
|
||||
If metric is "precomputed", X is assumed to be a distance matrix.
|
||||
Alternatively, if metric is a callable function, it is called on each
|
||||
pair of instances (rows) and the resulting value recorded. The callable
|
||||
should take two arrays from X as input and return a value indicating
|
||||
the distance between them. The default is "euclidean" which is
|
||||
interpreted as squared euclidean distance.
|
||||
|
||||
init : string or numpy array, optional (default: "random")
|
||||
Initialization of embedding. Possible options are 'random', 'pca',
|
||||
and a numpy array of shape (n_samples, n_components).
|
||||
PCA initialization cannot be used with precomputed distances and is
|
||||
usually more globally stable than random initialization.
|
||||
|
||||
verbose : int, optional (default: 0)
|
||||
Verbosity level.
|
||||
|
||||
random_state : int, RandomState instance, default=None
|
||||
Determines the random number generator. Pass an int for reproducible
|
||||
results across multiple function calls. Note that different
|
||||
initializations might result in different local minima of the cost
|
||||
function. See :term:`Glossary <random_state>`.
|
||||
|
||||
method : string (default: 'barnes_hut')
|
||||
By default the gradient calculation algorithm uses Barnes-Hut
|
||||
approximation running in O(NlogN) time. method='exact'
|
||||
will run on the slower, but exact, algorithm in O(N^2) time. The
|
||||
exact algorithm should be used when nearest-neighbor errors need
|
||||
to be better than 3%. However, the exact method cannot scale to
|
||||
millions of examples.
|
||||
|
||||
.. versionadded:: 0.17
|
||||
Approximate optimization *method* via the Barnes-Hut.
|
||||
|
||||
angle : float (default: 0.5)
|
||||
Only used if method='barnes_hut'
|
||||
This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.
|
||||
'angle' is the angular size (referred to as theta in [3]) of a distant
|
||||
node as measured from a point. If this size is below 'angle' then it is
|
||||
used as a summary node of all points contained within it.
|
||||
This method is not very sensitive to changes in this parameter
|
||||
in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing
|
||||
computation time and angle greater than 0.8 has quickly increasing error.
|
||||
|
||||
n_jobs : int or None, optional (default=None)
|
||||
The number of parallel jobs to run for neighbors search. This parameter
|
||||
has no impact when ``metric="precomputed"`` or
|
||||
(``metric="euclidean"`` and ``method="exact"``).
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
.. versionadded:: 0.22
|
||||
|
||||
Attributes
|
||||
----------
|
||||
embedding_ : array-like, shape (n_samples, n_components)
|
||||
Stores the embedding vectors.
|
||||
|
||||
kl_divergence_ : float
|
||||
Kullback-Leibler divergence after optimization.
|
||||
|
||||
n_iter_ : int
|
||||
Number of iterations run.
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
>>> import numpy as np
|
||||
>>> from sklearn.manifold import TSNE
|
||||
>>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
|
||||
>>> X_embedded = TSNE(n_components=2).fit_transform(X)
|
||||
>>> X_embedded.shape
|
||||
(4, 2)
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
[1] van der Maaten, L.J.P.; Hinton, G.E. Visualizing High-Dimensional Data
|
||||
Using t-SNE. Journal of Machine Learning Research 9:2579-2605, 2008.
|
||||
|
||||
[2] van der Maaten, L.J.P. t-Distributed Stochastic Neighbor Embedding
|
||||
https://lvdmaaten.github.io/tsne/
|
||||
|
||||
[3] L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms.
|
||||
Journal of Machine Learning Research 15(Oct):3221-3245, 2014.
|
||||
https://lvdmaaten.github.io/publications/papers/JMLR_2014.pdf
|
||||
"""
|
||||
# Control the number of exploration iterations with early_exaggeration on
|
||||
_EXPLORATION_N_ITER = 250
|
||||
|
||||
# Control the number of iterations between progress checks
|
||||
_N_ITER_CHECK = 50
|
||||
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, n_components=2, *, perplexity=30.0,
|
||||
early_exaggeration=12.0, learning_rate=200.0, n_iter=1000,
|
||||
n_iter_without_progress=300, min_grad_norm=1e-7,
|
||||
metric="euclidean", init="random", verbose=0,
|
||||
random_state=None, method='barnes_hut', angle=0.5,
|
||||
n_jobs=None):
|
||||
self.n_components = n_components
|
||||
self.perplexity = perplexity
|
||||
self.early_exaggeration = early_exaggeration
|
||||
self.learning_rate = learning_rate
|
||||
self.n_iter = n_iter
|
||||
self.n_iter_without_progress = n_iter_without_progress
|
||||
self.min_grad_norm = min_grad_norm
|
||||
self.metric = metric
|
||||
self.init = init
|
||||
self.verbose = verbose
|
||||
self.random_state = random_state
|
||||
self.method = method
|
||||
self.angle = angle
|
||||
self.n_jobs = n_jobs
|
||||
|
||||
def _fit(self, X, skip_num_points=0):
|
||||
"""Private function to fit the model using X as training data."""
|
||||
|
||||
if self.method not in ['barnes_hut', 'exact']:
|
||||
raise ValueError("'method' must be 'barnes_hut' or 'exact'")
|
||||
if self.angle < 0.0 or self.angle > 1.0:
|
||||
raise ValueError("'angle' must be between 0.0 - 1.0")
|
||||
if self.method == 'barnes_hut':
|
||||
X = self._validate_data(X, accept_sparse=['csr'],
|
||||
ensure_min_samples=2,
|
||||
dtype=[np.float32, np.float64])
|
||||
else:
|
||||
X = self._validate_data(X, accept_sparse=['csr', 'csc', 'coo'],
|
||||
dtype=[np.float32, np.float64])
|
||||
if self.metric == "precomputed":
|
||||
if isinstance(self.init, str) and self.init == 'pca':
|
||||
raise ValueError("The parameter init=\"pca\" cannot be "
|
||||
"used with metric=\"precomputed\".")
|
||||
if X.shape[0] != X.shape[1]:
|
||||
raise ValueError("X should be a square distance matrix")
|
||||
|
||||
check_non_negative(X, "TSNE.fit(). With metric='precomputed', X "
|
||||
"should contain positive distances.")
|
||||
|
||||
if self.method == "exact" and issparse(X):
|
||||
raise TypeError(
|
||||
'TSNE with method="exact" does not accept sparse '
|
||||
'precomputed distance matrix. Use method="barnes_hut" '
|
||||
'or provide the dense distance matrix.')
|
||||
|
||||
if self.method == 'barnes_hut' and self.n_components > 3:
|
||||
raise ValueError("'n_components' should be inferior to 4 for the "
|
||||
"barnes_hut algorithm as it relies on "
|
||||
"quad-tree or oct-tree.")
|
||||
random_state = check_random_state(self.random_state)
|
||||
|
||||
if self.early_exaggeration < 1.0:
|
||||
raise ValueError("early_exaggeration must be at least 1, but is {}"
|
||||
.format(self.early_exaggeration))
|
||||
|
||||
if self.n_iter < 250:
|
||||
raise ValueError("n_iter should be at least 250")
|
||||
|
||||
n_samples = X.shape[0]
|
||||
|
||||
neighbors_nn = None
|
||||
if self.method == "exact":
|
||||
# Retrieve the distance matrix, either using the precomputed one or
|
||||
# computing it.
|
||||
if self.metric == "precomputed":
|
||||
distances = X
|
||||
else:
|
||||
if self.verbose:
|
||||
print("[t-SNE] Computing pairwise distances...")
|
||||
|
||||
if self.metric == "euclidean":
|
||||
distances = pairwise_distances(X, metric=self.metric,
|
||||
squared=True)
|
||||
else:
|
||||
distances = pairwise_distances(X, metric=self.metric,
|
||||
n_jobs=self.n_jobs)
|
||||
|
||||
if np.any(distances < 0):
|
||||
raise ValueError("All distances should be positive, the "
|
||||
"metric given is not correct")
|
||||
|
||||
# compute the joint probability distribution for the input space
|
||||
P = _joint_probabilities(distances, self.perplexity, self.verbose)
|
||||
assert np.all(np.isfinite(P)), "All probabilities should be finite"
|
||||
assert np.all(P >= 0), "All probabilities should be non-negative"
|
||||
assert np.all(P <= 1), ("All probabilities should be less "
|
||||
"or then equal to one")
|
||||
|
||||
else:
|
||||
# Compute the number of nearest neighbors to find.
|
||||
# LvdM uses 3 * perplexity as the number of neighbors.
|
||||
# In the event that we have very small # of points
|
||||
# set the neighbors to n - 1.
|
||||
n_neighbors = min(n_samples - 1, int(3. * self.perplexity + 1))
|
||||
|
||||
if self.verbose:
|
||||
print("[t-SNE] Computing {} nearest neighbors..."
|
||||
.format(n_neighbors))
|
||||
|
||||
# Find the nearest neighbors for every point
|
||||
knn = NearestNeighbors(algorithm='auto',
|
||||
n_jobs=self.n_jobs,
|
||||
n_neighbors=n_neighbors,
|
||||
metric=self.metric)
|
||||
t0 = time()
|
||||
knn.fit(X)
|
||||
duration = time() - t0
|
||||
if self.verbose:
|
||||
print("[t-SNE] Indexed {} samples in {:.3f}s...".format(
|
||||
n_samples, duration))
|
||||
|
||||
t0 = time()
|
||||
distances_nn = knn.kneighbors_graph(mode='distance')
|
||||
duration = time() - t0
|
||||
if self.verbose:
|
||||
print("[t-SNE] Computed neighbors for {} samples "
|
||||
"in {:.3f}s...".format(n_samples, duration))
|
||||
|
||||
# Free the memory used by the ball_tree
|
||||
del knn
|
||||
|
||||
if self.metric == "euclidean":
|
||||
# knn return the euclidean distance but we need it squared
|
||||
# to be consistent with the 'exact' method. Note that the
|
||||
# method was derived using the euclidean metric in the
|
||||
# input space. Not sure of the implication of using a different
|
||||
# metric.
|
||||
distances_nn.data **= 2
|
||||
|
||||
# compute the joint probability distribution for the input space
|
||||
P = _joint_probabilities_nn(distances_nn, self.perplexity,
|
||||
self.verbose)
|
||||
|
||||
if isinstance(self.init, np.ndarray):
|
||||
X_embedded = self.init
|
||||
elif self.init == 'pca':
|
||||
pca = PCA(n_components=self.n_components, svd_solver='randomized',
|
||||
random_state=random_state)
|
||||
X_embedded = pca.fit_transform(X).astype(np.float32, copy=False)
|
||||
elif self.init == 'random':
|
||||
# The embedding is initialized with iid samples from Gaussians with
|
||||
# standard deviation 1e-4.
|
||||
X_embedded = 1e-4 * random_state.randn(
|
||||
n_samples, self.n_components).astype(np.float32)
|
||||
else:
|
||||
raise ValueError("'init' must be 'pca', 'random', or "
|
||||
"a numpy array")
|
||||
|
||||
# Degrees of freedom of the Student's t-distribution. The suggestion
|
||||
# degrees_of_freedom = n_components - 1 comes from
|
||||
# "Learning a Parametric Embedding by Preserving Local Structure"
|
||||
# Laurens van der Maaten, 2009.
|
||||
degrees_of_freedom = max(self.n_components - 1, 1)
|
||||
|
||||
return self._tsne(P, degrees_of_freedom, n_samples,
|
||||
X_embedded=X_embedded,
|
||||
neighbors=neighbors_nn,
|
||||
skip_num_points=skip_num_points)
|
||||
|
||||
def _tsne(self, P, degrees_of_freedom, n_samples, X_embedded,
|
||||
neighbors=None, skip_num_points=0):
|
||||
"""Runs t-SNE."""
|
||||
# t-SNE minimizes the Kullback-Leibler divergence of the Gaussians P
|
||||
# and the Student's t-distributions Q. The optimization algorithm that
|
||||
# we use is batch gradient descent with two stages:
|
||||
# * initial optimization with early exaggeration and momentum at 0.5
|
||||
# * final optimization with momentum at 0.8
|
||||
params = X_embedded.ravel()
|
||||
|
||||
opt_args = {
|
||||
"it": 0,
|
||||
"n_iter_check": self._N_ITER_CHECK,
|
||||
"min_grad_norm": self.min_grad_norm,
|
||||
"learning_rate": self.learning_rate,
|
||||
"verbose": self.verbose,
|
||||
"kwargs": dict(skip_num_points=skip_num_points),
|
||||
"args": [P, degrees_of_freedom, n_samples, self.n_components],
|
||||
"n_iter_without_progress": self._EXPLORATION_N_ITER,
|
||||
"n_iter": self._EXPLORATION_N_ITER,
|
||||
"momentum": 0.5,
|
||||
}
|
||||
if self.method == 'barnes_hut':
|
||||
obj_func = _kl_divergence_bh
|
||||
opt_args['kwargs']['angle'] = self.angle
|
||||
# Repeat verbose argument for _kl_divergence_bh
|
||||
opt_args['kwargs']['verbose'] = self.verbose
|
||||
# Get the number of threads for gradient computation here to
|
||||
# avoid recomputing it at each iteration.
|
||||
opt_args['kwargs']['num_threads'] = _openmp_effective_n_threads()
|
||||
else:
|
||||
obj_func = _kl_divergence
|
||||
|
||||
# Learning schedule (part 1): do 250 iterations with lower momentum but
|
||||
# higher learning rate controlled via the early exaggeration parameter
|
||||
P *= self.early_exaggeration
|
||||
params, kl_divergence, it = _gradient_descent(obj_func, params,
|
||||
**opt_args)
|
||||
if self.verbose:
|
||||
print("[t-SNE] KL divergence after %d iterations with early "
|
||||
"exaggeration: %f" % (it + 1, kl_divergence))
|
||||
|
||||
# Learning schedule (part 2): disable early exaggeration and finish
|
||||
# optimization with a higher momentum at 0.8
|
||||
P /= self.early_exaggeration
|
||||
remaining = self.n_iter - self._EXPLORATION_N_ITER
|
||||
if it < self._EXPLORATION_N_ITER or remaining > 0:
|
||||
opt_args['n_iter'] = self.n_iter
|
||||
opt_args['it'] = it + 1
|
||||
opt_args['momentum'] = 0.8
|
||||
opt_args['n_iter_without_progress'] = self.n_iter_without_progress
|
||||
params, kl_divergence, it = _gradient_descent(obj_func, params,
|
||||
**opt_args)
|
||||
|
||||
# Save the final number of iterations
|
||||
self.n_iter_ = it
|
||||
|
||||
if self.verbose:
|
||||
print("[t-SNE] KL divergence after %d iterations: %f"
|
||||
% (it + 1, kl_divergence))
|
||||
|
||||
X_embedded = params.reshape(n_samples, self.n_components)
|
||||
self.kl_divergence_ = kl_divergence
|
||||
|
||||
return X_embedded
|
||||
|
||||
def fit_transform(self, X, y=None):
|
||||
"""Fit X into an embedded space and return that transformed
|
||||
output.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array, shape (n_samples, n_features) or (n_samples, n_samples)
|
||||
If the metric is 'precomputed' X must be a square distance
|
||||
matrix. Otherwise it contains a sample per row. If the method
|
||||
is 'exact', X may be a sparse matrix of type 'csr', 'csc'
|
||||
or 'coo'. If the method is 'barnes_hut' and the metric is
|
||||
'precomputed', X may be a precomputed sparse graph.
|
||||
|
||||
y : Ignored
|
||||
|
||||
Returns
|
||||
-------
|
||||
X_new : array, shape (n_samples, n_components)
|
||||
Embedding of the training data in low-dimensional space.
|
||||
"""
|
||||
embedding = self._fit(X)
|
||||
self.embedding_ = embedding
|
||||
return self.embedding_
|
||||
|
||||
def fit(self, X, y=None):
|
||||
"""Fit X into an embedded space.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array, shape (n_samples, n_features) or (n_samples, n_samples)
|
||||
If the metric is 'precomputed' X must be a square distance
|
||||
matrix. Otherwise it contains a sample per row. If the method
|
||||
is 'exact', X may be a sparse matrix of type 'csr', 'csc'
|
||||
or 'coo'. If the method is 'barnes_hut' and the metric is
|
||||
'precomputed', X may be a precomputed sparse graph.
|
||||
|
||||
y : Ignored
|
||||
"""
|
||||
self.fit_transform(X)
|
||||
return self
|
BIN
venv/Lib/site-packages/sklearn/manifold/_utils.cp36-win32.pyd
Normal file
Binary file not shown.
18
venv/Lib/site-packages/sklearn/manifold/isomap.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _isomap # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.manifold.isomap'
|
||||
correct_import_path = 'sklearn.manifold'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_isomap, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
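# Illustrative sketch (editor's addition, not part of the committed file): the
# shim above keeps the old import path alive while redirecting attribute
# lookups to the private module; a deprecation warning (typically a
# FutureWarning) is emitted unless running under pytest. The same pattern is
# repeated by the locally_linear, mds, spectral_embedding_ and t_sne shims
# below.
from sklearn.manifold.isomap import Isomap       # old path, via __getattr__
from sklearn.manifold import Isomap as NewIsomap  # recommended path

assert Isomap is NewIsomap  # both names resolve to the same class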
|
18
venv/Lib/site-packages/sklearn/manifold/locally_linear.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _locally_linear # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.manifold.locally_linear'
|
||||
correct_import_path = 'sklearn.manifold'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_locally_linear, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
18
venv/Lib/site-packages/sklearn/manifold/mds.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _mds # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.manifold.mds'
|
||||
correct_import_path = 'sklearn.manifold'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_mds, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
34
venv/Lib/site-packages/sklearn/manifold/setup.py
Normal file
|
@ -0,0 +1,34 @@
|
|||
import os
|
||||
|
||||
import numpy
|
||||
|
||||
|
||||
def configuration(parent_package="", top_path=None):
|
||||
from numpy.distutils.misc_util import Configuration
|
||||
|
||||
config = Configuration("manifold", parent_package, top_path)
|
||||
|
||||
libraries = []
|
||||
if os.name == 'posix':
|
||||
libraries.append('m')
|
||||
|
||||
config.add_extension("_utils",
|
||||
sources=["_utils.pyx"],
|
||||
include_dirs=[numpy.get_include()],
|
||||
libraries=libraries,
|
||||
extra_compile_args=["-O3"])
|
||||
|
||||
config.add_extension("_barnes_hut_tsne",
|
||||
sources=["_barnes_hut_tsne.pyx"],
|
||||
include_dirs=[numpy.get_include()],
|
||||
libraries=libraries,
|
||||
extra_compile_args=['-O3'])
|
||||
|
||||
config.add_subpackage('tests')
|
||||
|
||||
return config
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from numpy.distutils.core import setup
|
||||
setup(**configuration().todict())
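# Illustrative sketch (editor's addition, not part of the committed file): the
# configuration above compiles the two Cython extensions used by t-SNE. An
# in-place build is assumed to be `python setup.py build_ext --inplace`;
# afterwards the compiled modules import directly:
from sklearn.manifold import _utils             # binary-search perplexity helper
from sklearn.manifold import _barnes_hut_tsne   # Barnes-Hut gradient kernel
print(_utils.__file__, _barnes_hut_tsne.__file__)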
|
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _spectral_embedding # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.manifold.spectral_embedding_'
|
||||
correct_import_path = 'sklearn.manifold'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_spectral_embedding, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
18
venv/Lib/site-packages/sklearn/manifold/t_sne.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
|
||||
# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
|
||||
import sys
|
||||
# mypy error: Module X has no attribute y (typically for C extensions)
|
||||
from . import _t_sne # type: ignore
|
||||
from ..externals._pep562 import Pep562
|
||||
from ..utils.deprecation import _raise_dep_warning_if_not_pytest
|
||||
|
||||
deprecated_path = 'sklearn.manifold.t_sne'
|
||||
correct_import_path = 'sklearn.manifold'
|
||||
|
||||
_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)
|
||||
|
||||
def __getattr__(name):
|
||||
return getattr(_t_sne, name)
|
||||
|
||||
if not sys.version_info >= (3, 7):
|
||||
Pep562(__name__)
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
188
venv/Lib/site-packages/sklearn/manifold/tests/test_isomap.py
Normal file
|
@ -0,0 +1,188 @@
|
|||
from itertools import product
|
||||
import numpy as np
|
||||
from numpy.testing import assert_almost_equal, assert_array_almost_equal
|
||||
import pytest
|
||||
|
||||
from sklearn import datasets
|
||||
from sklearn import manifold
|
||||
from sklearn import neighbors
|
||||
from sklearn import pipeline
|
||||
from sklearn import preprocessing
|
||||
|
||||
from scipy.sparse import rand as sparse_rand
|
||||
|
||||
eigen_solvers = ['auto', 'dense', 'arpack']
|
||||
path_methods = ['auto', 'FW', 'D']
|
||||
|
||||
|
||||
def test_isomap_simple_grid():
|
||||
# Isomap should preserve distances when all neighbors are used
|
||||
N_per_side = 5
|
||||
Npts = N_per_side ** 2
|
||||
n_neighbors = Npts - 1
|
||||
|
||||
# grid of equidistant points in 2D, n_components = n_dim
|
||||
X = np.array(list(product(range(N_per_side), repeat=2)))
|
||||
|
||||
# distances from each point to all others
|
||||
G = neighbors.kneighbors_graph(X, n_neighbors,
|
||||
mode='distance').toarray()
|
||||
|
||||
for eigen_solver in eigen_solvers:
|
||||
for path_method in path_methods:
|
||||
clf = manifold.Isomap(n_neighbors=n_neighbors, n_components=2,
|
||||
eigen_solver=eigen_solver,
|
||||
path_method=path_method)
|
||||
clf.fit(X)
|
||||
|
||||
G_iso = neighbors.kneighbors_graph(clf.embedding_,
|
||||
n_neighbors,
|
||||
mode='distance').toarray()
|
||||
assert_array_almost_equal(G, G_iso)
|
||||
|
||||
|
||||
def test_isomap_reconstruction_error():
|
||||
# Same setup as in test_isomap_simple_grid, with an added dimension
|
||||
N_per_side = 5
|
||||
Npts = N_per_side ** 2
|
||||
n_neighbors = Npts - 1
|
||||
|
||||
# grid of equidistant points in 2D, n_components = n_dim
|
||||
X = np.array(list(product(range(N_per_side), repeat=2)))
|
||||
|
||||
# add noise in a third dimension
|
||||
rng = np.random.RandomState(0)
|
||||
noise = 0.1 * rng.randn(Npts, 1)
|
||||
X = np.concatenate((X, noise), 1)
|
||||
|
||||
# compute input kernel
|
||||
G = neighbors.kneighbors_graph(X, n_neighbors,
|
||||
mode='distance').toarray()
|
||||
|
||||
centerer = preprocessing.KernelCenterer()
|
||||
K = centerer.fit_transform(-0.5 * G ** 2)
|
||||
|
||||
for eigen_solver in eigen_solvers:
|
||||
for path_method in path_methods:
|
||||
clf = manifold.Isomap(n_neighbors=n_neighbors, n_components=2,
|
||||
eigen_solver=eigen_solver,
|
||||
path_method=path_method)
|
||||
clf.fit(X)
|
||||
|
||||
# compute output kernel
|
||||
G_iso = neighbors.kneighbors_graph(clf.embedding_,
|
||||
n_neighbors,
|
||||
mode='distance').toarray()
|
||||
|
||||
K_iso = centerer.fit_transform(-0.5 * G_iso ** 2)
|
||||
|
||||
# make sure error agrees
|
||||
reconstruction_error = np.linalg.norm(K - K_iso) / Npts
|
||||
assert_almost_equal(reconstruction_error,
|
||||
clf.reconstruction_error())
|
||||
|
||||
|
||||
def test_transform():
|
||||
n_samples = 200
|
||||
n_components = 10
|
||||
noise_scale = 0.01
|
||||
|
||||
# Create S-curve dataset
|
||||
X, y = datasets.make_s_curve(n_samples, random_state=0)
|
||||
|
||||
# Compute isomap embedding
|
||||
iso = manifold.Isomap(n_components=n_components, n_neighbors=2)
|
||||
X_iso = iso.fit_transform(X)
|
||||
|
||||
# Re-embed a noisy version of the points
|
||||
rng = np.random.RandomState(0)
|
||||
noise = noise_scale * rng.randn(*X.shape)
|
||||
X_iso2 = iso.transform(X + noise)
|
||||
|
||||
# Make sure the rms error on re-embedding is comparable to noise_scale
|
||||
assert np.sqrt(np.mean((X_iso - X_iso2) ** 2)) < 2 * noise_scale
|
||||
|
||||
|
||||
def test_pipeline():
|
||||
# check that Isomap works fine as a transformer in a Pipeline
|
||||
# only checks that no error is raised.
|
||||
# TODO check that it actually does something useful
|
||||
X, y = datasets.make_blobs(random_state=0)
|
||||
clf = pipeline.Pipeline(
|
||||
[('isomap', manifold.Isomap()),
|
||||
('clf', neighbors.KNeighborsClassifier())])
|
||||
clf.fit(X, y)
|
||||
assert .9 < clf.score(X, y)
|
||||
|
||||
|
||||
def test_pipeline_with_nearest_neighbors_transformer():
|
||||
# Test chaining NearestNeighborsTransformer and Isomap with
|
||||
# neighbors_algorithm='precomputed'
|
||||
algorithm = 'auto'
|
||||
n_neighbors = 10
|
||||
|
||||
X, _ = datasets.make_blobs(random_state=0)
|
||||
X2, _ = datasets.make_blobs(random_state=1)
|
||||
|
||||
# compare the chained version and the compact version
|
||||
est_chain = pipeline.make_pipeline(
|
||||
neighbors.KNeighborsTransformer(
|
||||
n_neighbors=n_neighbors, algorithm=algorithm, mode='distance'),
|
||||
manifold.Isomap(n_neighbors=n_neighbors, metric='precomputed'))
|
||||
est_compact = manifold.Isomap(n_neighbors=n_neighbors,
|
||||
neighbors_algorithm=algorithm)
|
||||
|
||||
Xt_chain = est_chain.fit_transform(X)
|
||||
Xt_compact = est_compact.fit_transform(X)
|
||||
assert_array_almost_equal(Xt_chain, Xt_compact)
|
||||
|
||||
Xt_chain = est_chain.transform(X2)
|
||||
Xt_compact = est_compact.transform(X2)
|
||||
assert_array_almost_equal(Xt_chain, Xt_compact)
|
||||
|
||||
|
||||
def test_different_metric():
|
||||
# Test that the metric parameters work correctly, and default to euclidean
|
||||
def custom_metric(x1, x2):
|
||||
return np.sqrt(np.sum(x1 ** 2 + x2 ** 2))
|
||||
|
||||
# metric, p, is_euclidean
|
||||
metrics = [('euclidean', 2, True),
|
||||
('manhattan', 1, False),
|
||||
('minkowski', 1, False),
|
||||
('minkowski', 2, True),
|
||||
(custom_metric, 2, False)]
|
||||
|
||||
X, _ = datasets.make_blobs(random_state=0)
|
||||
reference = manifold.Isomap().fit_transform(X)
|
||||
|
||||
for metric, p, is_euclidean in metrics:
|
||||
embedding = manifold.Isomap(metric=metric, p=p).fit_transform(X)
|
||||
|
||||
if is_euclidean:
|
||||
assert_array_almost_equal(embedding, reference)
|
||||
else:
|
||||
with pytest.raises(AssertionError, match='not almost equal'):
|
||||
assert_array_almost_equal(embedding, reference)
|
||||
|
||||
|
||||
def test_isomap_clone_bug():
|
||||
# regression test for bug reported in #6062
|
||||
model = manifold.Isomap()
|
||||
for n_neighbors in [10, 15, 20]:
|
||||
model.set_params(n_neighbors=n_neighbors)
|
||||
model.fit(np.random.rand(50, 2))
|
||||
assert (model.nbrs_.n_neighbors ==
|
||||
n_neighbors)
|
||||
|
||||
|
||||
def test_sparse_input():
|
||||
X = sparse_rand(100, 3, density=0.1, format='csr')
|
||||
|
||||
# Should not error
|
||||
for eigen_solver in eigen_solvers:
|
||||
for path_method in path_methods:
|
||||
clf = manifold.Isomap(n_components=2,
|
||||
eigen_solver=eigen_solver,
|
||||
path_method=path_method)
|
||||
clf.fit(X)
|
|
@ -0,0 +1,146 @@
|
|||
from itertools import product
|
||||
|
||||
import numpy as np
|
||||
from numpy.testing import assert_almost_equal, assert_array_almost_equal
|
||||
from scipy import linalg
|
||||
import pytest
|
||||
|
||||
from sklearn import neighbors, manifold
|
||||
from sklearn.manifold._locally_linear import barycenter_kneighbors_graph
|
||||
from sklearn.utils._testing import ignore_warnings
|
||||
from sklearn.utils._testing import assert_raise_message
|
||||
|
||||
eigen_solvers = ['dense', 'arpack']
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Test utility routines
|
||||
def test_barycenter_kneighbors_graph():
|
||||
X = np.array([[0, 1], [1.01, 1.], [2, 0]])
|
||||
|
||||
A = barycenter_kneighbors_graph(X, 1)
|
||||
assert_array_almost_equal(
|
||||
A.toarray(),
|
||||
[[0., 1., 0.],
|
||||
[1., 0., 0.],
|
||||
[0., 1., 0.]])
|
||||
|
||||
A = barycenter_kneighbors_graph(X, 2)
|
||||
# check that columns sum to one
|
||||
assert_array_almost_equal(np.sum(A.toarray(), 1), np.ones(3))
|
||||
pred = np.dot(A.toarray(), X)
|
||||
assert linalg.norm(pred - X) / X.shape[0] < 1
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Test LLE by computing the reconstruction error on some manifolds.
|
||||
|
||||
def test_lle_simple_grid():
|
||||
# note: ARPACK is numerically unstable, so this test will fail for
|
||||
# some random seeds. We choose 2 because the tests pass.
|
||||
rng = np.random.RandomState(2)
|
||||
|
||||
# grid of equidistant points in 2D, n_components = n_dim
|
||||
X = np.array(list(product(range(5), repeat=2)))
|
||||
X = X + 1e-10 * rng.uniform(size=X.shape)
|
||||
n_components = 2
|
||||
clf = manifold.LocallyLinearEmbedding(n_neighbors=5,
|
||||
n_components=n_components,
|
||||
random_state=rng)
|
||||
tol = 0.1
|
||||
|
||||
N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray()
|
||||
reconstruction_error = linalg.norm(np.dot(N, X) - X, 'fro')
|
||||
assert reconstruction_error < tol
|
||||
|
||||
for solver in eigen_solvers:
|
||||
clf.set_params(eigen_solver=solver)
|
||||
clf.fit(X)
|
||||
assert clf.embedding_.shape[1] == n_components
|
||||
reconstruction_error = linalg.norm(
|
||||
np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2
|
||||
|
||||
assert reconstruction_error < tol
|
||||
assert_almost_equal(clf.reconstruction_error_,
|
||||
reconstruction_error, decimal=1)
|
||||
|
||||
# re-embed a noisy version of X using the transform method
|
||||
noise = rng.randn(*X.shape) / 100
|
||||
X_reembedded = clf.transform(X + noise)
|
||||
assert linalg.norm(X_reembedded - clf.embedding_) < tol
|
||||
|
||||
|
||||
def test_lle_manifold():
|
||||
rng = np.random.RandomState(0)
|
||||
# similar test on a slightly more complex manifold
|
||||
X = np.array(list(product(np.arange(18), repeat=2)))
|
||||
X = np.c_[X, X[:, 0] ** 2 / 18]
|
||||
X = X + 1e-10 * rng.uniform(size=X.shape)
|
||||
n_components = 2
|
||||
for method in ["standard", "hessian", "modified", "ltsa"]:
|
||||
clf = manifold.LocallyLinearEmbedding(n_neighbors=6,
|
||||
n_components=n_components,
|
||||
method=method, random_state=0)
|
||||
tol = 1.5 if method == "standard" else 3
|
||||
|
||||
N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray()
|
||||
reconstruction_error = linalg.norm(np.dot(N, X) - X)
|
||||
assert reconstruction_error < tol
|
||||
|
||||
for solver in eigen_solvers:
|
||||
clf.set_params(eigen_solver=solver)
|
||||
clf.fit(X)
|
||||
assert clf.embedding_.shape[1] == n_components
|
||||
reconstruction_error = linalg.norm(
|
||||
np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2
|
||||
details = ("solver: %s, method: %s" % (solver, method))
|
||||
assert reconstruction_error < tol, details
|
||||
assert (np.abs(clf.reconstruction_error_ -
|
||||
reconstruction_error) <
|
||||
tol * reconstruction_error), details
|
||||
|
||||
|
||||
# Test the error raised when parameter passed to lle is invalid
|
||||
def test_lle_init_parameters():
|
||||
X = np.random.rand(5, 3)
|
||||
|
||||
clf = manifold.LocallyLinearEmbedding(eigen_solver="error")
|
||||
msg = "unrecognized eigen_solver 'error'"
|
||||
assert_raise_message(ValueError, msg, clf.fit, X)
|
||||
|
||||
clf = manifold.LocallyLinearEmbedding(method="error")
|
||||
msg = "unrecognized method 'error'"
|
||||
assert_raise_message(ValueError, msg, clf.fit, X)
|
||||
|
||||
|
||||
def test_pipeline():
|
||||
# check that LocallyLinearEmbedding works fine as a transformer in a Pipeline
|
||||
# only checks that no error is raised.
|
||||
# TODO check that it actually does something useful
|
||||
from sklearn import pipeline, datasets
|
||||
X, y = datasets.make_blobs(random_state=0)
|
||||
clf = pipeline.Pipeline(
|
||||
[('filter', manifold.LocallyLinearEmbedding(random_state=0)),
|
||||
('clf', neighbors.KNeighborsClassifier())])
|
||||
clf.fit(X, y)
|
||||
assert .9 < clf.score(X, y)
|
||||
|
||||
|
||||
# Test the error raised when the weight matrix is singular
|
||||
def test_singular_matrix():
|
||||
M = np.ones((10, 3))
|
||||
f = ignore_warnings
|
||||
with pytest.raises(ValueError):
|
||||
f(manifold.locally_linear_embedding(M, n_neighbors=2, n_components=1,
|
||||
method='standard',
|
||||
eigen_solver='arpack'))
|
||||
|
||||
|
||||
# regression test for #6033
|
||||
def test_integer_input():
|
||||
rand = np.random.RandomState(0)
|
||||
X = rand.randint(0, 100, size=(20, 3))
|
||||
|
||||
for method in ["standard", "hessian", "modified", "ltsa"]:
|
||||
clf = manifold.LocallyLinearEmbedding(method=method, n_neighbors=10)
|
||||
clf.fit(X) # this previously raised a TypeError
|
64
venv/Lib/site-packages/sklearn/manifold/tests/test_mds.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
import numpy as np
|
||||
from numpy.testing import assert_array_almost_equal
|
||||
import pytest
|
||||
|
||||
from sklearn.manifold import _mds as mds
|
||||
|
||||
|
||||
def test_smacof():
|
||||
# test metric smacof using the data of "Modern Multidimensional Scaling",
|
||||
# Borg & Groenen, p 154
|
||||
sim = np.array([[0, 5, 3, 4],
|
||||
[5, 0, 2, 2],
|
||||
[3, 2, 0, 1],
|
||||
[4, 2, 1, 0]])
|
||||
Z = np.array([[-.266, -.539],
|
||||
[.451, .252],
|
||||
[.016, -.238],
|
||||
[-.200, .524]])
|
||||
X, _ = mds.smacof(sim, init=Z, n_components=2, max_iter=1, n_init=1)
|
||||
X_true = np.array([[-1.415, -2.471],
|
||||
[1.633, 1.107],
|
||||
[.249, -.067],
|
||||
[-.468, 1.431]])
|
||||
assert_array_almost_equal(X, X_true, decimal=3)
|
||||
|
||||
|
||||
def test_smacof_error():
|
||||
# Not symmetric similarity matrix:
|
||||
sim = np.array([[0, 5, 9, 4],
|
||||
[5, 0, 2, 2],
|
||||
[3, 2, 0, 1],
|
||||
[4, 2, 1, 0]])
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
mds.smacof(sim)
|
||||
|
||||
# Not squared similarity matrix:
|
||||
sim = np.array([[0, 5, 9, 4],
|
||||
[5, 0, 2, 2],
|
||||
[4, 2, 1, 0]])
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
mds.smacof(sim)
|
||||
|
||||
# init not None and not correct format:
|
||||
sim = np.array([[0, 5, 3, 4],
|
||||
[5, 0, 2, 2],
|
||||
[3, 2, 0, 1],
|
||||
[4, 2, 1, 0]])
|
||||
|
||||
Z = np.array([[-.266, -.539],
|
||||
[.016, -.238],
|
||||
[-.200, .524]])
|
||||
with pytest.raises(ValueError):
|
||||
mds.smacof(sim, init=Z, n_init=1)
|
||||
|
||||
|
||||
def test_MDS():
|
||||
sim = np.array([[0, 5, 3, 4],
|
||||
[5, 0, 2, 2],
|
||||
[3, 2, 0, 1],
|
||||
[4, 2, 1, 0]])
|
||||
mds_clf = mds.MDS(metric=False, n_jobs=3, dissimilarity="precomputed")
|
||||
mds_clf.fit(sim)
|
|
@ -0,0 +1,347 @@
|
|||
import pytest
|
||||
|
||||
import numpy as np
|
||||
|
||||
from scipy import sparse
|
||||
from scipy.sparse import csgraph
|
||||
from scipy.linalg import eigh
|
||||
|
||||
from sklearn.manifold import SpectralEmbedding
|
||||
from sklearn.manifold._spectral_embedding import _graph_is_connected
|
||||
from sklearn.manifold._spectral_embedding import _graph_connected_component
|
||||
from sklearn.manifold import spectral_embedding
|
||||
from sklearn.metrics.pairwise import rbf_kernel
|
||||
from sklearn.metrics import normalized_mutual_info_score
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
from sklearn.cluster import KMeans
|
||||
from sklearn.datasets import make_blobs
|
||||
from sklearn.utils.extmath import _deterministic_vector_sign_flip
|
||||
from sklearn.utils._testing import assert_array_almost_equal
|
||||
from sklearn.utils._testing import assert_array_equal
|
||||
|
||||
|
||||
# non centered, sparse centers to check the
|
||||
centers = np.array([
|
||||
[0.0, 5.0, 0.0, 0.0, 0.0],
|
||||
[0.0, 0.0, 4.0, 0.0, 0.0],
|
||||
[1.0, 0.0, 0.0, 5.0, 1.0],
|
||||
])
|
||||
n_samples = 1000
|
||||
n_clusters, n_features = centers.shape
|
||||
S, true_labels = make_blobs(n_samples=n_samples, centers=centers,
|
||||
cluster_std=1., random_state=42)
|
||||
|
||||
|
||||
def _assert_equal_with_sign_flipping(A, B, tol=0.0):
|
||||
""" Check array A and B are equal with possible sign flipping on
|
||||
each column."""
|
||||
tol_squared = tol ** 2
|
||||
for A_col, B_col in zip(A.T, B.T):
|
||||
assert (np.max((A_col - B_col) ** 2) <= tol_squared or
|
||||
np.max((A_col + B_col) ** 2) <= tol_squared)
|
||||
|
||||
|
||||
def test_sparse_graph_connected_component():
|
||||
rng = np.random.RandomState(42)
|
||||
n_samples = 300
|
||||
boundaries = [0, 42, 121, 200, n_samples]
|
||||
p = rng.permutation(n_samples)
|
||||
connections = []
|
||||
|
||||
for start, stop in zip(boundaries[:-1], boundaries[1:]):
|
||||
group = p[start:stop]
|
||||
# Connect all elements within the group at least once via an
|
||||
# arbitrary path that spans the group.
|
||||
for i in range(len(group) - 1):
|
||||
connections.append((group[i], group[i + 1]))
|
||||
|
||||
# Add some more random connections within the group
|
||||
min_idx, max_idx = 0, len(group) - 1
|
||||
n_random_connections = 1000
|
||||
source = rng.randint(min_idx, max_idx, size=n_random_connections)
|
||||
target = rng.randint(min_idx, max_idx, size=n_random_connections)
|
||||
connections.extend(zip(group[source], group[target]))
|
||||
|
||||
# Build a symmetric affinity matrix
|
||||
row_idx, column_idx = tuple(np.array(connections).T)
|
||||
data = rng.uniform(.1, 42, size=len(connections))
|
||||
affinity = sparse.coo_matrix((data, (row_idx, column_idx)))
|
||||
affinity = 0.5 * (affinity + affinity.T)
|
||||
|
||||
for start, stop in zip(boundaries[:-1], boundaries[1:]):
|
||||
component_1 = _graph_connected_component(affinity, p[start])
|
||||
component_size = stop - start
|
||||
assert component_1.sum() == component_size
|
||||
|
||||
# We should retrieve the same component mask by starting by both ends
|
||||
# of the group
|
||||
component_2 = _graph_connected_component(affinity, p[stop - 1])
|
||||
assert component_2.sum() == component_size
|
||||
assert_array_equal(component_1, component_2)
|
||||
|
||||
|
||||
def test_spectral_embedding_two_components(seed=36):
|
||||
# Test spectral embedding with two components
|
||||
random_state = np.random.RandomState(seed)
|
||||
n_sample = 100
|
||||
affinity = np.zeros(shape=[n_sample * 2, n_sample * 2])
|
||||
# first component
|
||||
affinity[0:n_sample,
|
||||
0:n_sample] = np.abs(random_state.randn(n_sample, n_sample)) + 2
|
||||
# second component
|
||||
affinity[n_sample::,
|
||||
n_sample::] = np.abs(random_state.randn(n_sample, n_sample)) + 2
|
||||
|
||||
# Test of internal _graph_connected_component before connection
|
||||
component = _graph_connected_component(affinity, 0)
|
||||
assert component[:n_sample].all()
|
||||
assert not component[n_sample:].any()
|
||||
component = _graph_connected_component(affinity, -1)
|
||||
assert not component[:n_sample].any()
|
||||
assert component[n_sample:].all()
|
||||
|
||||
# connection
|
||||
affinity[0, n_sample + 1] = 1
|
||||
affinity[n_sample + 1, 0] = 1
|
||||
affinity.flat[::2 * n_sample + 1] = 0
|
||||
affinity = 0.5 * (affinity + affinity.T)
|
||||
|
||||
true_label = np.zeros(shape=2 * n_sample)
|
||||
true_label[0:n_sample] = 1
|
||||
|
||||
se_precomp = SpectralEmbedding(n_components=1, affinity="precomputed",
|
||||
random_state=np.random.RandomState(seed))
|
||||
embedded_coordinate = se_precomp.fit_transform(affinity)
|
||||
# Some numpy versions are touchy with types
|
||||
embedded_coordinate = \
|
||||
se_precomp.fit_transform(affinity.astype(np.float32))
|
||||
# thresholding on the first components using 0.
|
||||
label_ = np.array(embedded_coordinate.ravel() < 0, dtype="float")
|
||||
assert normalized_mutual_info_score(
|
||||
true_label, label_) == pytest.approx(1.0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("X", [S, sparse.csr_matrix(S)],
|
||||
ids=["dense", "sparse"])
|
||||
def test_spectral_embedding_precomputed_affinity(X, seed=36):
|
||||
# Test spectral embedding with precomputed kernel
|
||||
gamma = 1.0
|
||||
se_precomp = SpectralEmbedding(n_components=2, affinity="precomputed",
|
||||
random_state=np.random.RandomState(seed))
|
||||
se_rbf = SpectralEmbedding(n_components=2, affinity="rbf",
|
||||
gamma=gamma,
|
||||
random_state=np.random.RandomState(seed))
|
||||
embed_precomp = se_precomp.fit_transform(rbf_kernel(X, gamma=gamma))
|
||||
embed_rbf = se_rbf.fit_transform(X)
|
||||
assert_array_almost_equal(
|
||||
se_precomp.affinity_matrix_, se_rbf.affinity_matrix_)
|
||||
_assert_equal_with_sign_flipping(embed_precomp, embed_rbf, 0.05)
|
||||
|
||||
|
||||
def test_precomputed_nearest_neighbors_filtering():
|
||||
# Test precomputed graph filtering when containing too many neighbors
|
||||
n_neighbors = 2
|
||||
results = []
|
||||
for additional_neighbors in [0, 10]:
|
||||
nn = NearestNeighbors(
|
||||
n_neighbors=n_neighbors + additional_neighbors).fit(S)
|
||||
graph = nn.kneighbors_graph(S, mode='connectivity')
|
||||
embedding = SpectralEmbedding(random_state=0, n_components=2,
|
||||
affinity='precomputed_nearest_neighbors',
|
||||
n_neighbors=n_neighbors
|
||||
).fit(graph).embedding_
|
||||
results.append(embedding)
|
||||
|
||||
assert_array_equal(results[0], results[1])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("X", [S, sparse.csr_matrix(S)],
|
||||
ids=["dense", "sparse"])
|
||||
def test_spectral_embedding_callable_affinity(X, seed=36):
|
||||
# Test spectral embedding with callable affinity
|
||||
gamma = 0.9
|
||||
kern = rbf_kernel(S, gamma=gamma)
|
||||
se_callable = SpectralEmbedding(n_components=2,
|
||||
affinity=(
|
||||
lambda x: rbf_kernel(x, gamma=gamma)),
|
||||
gamma=gamma,
|
||||
random_state=np.random.RandomState(seed))
|
||||
se_rbf = SpectralEmbedding(n_components=2, affinity="rbf",
|
||||
gamma=gamma,
|
||||
random_state=np.random.RandomState(seed))
|
||||
embed_rbf = se_rbf.fit_transform(X)
|
||||
embed_callable = se_callable.fit_transform(X)
|
||||
assert_array_almost_equal(
|
||||
se_callable.affinity_matrix_, se_rbf.affinity_matrix_)
|
||||
assert_array_almost_equal(kern, se_rbf.affinity_matrix_)
|
||||
_assert_equal_with_sign_flipping(embed_rbf, embed_callable, 0.05)
|
||||
|
||||
|
||||
# TODO: Remove when pyamg replaces the sp.rand call with np.random.rand
|
||||
# https://github.com/scikit-learn/scikit-learn/issues/15913
|
||||
@pytest.mark.filterwarnings(
|
||||
"ignore:scipy.rand is deprecated:DeprecationWarning:pyamg.*")
|
||||
def test_spectral_embedding_amg_solver(seed=36):
|
||||
# Test spectral embedding with amg solver
|
||||
pytest.importorskip('pyamg')
|
||||
|
||||
se_amg = SpectralEmbedding(n_components=2, affinity="nearest_neighbors",
|
||||
eigen_solver="amg", n_neighbors=5,
|
||||
random_state=np.random.RandomState(seed))
|
||||
se_arpack = SpectralEmbedding(n_components=2, affinity="nearest_neighbors",
|
||||
eigen_solver="arpack", n_neighbors=5,
|
||||
random_state=np.random.RandomState(seed))
|
||||
embed_amg = se_amg.fit_transform(S)
|
||||
embed_arpack = se_arpack.fit_transform(S)
|
||||
_assert_equal_with_sign_flipping(embed_amg, embed_arpack, 1e-5)
|
||||
|
||||
# same with special case in which amg is not actually used
|
||||
# regression test for #10715
|
||||
# affinity between nodes
|
||||
row = [0, 0, 1, 2, 3, 3, 4]
|
||||
col = [1, 2, 2, 3, 4, 5, 5]
|
||||
val = [100, 100, 100, 1, 100, 100, 100]
|
||||
|
||||
affinity = sparse.coo_matrix((val + val, (row + col, col + row)),
|
||||
shape=(6, 6)).toarray()
|
||||
se_amg.affinity = "precomputed"
|
||||
se_arpack.affinity = "precomputed"
|
||||
embed_amg = se_amg.fit_transform(affinity)
|
||||
embed_arpack = se_arpack.fit_transform(affinity)
|
||||
_assert_equal_with_sign_flipping(embed_amg, embed_arpack, 1e-5)
|
||||
|
||||
|
||||
# TODO: Remove filterwarnings when pyamg replaces the sp.rand call with
|
||||
# np.random.rand:
|
||||
# https://github.com/scikit-learn/scikit-learn/issues/15913
|
||||
@pytest.mark.filterwarnings(
|
||||
"ignore:scipy.rand is deprecated:DeprecationWarning:pyamg.*")
|
||||
def test_spectral_embedding_amg_solver_failure():
|
||||
# Non-regression test for amg solver failure (issue #13393 on github)
|
||||
pytest.importorskip('pyamg')
|
||||
seed = 36
|
||||
num_nodes = 100
|
||||
X = sparse.rand(num_nodes, num_nodes, density=0.1, random_state=seed)
|
||||
upper = sparse.triu(X) - sparse.diags(X.diagonal())
|
||||
sym_matrix = upper + upper.T
|
||||
embedding = spectral_embedding(sym_matrix,
|
||||
n_components=10,
|
||||
eigen_solver='amg',
|
||||
random_state=0)
|
||||
|
||||
# Check that the learned embedding is stable w.r.t. random solver init:
|
||||
for i in range(3):
|
||||
new_embedding = spectral_embedding(sym_matrix,
|
||||
n_components=10,
|
||||
eigen_solver='amg',
|
||||
random_state=i + 1)
|
||||
_assert_equal_with_sign_flipping(embedding, new_embedding, tol=0.05)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:the behavior of nmi will "
|
||||
"change in version 0.22")
|
||||
def test_pipeline_spectral_clustering(seed=36):
|
||||
# Test using pipeline to do spectral clustering
|
||||
random_state = np.random.RandomState(seed)
|
||||
se_rbf = SpectralEmbedding(n_components=n_clusters,
|
||||
affinity="rbf",
|
||||
random_state=random_state)
|
||||
se_knn = SpectralEmbedding(n_components=n_clusters,
|
||||
affinity="nearest_neighbors",
|
||||
n_neighbors=5,
|
||||
random_state=random_state)
|
||||
for se in [se_rbf, se_knn]:
|
||||
km = KMeans(n_clusters=n_clusters, random_state=random_state)
|
||||
km.fit(se.fit_transform(S))
|
||||
assert_array_almost_equal(
|
||||
normalized_mutual_info_score(
|
||||
km.labels_,
|
||||
true_labels), 1.0, 2)
|
||||
|
||||
|
||||
def test_spectral_embedding_unknown_eigensolver(seed=36):
|
||||
# Test that SpectralEmbedding fails with an unknown eigen_solver
|
||||
se = SpectralEmbedding(n_components=1, affinity="precomputed",
|
||||
random_state=np.random.RandomState(seed),
|
||||
eigen_solver="<unknown>")
|
||||
with pytest.raises(ValueError):
|
||||
se.fit(S)
|
||||
|
||||
|
||||
def test_spectral_embedding_unknown_affinity(seed=36):
|
||||
# Test that SpectralEmbedding fails with an unknown affinity type
|
||||
se = SpectralEmbedding(n_components=1, affinity="<unknown>",
|
||||
random_state=np.random.RandomState(seed))
|
||||
with pytest.raises(ValueError):
|
||||
se.fit(S)
|
||||
|
||||
|
||||
def test_connectivity(seed=36):
|
||||
# Test that graph connectivity test works as expected
|
||||
graph = np.array([[1, 0, 0, 0, 0],
|
||||
[0, 1, 1, 0, 0],
|
||||
[0, 1, 1, 1, 0],
|
||||
[0, 0, 1, 1, 1],
|
||||
[0, 0, 0, 1, 1]])
|
||||
assert not _graph_is_connected(graph)
|
||||
assert not _graph_is_connected(sparse.csr_matrix(graph))
|
||||
assert not _graph_is_connected(sparse.csc_matrix(graph))
|
||||
graph = np.array([[1, 1, 0, 0, 0],
|
||||
[1, 1, 1, 0, 0],
|
||||
[0, 1, 1, 1, 0],
|
||||
[0, 0, 1, 1, 1],
|
||||
[0, 0, 0, 1, 1]])
|
||||
assert _graph_is_connected(graph)
|
||||
assert _graph_is_connected(sparse.csr_matrix(graph))
|
||||
assert _graph_is_connected(sparse.csc_matrix(graph))
|
||||
|
||||
|
||||
def test_spectral_embedding_deterministic():
|
||||
# Test that Spectral Embedding is deterministic
|
||||
random_state = np.random.RandomState(36)
|
||||
data = random_state.randn(10, 30)
|
||||
sims = rbf_kernel(data)
|
||||
embedding_1 = spectral_embedding(sims)
|
||||
embedding_2 = spectral_embedding(sims)
|
||||
assert_array_almost_equal(embedding_1, embedding_2)
|
||||
|
||||
|
||||
def test_spectral_embedding_unnormalized():
|
||||
# Test that spectral_embedding also processes the unnormalized Laplacian
|
||||
# correctly
|
||||
random_state = np.random.RandomState(36)
|
||||
data = random_state.randn(10, 30)
|
||||
sims = rbf_kernel(data)
|
||||
n_components = 8
|
||||
embedding_1 = spectral_embedding(sims,
|
||||
norm_laplacian=False,
|
||||
n_components=n_components,
|
||||
drop_first=False)
|
||||
|
||||
# Verify using manual computation with dense eigh
|
||||
laplacian, dd = csgraph.laplacian(sims, normed=False,
|
||||
return_diag=True)
|
||||
_, diffusion_map = eigh(laplacian)
|
||||
embedding_2 = diffusion_map.T[:n_components]
|
||||
embedding_2 = _deterministic_vector_sign_flip(embedding_2).T
|
||||
|
||||
assert_array_almost_equal(embedding_1, embedding_2)
|
||||
|
||||
|
||||
def test_spectral_embedding_first_eigen_vector():
|
||||
# Test that the first eigenvector of spectral_embedding
|
||||
# is constant and that the second is not (for a connected graph)
|
||||
random_state = np.random.RandomState(36)
|
||||
data = random_state.randn(10, 30)
|
||||
sims = rbf_kernel(data)
|
||||
n_components = 2
|
||||
|
||||
for seed in range(10):
|
||||
embedding = spectral_embedding(sims,
|
||||
norm_laplacian=False,
|
||||
n_components=n_components,
|
||||
drop_first=False,
|
||||
random_state=seed)
|
||||
|
||||
assert np.std(embedding[:, 0]) == pytest.approx(0)
|
||||
assert np.std(embedding[:, 1]) > 1e-3
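# Illustrative sketch (editor's addition, not from the committed test file):
# spectral_embedding on an RBF affinity matrix, the setup used repeatedly in
# the tests above. Data and parameters are arbitrary.
import numpy as np
from sklearn.manifold import spectral_embedding
from sklearn.metrics.pairwise import rbf_kernel

data = np.random.RandomState(36).randn(10, 30)
affinity = rbf_kernel(data)
embedding = spectral_embedding(affinity, n_components=2, random_state=0)
print(embedding.shape)                  # (10, 2)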
|
893
venv/Lib/site-packages/sklearn/manifold/tests/test_t_sne.py
Normal file
|
@ -0,0 +1,893 @@
|
|||
import sys
|
||||
from io import StringIO
|
||||
import numpy as np
|
||||
from numpy.testing import assert_allclose
|
||||
import scipy.sparse as sp
|
||||
import pytest
|
||||
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
from sklearn.neighbors import kneighbors_graph
|
||||
from sklearn.exceptions import EfficiencyWarning
|
||||
from sklearn.utils._testing import ignore_warnings
|
||||
from sklearn.utils._testing import assert_almost_equal
|
||||
from sklearn.utils._testing import assert_array_equal
|
||||
from sklearn.utils._testing import assert_array_almost_equal
|
||||
from sklearn.utils._testing import skip_if_32bit
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.manifold._t_sne import _joint_probabilities
|
||||
from sklearn.manifold._t_sne import _joint_probabilities_nn
|
||||
from sklearn.manifold._t_sne import _kl_divergence
|
||||
from sklearn.manifold._t_sne import _kl_divergence_bh
|
||||
from sklearn.manifold._t_sne import _gradient_descent
|
||||
from sklearn.manifold._t_sne import trustworthiness
|
||||
from sklearn.manifold import TSNE
|
||||
# mypy error: Module 'sklearn.manifold' has no attribute '_barnes_hut_tsne'
|
||||
from sklearn.manifold import _barnes_hut_tsne # type: ignore
|
||||
from sklearn.manifold._utils import _binary_search_perplexity
|
||||
from sklearn.datasets import make_blobs
|
||||
from scipy.optimize import check_grad
|
||||
from scipy.spatial.distance import pdist
|
||||
from scipy.spatial.distance import squareform
|
||||
from sklearn.metrics.pairwise import pairwise_distances
|
||||
from sklearn.metrics.pairwise import manhattan_distances
|
||||
from sklearn.metrics.pairwise import cosine_distances
|
||||
|
||||
|
||||
x = np.linspace(0, 1, 10)
|
||||
xx, yy = np.meshgrid(x, x)
|
||||
X_2d_grid = np.hstack([
|
||||
xx.ravel().reshape(-1, 1),
|
||||
yy.ravel().reshape(-1, 1),
|
||||
])
|
||||
|
||||
|
||||
def test_gradient_descent_stops():
|
||||
# Test stopping conditions of gradient descent.
|
||||
class ObjectiveSmallGradient:
|
||||
def __init__(self):
|
||||
self.it = -1
|
||||
|
||||
def __call__(self, _, compute_error=True):
|
||||
self.it += 1
|
||||
return (10 - self.it) / 10.0, np.array([1e-5])
|
||||
|
||||
def flat_function(_, compute_error=True):
|
||||
return 0.0, np.ones(1)
|
||||
|
||||
# Gradient norm
|
||||
old_stdout = sys.stdout
|
||||
sys.stdout = StringIO()
|
||||
try:
|
||||
_, error, it = _gradient_descent(
|
||||
ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=100,
|
||||
n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
|
||||
min_gain=0.0, min_grad_norm=1e-5, verbose=2)
|
||||
finally:
|
||||
out = sys.stdout.getvalue()
|
||||
sys.stdout.close()
|
||||
sys.stdout = old_stdout
|
||||
assert error == 1.0
|
||||
assert it == 0
|
||||
assert("gradient norm" in out)
|
||||
|
||||
# Maximum number of iterations without improvement
|
||||
old_stdout = sys.stdout
|
||||
sys.stdout = StringIO()
|
||||
try:
|
||||
_, error, it = _gradient_descent(
|
||||
flat_function, np.zeros(1), 0, n_iter=100,
|
||||
n_iter_without_progress=10, momentum=0.0, learning_rate=0.0,
|
||||
min_gain=0.0, min_grad_norm=0.0, verbose=2)
|
||||
finally:
|
||||
out = sys.stdout.getvalue()
|
||||
sys.stdout.close()
|
||||
sys.stdout = old_stdout
|
||||
assert error == 0.0
|
||||
assert it == 11
|
||||
assert("did not make any progress" in out)
|
||||
|
||||
# Maximum number of iterations
|
||||
old_stdout = sys.stdout
|
||||
sys.stdout = StringIO()
|
||||
try:
|
||||
_, error, it = _gradient_descent(
|
||||
ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=11,
|
||||
n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
|
||||
min_gain=0.0, min_grad_norm=0.0, verbose=2)
|
||||
finally:
|
||||
out = sys.stdout.getvalue()
|
||||
sys.stdout.close()
|
||||
sys.stdout = old_stdout
|
||||
assert error == 0.0
|
||||
assert it == 10
|
||||
assert("Iteration 10" in out)
|
||||
|
||||
|
||||
def test_binary_search():
|
||||
# Test if the binary search finds Gaussians with desired perplexity.
|
||||
random_state = check_random_state(0)
|
||||
data = random_state.randn(50, 5)
|
||||
distances = pairwise_distances(data).astype(np.float32)
|
||||
desired_perplexity = 25.0
|
||||
P = _binary_search_perplexity(distances, desired_perplexity, verbose=0)
|
||||
P = np.maximum(P, np.finfo(np.double).eps)
|
||||
mean_perplexity = np.mean([np.exp(-np.sum(P[i] * np.log(P[i])))
|
||||
for i in range(P.shape[0])])
|
||||
assert_almost_equal(mean_perplexity, desired_perplexity, decimal=3)
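# Illustrative note (editor's addition, not from the committed test file): the
# perplexity of a conditional distribution P_i is exp(H(P_i)), the quantity
# averaged in the assertion above; a uniform distribution over k neighbours
# has perplexity exactly k.
import numpy as np
p = np.full(4, 0.25)
print(np.exp(-np.sum(p * np.log(p))))   # 4.0 -- uniform over 4 neighbours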
|
||||
|
||||
|
||||
def test_binary_search_neighbors():
|
||||
# Binary perplexity search approximation.
|
||||
# Should be approximately equal to the slow method when we use
|
||||
# all points as neighbors.
|
||||
n_samples = 200
|
||||
desired_perplexity = 25.0
|
||||
random_state = check_random_state(0)
|
||||
data = random_state.randn(n_samples, 2).astype(np.float32, copy=False)
|
||||
distances = pairwise_distances(data)
|
||||
P1 = _binary_search_perplexity(distances, desired_perplexity, verbose=0)
|
||||
|
||||
# Test that when we use all the neighbors the results are identical
|
||||
n_neighbors = n_samples - 1
|
||||
nn = NearestNeighbors().fit(data)
|
||||
distance_graph = nn.kneighbors_graph(n_neighbors=n_neighbors,
|
||||
mode='distance')
|
||||
distances_nn = distance_graph.data.astype(np.float32, copy=False)
|
||||
distances_nn = distances_nn.reshape(n_samples, n_neighbors)
|
||||
P2 = _binary_search_perplexity(distances_nn, desired_perplexity, verbose=0)
|
||||
|
||||
indptr = distance_graph.indptr
|
||||
P1_nn = np.array([P1[k, distance_graph.indices[indptr[k]:indptr[k + 1]]]
|
||||
for k in range(n_samples)])
|
||||
assert_array_almost_equal(P1_nn, P2, decimal=4)
|
||||
|
||||
# Test that the highest P_ij are the same when fewer neighbors are used
|
||||
for k in np.linspace(150, n_samples - 1, 5):
|
||||
k = int(k)
|
||||
topn = k * 10 # check the top 10 * k entries out of k * k entries
|
||||
distance_graph = nn.kneighbors_graph(n_neighbors=k, mode='distance')
|
||||
distances_nn = distance_graph.data.astype(np.float32, copy=False)
|
||||
distances_nn = distances_nn.reshape(n_samples, k)
|
||||
P2k = _binary_search_perplexity(distances_nn, desired_perplexity,
|
||||
verbose=0)
|
||||
assert_array_almost_equal(P1_nn, P2, decimal=2)
|
||||
idx = np.argsort(P1.ravel())[::-1]
|
||||
P1top = P1.ravel()[idx][:topn]
|
||||
idx = np.argsort(P2k.ravel())[::-1]
|
||||
P2top = P2k.ravel()[idx][:topn]
|
||||
assert_array_almost_equal(P1top, P2top, decimal=2)
|
||||
|
||||
|
||||
def test_binary_perplexity_stability():
|
||||
# Binary perplexity search should be stable.
|
||||
# The binary_search_perplexity had a bug wherein the P array
|
||||
# was uninitialized, leading to sporadically failing tests.
|
||||
n_neighbors = 10
|
||||
n_samples = 100
|
||||
random_state = check_random_state(0)
|
||||
data = random_state.randn(n_samples, 5)
|
||||
nn = NearestNeighbors().fit(data)
|
||||
distance_graph = nn.kneighbors_graph(n_neighbors=n_neighbors,
|
||||
mode='distance')
|
||||
distances = distance_graph.data.astype(np.float32, copy=False)
|
||||
distances = distances.reshape(n_samples, n_neighbors)
|
||||
last_P = None
|
||||
desired_perplexity = 3
|
||||
for _ in range(100):
|
||||
P = _binary_search_perplexity(distances.copy(), desired_perplexity,
|
||||
verbose=0)
|
||||
P1 = _joint_probabilities_nn(distance_graph, desired_perplexity,
|
||||
verbose=0)
|
||||
# Convert the sparse matrix to a dense one for testing
|
||||
P1 = P1.toarray()
|
||||
if last_P is None:
|
||||
last_P = P
|
||||
last_P1 = P1
|
||||
else:
|
||||
assert_array_almost_equal(P, last_P, decimal=4)
|
||||
assert_array_almost_equal(P1, last_P1, decimal=4)
|
||||
|
||||
|
||||
def test_gradient():
|
||||
# Test gradient of Kullback-Leibler divergence.
|
||||
random_state = check_random_state(0)
|
||||
|
||||
n_samples = 50
|
||||
n_features = 2
|
||||
n_components = 2
|
||||
alpha = 1.0
|
||||
|
||||
distances = random_state.randn(n_samples, n_features).astype(np.float32)
|
||||
distances = np.abs(distances.dot(distances.T))
|
||||
np.fill_diagonal(distances, 0.0)
|
||||
X_embedded = random_state.randn(n_samples, n_components).astype(np.float32)
|
||||
|
||||
P = _joint_probabilities(distances, desired_perplexity=25.0,
|
||||
verbose=0)
|
||||
|
||||
def fun(params):
|
||||
return _kl_divergence(params, P, alpha, n_samples, n_components)[0]
|
||||
|
||||
def grad(params):
|
||||
return _kl_divergence(params, P, alpha, n_samples, n_components)[1]
|
||||
|
||||
assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0,
|
||||
decimal=5)
|
||||
|
||||
|
||||
def test_trustworthiness():
|
||||
# Test trustworthiness score.
|
||||
random_state = check_random_state(0)
|
||||
|
||||
# Affine transformation
|
||||
X = random_state.randn(100, 2)
|
||||
assert trustworthiness(X, 5.0 + X / 10.0) == 1.0
|
||||
|
||||
# Randomly shuffled
|
||||
X = np.arange(100).reshape(-1, 1)
|
||||
X_embedded = X.copy()
|
||||
random_state.shuffle(X_embedded)
|
||||
assert trustworthiness(X, X_embedded) < 0.6
|
||||
|
||||
# Completely different
|
||||
X = np.arange(5).reshape(-1, 1)
|
||||
X_embedded = np.array([[0], [2], [4], [1], [3]])
|
||||
assert_almost_equal(trustworthiness(X, X_embedded, n_neighbors=1), 0.2)
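# Illustrative note (editor's addition, not from the committed test file):
# trustworthiness roughly measures how many of each sample's nearest
# neighbours in the embedding were also close in the original space; a score
# of 1.0 means the neighbourhoods are fully preserved. The projection below
# is an arbitrary example.
import numpy as np
from sklearn.manifold import trustworthiness

X = np.random.RandomState(0).randn(50, 5)
print(trustworthiness(X, X[:, :2], n_neighbors=5))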
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ['exact', 'barnes_hut'])
|
||||
@pytest.mark.parametrize("init", ('random', 'pca'))
|
||||
def test_preserve_trustworthiness_approximately(method, init):
|
||||
# Nearest neighbors should be preserved approximately.
|
||||
random_state = check_random_state(0)
|
||||
n_components = 2
|
||||
X = random_state.randn(50, n_components).astype(np.float32)
|
||||
tsne = TSNE(n_components=n_components, init=init, random_state=0,
|
||||
method=method, n_iter=700)
|
||||
X_embedded = tsne.fit_transform(X)
|
||||
t = trustworthiness(X, X_embedded, n_neighbors=1)
|
||||
assert t > 0.85
|
||||
|
||||
|
||||
def test_optimization_minimizes_kl_divergence():
|
||||
"""t-SNE should give a lower KL divergence with more iterations."""
|
||||
random_state = check_random_state(0)
|
||||
X, _ = make_blobs(n_features=3, random_state=random_state)
|
||||
kl_divergences = []
|
||||
for n_iter in [250, 300, 350]:
|
||||
tsne = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
|
||||
n_iter=n_iter, random_state=0)
|
||||
tsne.fit_transform(X)
|
||||
kl_divergences.append(tsne.kl_divergence_)
|
||||
assert kl_divergences[1] <= kl_divergences[0]
|
||||
assert kl_divergences[2] <= kl_divergences[1]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method', ['exact', 'barnes_hut'])
|
||||
def test_fit_csr_matrix(method):
|
||||
# X can be a sparse matrix.
|
||||
rng = check_random_state(0)
|
||||
X = rng.randn(50, 2)
|
||||
X[(rng.randint(0, 50, 25), rng.randint(0, 2, 25))] = 0.0
|
||||
X_csr = sp.csr_matrix(X)
|
||||
tsne = TSNE(n_components=2, perplexity=10, learning_rate=100.0,
|
||||
random_state=0, method=method, n_iter=750)
|
||||
X_embedded = tsne.fit_transform(X_csr)
|
||||
assert_allclose(trustworthiness(X_csr, X_embedded, n_neighbors=1),
|
||||
1.0, rtol=1.1e-1)
|
||||
|
||||
|
||||
def test_preserve_trustworthiness_approximately_with_precomputed_distances():
|
||||
# Nearest neighbors should be preserved approximately.
|
||||
random_state = check_random_state(0)
|
||||
for i in range(3):
|
||||
X = random_state.randn(80, 2)
|
||||
D = squareform(pdist(X), "sqeuclidean")
|
||||
tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
|
||||
early_exaggeration=2.0, metric="precomputed",
|
||||
random_state=i, verbose=0, n_iter=500)
|
||||
X_embedded = tsne.fit_transform(D)
|
||||
t = trustworthiness(D, X_embedded, n_neighbors=1, metric="precomputed")
|
||||
assert t > .95
|
||||
|
||||
|
||||
def test_trustworthiness_not_euclidean_metric():
|
||||
# Test trustworthiness with a metric different from 'euclidean' and
|
||||
# 'precomputed'
|
||||
random_state = check_random_state(0)
|
||||
X = random_state.randn(100, 2)
|
||||
assert (trustworthiness(X, X, metric='cosine') ==
|
||||
trustworthiness(pairwise_distances(X, metric='cosine'), X,
|
||||
metric='precomputed'))
|
||||
|
||||
|
||||
def test_early_exaggeration_too_small():
|
||||
# Early exaggeration factor must be >= 1.
|
||||
tsne = TSNE(early_exaggeration=0.99)
|
||||
with pytest.raises(ValueError, match="early_exaggeration .*"):
|
||||
tsne.fit_transform(np.array([[0.0], [0.0]]))
|
||||
|
||||
|
||||
def test_too_few_iterations():
|
||||
# Number of gradient descent iterations must be at least 200.
|
||||
tsne = TSNE(n_iter=199)
|
||||
with pytest.raises(ValueError, match="n_iter .*"):
|
||||
tsne.fit_transform(np.array([[0.0], [0.0]]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('method, retype', [
|
||||
('exact', np.asarray),
|
||||
('barnes_hut', np.asarray),
|
||||
('barnes_hut', sp.csr_matrix),
|
||||
])
|
||||
@pytest.mark.parametrize('D, message_regex', [
|
||||
([[0.0], [1.0]], ".* square distance matrix"),
|
||||
([[0., -1.], [1., 0.]], ".* positive.*"),
|
||||
])
|
||||
def test_bad_precomputed_distances(method, D, retype, message_regex):
|
||||
tsne = TSNE(metric="precomputed", method=method)
|
||||
with pytest.raises(ValueError, match=message_regex):
|
||||
tsne.fit_transform(retype(D))
|
||||
|
||||
|
||||
def test_exact_no_precomputed_sparse():
|
||||
tsne = TSNE(metric='precomputed', method='exact')
|
||||
with pytest.raises(TypeError, match='sparse'):
|
||||
tsne.fit_transform(sp.csr_matrix([[0, 5], [5, 0]]))
|
||||
|
||||
|
||||
def test_high_perplexity_precomputed_sparse_distances():
|
||||
# Perplexity should be less than 50
|
||||
dist = np.array([[1., 0., 0.], [0., 1., 0.], [1., 0., 0.]])
|
||||
bad_dist = sp.csr_matrix(dist)
|
||||
tsne = TSNE(metric="precomputed")
|
||||
msg = "3 neighbors per samples are required, but some samples have only 1"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
tsne.fit_transform(bad_dist)
|
||||
|
||||
|
||||
@ignore_warnings(category=EfficiencyWarning)
|
||||
def test_sparse_precomputed_distance():
|
||||
"""Make sure that TSNE works identically for sparse and dense matrix"""
|
||||
random_state = check_random_state(0)
|
||||
X = random_state.randn(100, 2)
|
||||
|
||||
D_sparse = kneighbors_graph(X, n_neighbors=100, mode='distance',
|
||||
include_self=True)
|
||||
D = pairwise_distances(X)
|
||||
assert sp.issparse(D_sparse)
|
||||
assert_almost_equal(D_sparse.A, D)
|
||||
|
||||
tsne = TSNE(metric="precomputed", random_state=0)
|
||||
Xt_dense = tsne.fit_transform(D)
|
||||
|
||||
for fmt in ['csr', 'lil']:
|
||||
Xt_sparse = tsne.fit_transform(D_sparse.asformat(fmt))
|
||||
assert_almost_equal(Xt_dense, Xt_sparse)
|
||||
|
||||
|
||||
def test_non_positive_computed_distances():
|
||||
# Computed distance matrices must be positive.
|
||||
def metric(x, y):
|
||||
return -1
|
||||
|
||||
tsne = TSNE(metric=metric, method='exact')
|
||||
X = np.array([[0.0, 0.0], [1.0, 1.0]])
|
||||
with pytest.raises(ValueError, match="All distances .*metric given.*"):
|
||||
tsne.fit_transform(X)
|
||||
|
||||
|
||||
def test_init_not_available():
|
||||
# 'init' must be 'pca', 'random', or numpy array.
|
||||
tsne = TSNE(init="not available")
|
||||
m = "'init' must be 'pca', 'random', or a numpy array"
|
||||
with pytest.raises(ValueError, match=m):
|
||||
tsne.fit_transform(np.array([[0.0], [1.0]]))
|
||||
|
||||
|
||||
def test_init_ndarray():
|
||||
# Initialize TSNE with ndarray and test fit
|
||||
tsne = TSNE(init=np.zeros((100, 2)))
|
||||
X_embedded = tsne.fit_transform(np.ones((100, 5)))
|
||||
assert_array_equal(np.zeros((100, 2)), X_embedded)
|
||||
|
||||
|
||||
def test_init_ndarray_precomputed():
|
||||
# Initialize TSNE with ndarray and metric 'precomputed'
|
||||
# Make sure no FutureWarning is thrown from _fit
|
||||
tsne = TSNE(init=np.zeros((100, 2)), metric="precomputed")
|
||||
tsne.fit(np.zeros((100, 100)))
|
||||
|
||||
|
||||
def test_distance_not_available():
|
||||
# 'metric' must be valid.
|
||||
tsne = TSNE(metric="not available", method='exact')
|
||||
with pytest.raises(ValueError, match="Unknown metric not available.*"):
|
||||
tsne.fit_transform(np.array([[0.0], [1.0]]))
|
||||
|
||||
tsne = TSNE(metric="not available", method='barnes_hut')
|
||||
with pytest.raises(ValueError, match="Metric 'not available' not valid.*"):
|
||||
tsne.fit_transform(np.array([[0.0], [1.0]]))
|
||||
|
||||
|
||||
def test_method_not_available():
|
||||
# 'method' must be 'barnes_hut' or 'exact'
|
||||
tsne = TSNE(method='not available')
|
||||
with pytest.raises(ValueError, match="'method' must be 'barnes_hut' or "):
|
||||
tsne.fit_transform(np.array([[0.0], [1.0]]))
|
||||
|
||||
|
||||
def test_angle_out_of_range_checks():
|
||||
# check the angle parameter range
|
||||
for angle in [-1, -1e-6, 1 + 1e-6, 2]:
|
||||
tsne = TSNE(angle=angle)
|
||||
with pytest.raises(ValueError, match="'angle' must be between "
|
||||
"0.0 - 1.0"):
|
||||
tsne.fit_transform(np.array([[0.0], [1.0]]))
|
||||
|
||||
|
||||
def test_pca_initialization_not_compatible_with_precomputed_kernel():
|
||||
# Precomputed distance matrices must be square matrices.
|
||||
tsne = TSNE(metric="precomputed", init="pca")
|
||||
with pytest.raises(ValueError, match="The parameter init=\"pca\" cannot"
|
||||
" be used with"
|
||||
" metric=\"precomputed\"."):
|
||||
tsne.fit_transform(np.array([[0.0], [1.0]]))
|
||||
|
||||
|
||||
def test_n_components_range():
|
||||
# barnes_hut method should only be used with n_components <= 3
|
||||
tsne = TSNE(n_components=4, method="barnes_hut")
|
||||
with pytest.raises(ValueError, match="'n_components' should be .*"):
|
||||
tsne.fit_transform(np.array([[0.0], [1.0]]))
|
||||
|
||||
|
||||
def test_early_exaggeration_used():
|
||||
# check that the ``early_exaggeration`` parameter has an effect
|
||||
random_state = check_random_state(0)
|
||||
n_components = 2
|
||||
methods = ['exact', 'barnes_hut']
|
||||
X = random_state.randn(25, n_components).astype(np.float32)
|
||||
for method in methods:
|
||||
tsne = TSNE(n_components=n_components, perplexity=1,
|
||||
learning_rate=100.0, init="pca", random_state=0,
|
||||
method=method, early_exaggeration=1.0, n_iter=250)
|
||||
X_embedded1 = tsne.fit_transform(X)
|
||||
tsne = TSNE(n_components=n_components, perplexity=1,
|
||||
learning_rate=100.0, init="pca", random_state=0,
|
||||
method=method, early_exaggeration=10.0, n_iter=250)
|
||||
X_embedded2 = tsne.fit_transform(X)
|
||||
|
||||
assert not np.allclose(X_embedded1, X_embedded2)
|
||||
|
||||
|
||||
def test_n_iter_used():
|
||||
# check that the ``n_iter`` parameter has an effect
|
||||
random_state = check_random_state(0)
|
||||
n_components = 2
|
||||
methods = ['exact', 'barnes_hut']
|
||||
X = random_state.randn(25, n_components).astype(np.float32)
|
||||
for method in methods:
|
||||
for n_iter in [251, 500]:
|
||||
tsne = TSNE(n_components=n_components, perplexity=1,
|
||||
learning_rate=0.5, init="random", random_state=0,
|
||||
method=method, early_exaggeration=1.0, n_iter=n_iter)
|
||||
tsne.fit_transform(X)
|
||||
|
||||
assert tsne.n_iter_ == n_iter - 1
|
||||
|
||||
|
||||
def test_answer_gradient_two_points():
|
||||
# Test the tree with only a single set of children.
|
||||
#
|
||||
# These tests & answers have been checked against the reference
|
||||
# implementation by LvdM.
|
||||
pos_input = np.array([[1.0, 0.0], [0.0, 1.0]])
|
||||
pos_output = np.array([[-4.961291e-05, -1.072243e-04],
|
||||
[9.259460e-05, 2.702024e-04]])
|
||||
neighbors = np.array([[1],
|
||||
[0]])
|
||||
grad_output = np.array([[-2.37012478e-05, -6.29044398e-05],
|
||||
[2.37012478e-05, 6.29044398e-05]])
|
||||
_run_answer_test(pos_input, pos_output, neighbors, grad_output)
|
||||
|
||||
|
||||
def test_answer_gradient_four_points():
|
||||
# Four points tests the tree with multiple levels of children.
|
||||
#
|
||||
# These tests & answers have been checked against the reference
|
||||
# implementation by LvdM.
|
||||
pos_input = np.array([[1.0, 0.0], [0.0, 1.0],
|
||||
[5.0, 2.0], [7.3, 2.2]])
|
||||
pos_output = np.array([[6.080564e-05, -7.120823e-05],
|
||||
[-1.718945e-04, -4.000536e-05],
|
||||
[-2.271720e-04, 8.663310e-05],
|
||||
[-1.032577e-04, -3.582033e-05]])
|
||||
neighbors = np.array([[1, 2, 3],
|
||||
[0, 2, 3],
|
||||
[1, 0, 3],
|
||||
[1, 2, 0]])
|
||||
grad_output = np.array([[5.81128448e-05, -7.78033454e-06],
|
||||
[-5.81526851e-05, 7.80976444e-06],
|
||||
[4.24275173e-08, -3.69569698e-08],
|
||||
[-2.58720939e-09, 7.52706374e-09]])
|
||||
_run_answer_test(pos_input, pos_output, neighbors, grad_output)
|
||||
|
||||
|
||||
def test_skip_num_points_gradient():
|
||||
# Test the kwargs option skip_num_points.
|
||||
#
|
||||
# skip_num_points should make it such that the Barnes-Hut gradient
|
||||
# is not calculated for indices below skip_num_points.
|
||||
# Aside from skip_num_points=2 and the first two gradient rows
|
||||
# being set to zero, these data points are the same as in
|
||||
# test_answer_gradient_four_points()
|
||||
pos_input = np.array([[1.0, 0.0], [0.0, 1.0],
|
||||
[5.0, 2.0], [7.3, 2.2]])
|
||||
pos_output = np.array([[6.080564e-05, -7.120823e-05],
|
||||
[-1.718945e-04, -4.000536e-05],
|
||||
[-2.271720e-04, 8.663310e-05],
|
||||
[-1.032577e-04, -3.582033e-05]])
|
||||
neighbors = np.array([[1, 2, 3],
|
||||
[0, 2, 3],
|
||||
[1, 0, 3],
|
||||
[1, 2, 0]])
|
||||
grad_output = np.array([[0.0, 0.0],
|
||||
[0.0, 0.0],
|
||||
[4.24275173e-08, -3.69569698e-08],
|
||||
[-2.58720939e-09, 7.52706374e-09]])
|
||||
_run_answer_test(pos_input, pos_output, neighbors, grad_output,
|
||||
False, 0.1, 2)
|
||||
|
||||
|
||||
def _run_answer_test(pos_input, pos_output, neighbors, grad_output,
                     verbose=False, perplexity=0.1, skip_num_points=0):
    distances = pairwise_distances(pos_input).astype(np.float32)
    args = distances, perplexity, verbose
    pos_output = pos_output.astype(np.float32)
    neighbors = neighbors.astype(np.int64, copy=False)
    pij_input = _joint_probabilities(*args)
    pij_input = squareform(pij_input).astype(np.float32)
    grad_bh = np.zeros(pos_output.shape, dtype=np.float32)

    from scipy.sparse import csr_matrix
    P = csr_matrix(pij_input)

    neighbors = P.indices.astype(np.int64)
    indptr = P.indptr.astype(np.int64)

    _barnes_hut_tsne.gradient(P.data, pos_output, neighbors, indptr,
                              grad_bh, 0.5, 2, 1,
                              skip_num_points=skip_num_points)
    assert_array_almost_equal(grad_bh, grad_output, decimal=4)


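# Illustrative sketch (not used by the tests above): the Barnes-Hut gradient
# kernel consumes the joint probabilities in CSR form, i.e. three flat arrays
# (``data``, ``indices``, ``indptr``) rather than a dense square matrix, which
# is why ``_run_answer_test`` converts ``pij_input`` through ``csr_matrix``.
# The helper below only demonstrates that mapping on a tiny symmetric affinity
# matrix; the name ``_csr_layout_demo`` is made up for this sketch.
def _csr_layout_demo():
    from scipy.sparse import csr_matrix

    # A 3x3 symmetric affinity matrix with a zero diagonal, as produced by
    # squareform(_joint_probabilities(...)).
    pij = np.array([[0.0, 0.2, 0.1],
                    [0.2, 0.0, 0.3],
                    [0.1, 0.3, 0.0]], dtype=np.float32)
    P = csr_matrix(pij)
    # ``data`` holds the non-zero affinities row by row, ``indices`` the
    # column of each entry, and ``indptr[i]:indptr[i + 1]`` delimits row i.
    return P.data, P.indices.astype(np.int64), P.indptr.astype(np.int64)

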
def test_verbose():
    # Verbose options write to stdout.
    random_state = check_random_state(0)
    tsne = TSNE(verbose=2)
    X = random_state.randn(5, 2)

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        tsne.fit_transform(X)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout

    assert("[t-SNE]" in out)
    assert("nearest neighbors..." in out)
    assert("Computed conditional probabilities" in out)
    assert("Mean sigma" in out)
    assert("early exaggeration" in out)


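# The stdout-capture block above (swap ``sys.stdout`` for a ``StringIO`` and
# restore it in a ``finally``) is repeated inline by several tests further
# down. A minimal sketch of how it could be factored out with
# ``contextlib.redirect_stdout`` is shown here for reference only; the
# ``_captured_stdout`` helper is hypothetical and not part of this module.
def _captured_stdout(func, *args, **kwargs):
    from contextlib import redirect_stdout

    buffer = StringIO()
    with redirect_stdout(buffer):
        func(*args, **kwargs)
    # Return everything the call wrote to stdout.
    return buffer.getvalue()

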
def test_chebyshev_metric():
    # t-SNE should allow metrics that cannot be squared (issue #3526).
    random_state = check_random_state(0)
    tsne = TSNE(metric="chebyshev")
    X = random_state.randn(5, 2)
    tsne.fit_transform(X)


def test_reduction_to_one_component():
    # t-SNE should allow reduction to one component (issue #4154).
    random_state = check_random_state(0)
    tsne = TSNE(n_components=1)
    X = random_state.randn(5, 2)
    X_embedded = tsne.fit(X).embedding_
    assert(np.all(np.isfinite(X_embedded)))


@pytest.mark.parametrize('method', ['barnes_hut', 'exact'])
@pytest.mark.parametrize('dt', [np.float32, np.float64])
def test_64bit(method, dt):
    # Ensure 64bit arrays are handled correctly.
    random_state = check_random_state(0)

    X = random_state.randn(10, 2).astype(dt, copy=False)
    tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
                random_state=0, method=method, verbose=0,
                n_iter=300)
    X_embedded = tsne.fit_transform(X)
    effective_type = X_embedded.dtype

    # tsne cython code is only single precision, so the output will
    # always be single precision, irrespective of the input dtype
    assert effective_type == np.float32


@pytest.mark.parametrize('method', ['barnes_hut', 'exact'])
def test_kl_divergence_not_nan(method):
    # Ensure kl_divergence_ is computed at the last iteration
    # even though n_iter % n_iter_check != 0, i.e. 503 % 50 != 0
    random_state = check_random_state(0)

    X = random_state.randn(50, 2)
    tsne = TSNE(n_components=2, perplexity=2, learning_rate=100.0,
                random_state=0, method=method, verbose=0, n_iter=503)
    tsne.fit_transform(X)

    assert not np.isnan(tsne.kl_divergence_)


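# For reference, the ``kl_divergence_`` attribute checked above is the
# Kullback-Leibler divergence KL(P || Q) = sum_ij p_ij * log(p_ij / q_ij)
# between the high-dimensional joint probabilities P and the embedding's
# probabilities Q. The sketch below just evaluates that formula for two tiny
# hand-written distributions; it is illustrative only and ``_kl_reference``
# is not part of this test module.
def _kl_reference():
    p = np.array([0.1, 0.4, 0.5])
    q = np.array([0.2, 0.3, 0.5])
    # Terms with p_ij == 0 contribute nothing to the sum by convention.
    mask = p > 0
    return np.sum(p[mask] * np.log(p[mask] / q[mask]))

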
def test_barnes_hut_angle():
    # When Barnes-Hut's angle=0 this corresponds to the exact method.
    angle = 0.0
    perplexity = 10
    n_samples = 100
    for n_components in [2, 3]:
        n_features = 5
        degrees_of_freedom = float(n_components - 1.0)

        random_state = check_random_state(0)
        data = random_state.randn(n_samples, n_features)
        distances = pairwise_distances(data)
        params = random_state.randn(n_samples, n_components)
        P = _joint_probabilities(distances, perplexity, verbose=0)
        kl_exact, grad_exact = _kl_divergence(params, P, degrees_of_freedom,
                                              n_samples, n_components)

        n_neighbors = n_samples - 1
        distances_csr = NearestNeighbors().fit(data).kneighbors_graph(
            n_neighbors=n_neighbors, mode='distance')
        P_bh = _joint_probabilities_nn(distances_csr, perplexity, verbose=0)
        kl_bh, grad_bh = _kl_divergence_bh(params, P_bh, degrees_of_freedom,
                                           n_samples, n_components,
                                           angle=angle, skip_num_points=0,
                                           verbose=0)

        P = squareform(P)
        P_bh = P_bh.toarray()
        assert_array_almost_equal(P_bh, P, decimal=5)
        assert_almost_equal(kl_exact, kl_bh, decimal=3)


@skip_if_32bit
def test_n_iter_without_progress():
    # Use a dummy negative n_iter_without_progress and check output on stdout
    random_state = check_random_state(0)
    X = random_state.randn(100, 10)
    for method in ["barnes_hut", "exact"]:
        tsne = TSNE(n_iter_without_progress=-1, verbose=2, learning_rate=1e8,
                    random_state=0, method=method, n_iter=351, init="random")
        tsne._N_ITER_CHECK = 1
        tsne._EXPLORATION_N_ITER = 0

        old_stdout = sys.stdout
        sys.stdout = StringIO()
        try:
            tsne.fit_transform(X)
        finally:
            out = sys.stdout.getvalue()
            sys.stdout.close()
            sys.stdout = old_stdout

        # The output needs to contain the value of n_iter_without_progress
        assert ("did not make any progress during the "
                "last -1 episodes. Finished." in out)


def test_min_grad_norm():
    # Make sure that the parameter min_grad_norm is used correctly
    random_state = check_random_state(0)
    X = random_state.randn(100, 2)
    min_grad_norm = 0.002
    tsne = TSNE(min_grad_norm=min_grad_norm, verbose=2,
                random_state=0, method='exact')

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        tsne.fit_transform(X)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout

    lines_out = out.split('\n')

    # extract the gradient norm from the verbose output
    gradient_norm_values = []
    for line in lines_out:
        # Once the computation is finished, only an old gradient norm value
        # is repeated, which we do not need to store
        if 'Finished' in line:
            break

        start_grad_norm = line.find('gradient norm')
        if start_grad_norm >= 0:
            line = line[start_grad_norm:]
            line = line.replace('gradient norm = ', '').split(' ')[0]
            gradient_norm_values.append(float(line))

    # Compute how often the gradient norm is smaller than min_grad_norm
    gradient_norm_values = np.array(gradient_norm_values)
    n_smaller_gradient_norms = \
        len(gradient_norm_values[gradient_norm_values <= min_grad_norm])

    # The gradient norm can be smaller than min_grad_norm at most once,
    # because the optimization stops as soon as it drops below that value
    assert n_smaller_gradient_norms <= 1


def test_accessible_kl_divergence():
    # Ensures that the accessible kl_divergence matches the computed value
    random_state = check_random_state(0)
    X = random_state.randn(50, 2)
    tsne = TSNE(n_iter_without_progress=2, verbose=2,
                random_state=0, method='exact',
                n_iter=500)

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        tsne.fit_transform(X)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout

    # The output needs to contain the accessible kl_divergence as the error at
    # the last iteration
    for line in out.split('\n')[::-1]:
        if 'Iteration' in line:
            _, _, error = line.partition('error = ')
            if error:
                error, _, _ = error.partition(',')
                break
    assert_almost_equal(tsne.kl_divergence_, float(error), decimal=5)


@pytest.mark.parametrize('method', ['barnes_hut', 'exact'])
def test_uniform_grid(method):
    """Make sure that TSNE can approximately recover a uniform 2D grid

    Due to ties in distances between points in X_2d_grid, this test is
    platform dependent for ``method='barnes_hut'`` because of numerical
    imprecision.

    Also, t-SNE is not assured to converge to the right solution because bad
    initialization can lead to convergence to a bad local minimum (the
    optimization problem is non-convex). To avoid breaking the test too often,
    we re-run t-SNE from the final point when the convergence is not good
    enough.
    """
    seeds = range(3)
    n_iter = 500
    for seed in seeds:
        tsne = TSNE(n_components=2, init='random', random_state=seed,
                    perplexity=50, n_iter=n_iter, method=method)
        Y = tsne.fit_transform(X_2d_grid)

        try_name = "{}_{}".format(method, seed)
        try:
            assert_uniform_grid(Y, try_name)
        except AssertionError:
            # If the test fails a first time, re-run with init=Y to see if
            # this was caused by a bad initialization. Note that this will
            # also run an early_exaggeration step.
            try_name += ":rerun"
            tsne.init = Y
            Y = tsne.fit_transform(X_2d_grid)
            assert_uniform_grid(Y, try_name)


def assert_uniform_grid(Y, try_name=None):
    # Ensure that the resulting embedding leads to approximately
    # uniformly spaced points: the distance to the closest neighbors
    # should be non-zero and approximately constant.
    nn = NearestNeighbors(n_neighbors=1).fit(Y)
    dist_to_nn = nn.kneighbors(return_distance=True)[0].ravel()
    assert dist_to_nn.min() > 0.1

    smallest_to_mean = dist_to_nn.min() / np.mean(dist_to_nn)
    largest_to_mean = dist_to_nn.max() / np.mean(dist_to_nn)

    assert smallest_to_mean > .5, try_name
    assert largest_to_mean < 2, try_name


def test_bh_match_exact():
    # check that the ``barnes_hut`` method matches the exact one when
    # ``angle = 0`` and ``perplexity > n_samples / 3``
    random_state = check_random_state(0)
    n_features = 10
    X = random_state.randn(30, n_features).astype(np.float32)
    X_embeddeds = {}
    n_iter = {}
    for method in ['exact', 'barnes_hut']:
        tsne = TSNE(n_components=2, method=method, learning_rate=1.0,
                    init="random", random_state=0, n_iter=251,
                    perplexity=30.0, angle=0)
        # Kill the early_exaggeration
        tsne._EXPLORATION_N_ITER = 0
        X_embeddeds[method] = tsne.fit_transform(X)
        n_iter[method] = tsne.n_iter_

    assert n_iter['exact'] == n_iter['barnes_hut']
    assert_allclose(X_embeddeds['exact'], X_embeddeds['barnes_hut'], rtol=1e-4)


def test_gradient_bh_multithread_match_sequential():
    # check that the bh gradient with different num_threads gives the same
    # results

    n_features = 10
    n_samples = 30
    n_components = 2
    degrees_of_freedom = 1

    angle = 3
    perplexity = 5

    random_state = check_random_state(0)
    data = random_state.randn(n_samples, n_features).astype(np.float32)
    params = random_state.randn(n_samples, n_components)

    n_neighbors = n_samples - 1
    distances_csr = NearestNeighbors().fit(data).kneighbors_graph(
        n_neighbors=n_neighbors, mode='distance')
    P_bh = _joint_probabilities_nn(distances_csr, perplexity, verbose=0)
    kl_sequential, grad_sequential = _kl_divergence_bh(
        params, P_bh, degrees_of_freedom, n_samples, n_components,
        angle=angle, skip_num_points=0, verbose=0, num_threads=1)
    for num_threads in [2, 4]:
        kl_multithread, grad_multithread = _kl_divergence_bh(
            params, P_bh, degrees_of_freedom, n_samples, n_components,
            angle=angle, skip_num_points=0, verbose=0, num_threads=num_threads)

        assert_allclose(kl_multithread, kl_sequential, rtol=1e-6)
        assert_allclose(grad_multithread, grad_sequential)


def test_tsne_with_different_distance_metrics():
    """Make sure that TSNE works for different distance metrics"""
    random_state = check_random_state(0)
    n_components_original = 3
    n_components_embedding = 2
    X = random_state.randn(50, n_components_original).astype(np.float32)
    metrics = ['manhattan', 'cosine']
    dist_funcs = [manhattan_distances, cosine_distances]
    for metric, dist_func in zip(metrics, dist_funcs):
        X_transformed_tsne = TSNE(
            metric=metric, n_components=n_components_embedding,
            random_state=0, n_iter=300).fit_transform(X)
        X_transformed_tsne_precomputed = TSNE(
            metric='precomputed', n_components=n_components_embedding,
            random_state=0, n_iter=300).fit_transform(dist_func(X))
        assert_array_equal(X_transformed_tsne, X_transformed_tsne_precomputed)


@pytest.mark.parametrize('method', ['exact', 'barnes_hut'])
def test_tsne_n_jobs(method):
    """Make sure that the n_jobs parameter doesn't impact the output"""
    random_state = check_random_state(0)
    n_features = 10
    X = random_state.randn(30, n_features)
    X_tr_ref = TSNE(n_components=2, method=method, perplexity=30.0,
                    angle=0, n_jobs=1, random_state=0).fit_transform(X)
    X_tr = TSNE(n_components=2, method=method, perplexity=30.0,
                angle=0, n_jobs=2, random_state=0).fit_transform(X)

    assert_allclose(X_tr_ref, X_tr)