Uploaded Test files
This commit is contained in:
parent
f584ad9d97
commit
2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
282
venv/Lib/site-packages/sklearn/manifold/_isomap.py
Normal file
282
venv/Lib/site-packages/sklearn/manifold/_isomap.py
Normal file
|
@ -0,0 +1,282 @@
|
|||
"""Isomap for manifold learning"""
|
||||
|
||||
# Author: Jake Vanderplas -- <vanderplas@astro.washington.edu>
|
||||
# License: BSD 3 clause (C) 2011
|
||||
|
||||
import numpy as np
|
||||
from ..base import BaseEstimator, TransformerMixin
|
||||
from ..neighbors import NearestNeighbors, kneighbors_graph
|
||||
from ..utils.deprecation import deprecated
|
||||
from ..utils.validation import check_is_fitted
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ..utils.graph import graph_shortest_path
|
||||
from ..decomposition import KernelPCA
|
||||
from ..preprocessing import KernelCenterer
|
||||
|
||||
|
||||
class Isomap(TransformerMixin, BaseEstimator):
|
||||
"""Isomap Embedding
|
||||
|
||||
Non-linear dimensionality reduction through Isometric Mapping
|
||||
|
||||
Read more in the :ref:`User Guide <isomap>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_neighbors : integer
|
||||
number of neighbors to consider for each point.
|
||||
|
||||
n_components : integer
|
||||
number of coordinates for the manifold
|
||||
|
||||
eigen_solver : ['auto'|'arpack'|'dense']
|
||||
'auto' : Attempt to choose the most efficient solver
|
||||
for the given problem.
|
||||
|
||||
'arpack' : Use Arnoldi decomposition to find the eigenvalues
|
||||
and eigenvectors.
|
||||
|
||||
'dense' : Use a direct solver (i.e. LAPACK)
|
||||
for the eigenvalue decomposition.
|
||||
|
||||
tol : float
|
||||
Convergence tolerance passed to arpack or lobpcg.
|
||||
not used if eigen_solver == 'dense'.
|
||||
|
||||
max_iter : integer
|
||||
Maximum number of iterations for the arpack solver.
|
||||
not used if eigen_solver == 'dense'.
|
||||
|
||||
path_method : string ['auto'|'FW'|'D']
|
||||
Method to use in finding shortest path.
|
||||
|
||||
'auto' : attempt to choose the best algorithm automatically.
|
||||
|
||||
'FW' : Floyd-Warshall algorithm.
|
||||
|
||||
'D' : Dijkstra's algorithm.
|
||||
|
||||
neighbors_algorithm : string ['auto'|'brute'|'kd_tree'|'ball_tree']
|
||||
Algorithm to use for nearest neighbors search,
|
||||
passed to neighbors.NearestNeighbors instance.
|
||||
|
||||
n_jobs : int or None, default=None
|
||||
The number of parallel jobs to run.
|
||||
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
|
||||
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
|
||||
for more details.
|
||||
|
||||
metric : string, or callable, default="minkowski"
|
||||
The metric to use when calculating distance between instances in a
|
||||
feature array. If metric is a string or callable, it must be one of
|
||||
the options allowed by :func:`sklearn.metrics.pairwise_distances` for
|
||||
its metric parameter.
|
||||
If metric is "precomputed", X is assumed to be a distance matrix and
|
||||
must be square. X may be a :term:`Glossary <sparse graph>`.
|
||||
|
||||
.. versionadded:: 0.22
|
||||
|
||||
p : int, default=2
|
||||
Parameter for the Minkowski metric from
|
||||
sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is
|
||||
equivalent to using manhattan_distance (l1), and euclidean_distance
|
||||
(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.
|
||||
|
||||
.. versionadded:: 0.22
|
||||
|
||||
metric_params : dict, default=None
|
||||
Additional keyword arguments for the metric function.
|
||||
|
||||
.. versionadded:: 0.22
|
||||
|
||||
Attributes
|
||||
----------
|
||||
embedding_ : array-like, shape (n_samples, n_components)
|
||||
Stores the embedding vectors.
|
||||
|
||||
kernel_pca_ : object
|
||||
:class:`~sklearn.decomposition.KernelPCA` object used to implement the
|
||||
embedding.
|
||||
|
||||
nbrs_ : sklearn.neighbors.NearestNeighbors instance
|
||||
Stores nearest neighbors instance, including BallTree or KDtree
|
||||
if applicable.
|
||||
|
||||
dist_matrix_ : array-like, shape (n_samples, n_samples)
|
||||
Stores the geodesic distance matrix of training data.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from sklearn.datasets import load_digits
|
||||
>>> from sklearn.manifold import Isomap
|
||||
>>> X, _ = load_digits(return_X_y=True)
|
||||
>>> X.shape
|
||||
(1797, 64)
|
||||
>>> embedding = Isomap(n_components=2)
|
||||
>>> X_transformed = embedding.fit_transform(X[:100])
|
||||
>>> X_transformed.shape
|
||||
(100, 2)
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
.. [1] Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. A global geometric
|
||||
framework for nonlinear dimensionality reduction. Science 290 (5500)
|
||||
"""
|
||||
@_deprecate_positional_args
|
||||
def __init__(self, *, n_neighbors=5, n_components=2, eigen_solver='auto',
|
||||
tol=0, max_iter=None, path_method='auto',
|
||||
neighbors_algorithm='auto', n_jobs=None, metric='minkowski',
|
||||
p=2, metric_params=None):
|
||||
self.n_neighbors = n_neighbors
|
||||
self.n_components = n_components
|
||||
self.eigen_solver = eigen_solver
|
||||
self.tol = tol
|
||||
self.max_iter = max_iter
|
||||
self.path_method = path_method
|
||||
self.neighbors_algorithm = neighbors_algorithm
|
||||
self.n_jobs = n_jobs
|
||||
self.metric = metric
|
||||
self.p = p
|
||||
self.metric_params = metric_params
|
||||
|
||||
def _fit_transform(self, X):
|
||||
self.nbrs_ = NearestNeighbors(n_neighbors=self.n_neighbors,
|
||||
algorithm=self.neighbors_algorithm,
|
||||
metric=self.metric, p=self.p,
|
||||
metric_params=self.metric_params,
|
||||
n_jobs=self.n_jobs)
|
||||
self.nbrs_.fit(X)
|
||||
self.n_features_in_ = self.nbrs_.n_features_in_
|
||||
|
||||
self.kernel_pca_ = KernelPCA(n_components=self.n_components,
|
||||
kernel="precomputed",
|
||||
eigen_solver=self.eigen_solver,
|
||||
tol=self.tol, max_iter=self.max_iter,
|
||||
n_jobs=self.n_jobs)
|
||||
|
||||
kng = kneighbors_graph(self.nbrs_, self.n_neighbors,
|
||||
metric=self.metric, p=self.p,
|
||||
metric_params=self.metric_params,
|
||||
mode='distance', n_jobs=self.n_jobs)
|
||||
|
||||
self.dist_matrix_ = graph_shortest_path(kng,
|
||||
method=self.path_method,
|
||||
directed=False)
|
||||
G = self.dist_matrix_ ** 2
|
||||
G *= -0.5
|
||||
|
||||
self.embedding_ = self.kernel_pca_.fit_transform(G)
|
||||
|
||||
# mypy error: Decorated property not supported
|
||||
@deprecated( # type: ignore
|
||||
"Attribute `training_data_` was deprecated in version 0.22 and"
|
||||
" will be removed in 0.24."
|
||||
)
|
||||
@property
|
||||
def training_data_(self):
|
||||
check_is_fitted(self)
|
||||
return self.nbrs_._fit_X
|
||||
|
||||
def reconstruction_error(self):
|
||||
"""Compute the reconstruction error for the embedding.
|
||||
|
||||
Returns
|
||||
-------
|
||||
reconstruction_error : float
|
||||
|
||||
Notes
|
||||
-----
|
||||
The cost function of an isomap embedding is
|
||||
|
||||
``E = frobenius_norm[K(D) - K(D_fit)] / n_samples``
|
||||
|
||||
Where D is the matrix of distances for the input data X,
|
||||
D_fit is the matrix of distances for the output embedding X_fit,
|
||||
and K is the isomap kernel:
|
||||
|
||||
``K(D) = -0.5 * (I - 1/n_samples) * D^2 * (I - 1/n_samples)``
|
||||
"""
|
||||
G = -0.5 * self.dist_matrix_ ** 2
|
||||
G_center = KernelCenterer().fit_transform(G)
|
||||
evals = self.kernel_pca_.lambdas_
|
||||
return np.sqrt(np.sum(G_center ** 2) - np.sum(evals ** 2)) / G.shape[0]
|
||||
|
||||
def fit(self, X, y=None):
|
||||
"""Compute the embedding vectors for data X
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse graph, BallTree, KDTree, NearestNeighbors}
|
||||
Sample data, shape = (n_samples, n_features), in the form of a
|
||||
numpy array, sparse graph, precomputed tree, or NearestNeighbors
|
||||
object.
|
||||
|
||||
y : Ignored
|
||||
|
||||
Returns
|
||||
-------
|
||||
self : returns an instance of self.
|
||||
"""
|
||||
self._fit_transform(X)
|
||||
return self
|
||||
|
||||
def fit_transform(self, X, y=None):
|
||||
"""Fit the model from data in X and transform X.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse graph, BallTree, KDTree}
|
||||
Training vector, where n_samples in the number of samples
|
||||
and n_features is the number of features.
|
||||
|
||||
y : Ignored
|
||||
|
||||
Returns
|
||||
-------
|
||||
X_new : array-like, shape (n_samples, n_components)
|
||||
"""
|
||||
self._fit_transform(X)
|
||||
return self.embedding_
|
||||
|
||||
def transform(self, X):
|
||||
"""Transform X.
|
||||
|
||||
This is implemented by linking the points X into the graph of geodesic
|
||||
distances of the training data. First the `n_neighbors` nearest
|
||||
neighbors of X are found in the training data, and from these the
|
||||
shortest geodesic distances from each point in X to each point in
|
||||
the training data are computed in order to construct the kernel.
|
||||
The embedding of X is the projection of this kernel onto the
|
||||
embedding vectors of the training set.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like, shape (n_queries, n_features)
|
||||
If neighbors_algorithm='precomputed', X is assumed to be a
|
||||
distance matrix or a sparse graph of shape
|
||||
(n_queries, n_samples_fit).
|
||||
|
||||
Returns
|
||||
-------
|
||||
X_new : array-like, shape (n_queries, n_components)
|
||||
"""
|
||||
check_is_fitted(self)
|
||||
distances, indices = self.nbrs_.kneighbors(X, return_distance=True)
|
||||
|
||||
# Create the graph of shortest distances from X to
|
||||
# training data via the nearest neighbors of X.
|
||||
# This can be done as a single array operation, but it potentially
|
||||
# takes a lot of memory. To avoid that, use a loop:
|
||||
|
||||
n_samples_fit = self.nbrs_.n_samples_fit_
|
||||
n_queries = distances.shape[0]
|
||||
G_X = np.zeros((n_queries, n_samples_fit))
|
||||
for i in range(n_queries):
|
||||
G_X[i] = np.min(self.dist_matrix_[indices[i]] +
|
||||
distances[i][:, None], 0)
|
||||
|
||||
G_X **= 2
|
||||
G_X *= -0.5
|
||||
|
||||
return self.kernel_pca_.transform(G_X)
|
Loading…
Add table
Add a link
Reference in a new issue