"""Principal Component Analysis Base Classes""" # Author: Alexandre Gramfort # Olivier Grisel # Mathieu Blondel # Denis A. Engemann # Kyle Kastner # # License: BSD 3 clause import numpy as np from scipy import linalg from ..base import BaseEstimator, TransformerMixin from ..utils import check_array from ..utils.validation import check_is_fitted from abc import ABCMeta, abstractmethod class _BasePCA(TransformerMixin, BaseEstimator, metaclass=ABCMeta): """Base class for PCA methods. Warning: This class should not be used directly. Use derived classes instead. """ def get_covariance(self): """Compute data covariance with the generative model. ``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)`` where S**2 contains the explained variances, and sigma2 contains the noise variances. Returns ------- cov : array, shape=(n_features, n_features) Estimated covariance of data. """ components_ = self.components_ exp_var = self.explained_variance_ if self.whiten: components_ = components_ * np.sqrt(exp_var[:, np.newaxis]) exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.) cov = np.dot(components_.T * exp_var_diff, components_) cov.flat[::len(cov) + 1] += self.noise_variance_ # modify diag inplace return cov def get_precision(self): """Compute data precision matrix with the generative model. Equals the inverse of the covariance but computed with the matrix inversion lemma for efficiency. Returns ------- precision : array, shape=(n_features, n_features) Estimated precision of data. """ n_features = self.components_.shape[1] # handle corner cases first if self.n_components_ == 0: return np.eye(n_features) / self.noise_variance_ if self.n_components_ == n_features: return linalg.inv(self.get_covariance()) # Get precision using matrix inversion lemma components_ = self.components_ exp_var = self.explained_variance_ if self.whiten: components_ = components_ * np.sqrt(exp_var[:, np.newaxis]) exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.) precision = np.dot(components_, components_.T) / self.noise_variance_ precision.flat[::len(precision) + 1] += 1. / exp_var_diff precision = np.dot(components_.T, np.dot(linalg.inv(precision), components_)) precision /= -(self.noise_variance_ ** 2) precision.flat[::len(precision) + 1] += 1. / self.noise_variance_ return precision @abstractmethod def fit(X, y=None): """Placeholder for fit. Subclasses should implement this method! Fit the model with X. Parameters ---------- X : array-like, shape (n_samples, n_features) Training data, where n_samples is the number of samples and n_features is the number of features. Returns ------- self : object Returns the instance itself. """ def transform(self, X): """Apply dimensionality reduction to X. X is projected on the first principal components previously extracted from a training set. Parameters ---------- X : array-like, shape (n_samples, n_features) New data, where n_samples is the number of samples and n_features is the number of features. Returns ------- X_new : array-like, shape (n_samples, n_components) Examples -------- >>> import numpy as np >>> from sklearn.decomposition import IncrementalPCA >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) >>> ipca = IncrementalPCA(n_components=2, batch_size=3) >>> ipca.fit(X) IncrementalPCA(batch_size=3, n_components=2) >>> ipca.transform(X) # doctest: +SKIP """ check_is_fitted(self) X = check_array(X) if self.mean_ is not None: X = X - self.mean_ X_transformed = np.dot(X, self.components_.T) if self.whiten: X_transformed /= np.sqrt(self.explained_variance_) return X_transformed def inverse_transform(self, X): """Transform data back to its original space. In other words, return an input X_original whose transform would be X. Parameters ---------- X : array-like, shape (n_samples, n_components) New data, where n_samples is the number of samples and n_components is the number of components. Returns ------- X_original array-like, shape (n_samples, n_features) Notes ----- If whitening is enabled, inverse_transform will compute the exact inverse operation, which includes reversing whitening. """ if self.whiten: return np.dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) * self.components_) + self.mean_ else: return np.dot(X, self.components_) + self.mean_