Uploaded Test files
This commit is contained in:
parent f584ad9d97
commit 2e81cb7d99
16627 changed files with 2065359 additions and 102444 deletions
venv/Lib/site-packages/sklearn/decomposition/_sparse_pca.py (new file, 420 additions)
@@ -0,0 +1,420 @@
"""Matrix factorization with Sparse PCA"""
|
||||
# Author: Vlad Niculae, Gael Varoquaux, Alexandre Gramfort
|
||||
# License: BSD 3 clause
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ..utils import check_random_state, check_array
|
||||
from ..utils.validation import check_is_fitted
|
||||
from ..utils.validation import _deprecate_positional_args
|
||||
from ..linear_model import ridge_regression
|
||||
from ..base import BaseEstimator, TransformerMixin
|
||||
from ._dict_learning import dict_learning, dict_learning_online
|
||||
|
||||
|
||||


# FIXME: remove in 0.24
def _check_normalize_components(normalize_components, estimator_name):
    if normalize_components != 'deprecated':
        if normalize_components:
            warnings.warn(
                "'normalize_components' has been deprecated in 0.22 and "
                "will be removed in 0.24. Remove the parameter from the "
                "constructor.", FutureWarning
            )
        else:
            raise NotImplementedError(
                "normalize_components=False is not supported starting from "
                "0.22. Remove this parameter from the constructor."
            )
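

# A minimal sketch (not part of scikit-learn) of the three paths through the
# deprecation shim above, wrapped in a function so that importing this module
# stays side-effect free:
def _demo_check_normalize_components():
    # The 'deprecated' sentinel, the default in both estimators below,
    # passes through silently.
    _check_normalize_components('deprecated', 'SparsePCA')
    # True is still accepted but emits a FutureWarning.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        _check_normalize_components(True, 'SparsePCA')
        assert any(issubclass(w.category, FutureWarning) for w in caught)
    # False has been unsupported since 0.22 and raises.
    try:
        _check_normalize_components(False, 'SparsePCA')
    except NotImplementedError:
        pass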


class SparsePCA(TransformerMixin, BaseEstimator):
    """Sparse Principal Components Analysis (SparsePCA)

    Finds the set of sparse components that can optimally reconstruct
    the data. The amount of sparseness is controllable by the coefficient
    of the L1 penalty, given by the parameter alpha.

    Read more in the :ref:`User Guide <SparsePCA>`.

    Parameters
    ----------
    n_components : int,
        Number of sparse atoms to extract.

    alpha : float,
        Sparsity controlling parameter. Higher values lead to sparser
        components.

    ridge_alpha : float,
        Amount of ridge shrinkage to apply in order to improve
        conditioning when calling the transform method.

    max_iter : int,
        Maximum number of iterations to perform.

    tol : float,
        Tolerance for the stopping condition.

    method : {'lars', 'cd'}
        lars: uses the least angle regression method to solve the lasso
        problem (linear_model.lars_path)
        cd: uses the coordinate descent method to compute the
        Lasso solution (linear_model.Lasso). Lars will be faster if
        the estimated components are sparse.

    n_jobs : int or None, optional (default=None)
        Number of parallel jobs to run.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    U_init : array of shape (n_samples, n_components),
        Initial values for the loadings for warm restart scenarios.

    V_init : array of shape (n_components, n_features),
        Initial values for the components for warm restart scenarios.

    verbose : int
        Controls the verbosity; the higher, the more messages. Defaults to 0.

    random_state : int, RandomState instance, default=None
        Used during dictionary learning. Pass an int for reproducible results
        across multiple function calls.
        See :term:`Glossary <random_state>`.

    normalize_components : 'deprecated'
        This parameter does not have any effect. The components are always
        normalized.

        .. versionadded:: 0.20

        .. deprecated:: 0.22
           ``normalize_components`` is deprecated in 0.22 and will be removed
           in 0.24.

    Attributes
    ----------
    components_ : array, [n_components, n_features]
        Sparse components extracted from the data.

    error_ : array
        Vector of errors at each iteration.

    n_components_ : int
        Estimated number of components.

        .. versionadded:: 0.23

    n_iter_ : int
        Number of iterations run.

    mean_ : array, shape (n_features,)
        Per-feature empirical mean, estimated from the training set.
        Equal to ``X.mean(axis=0)``.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.decomposition import SparsePCA
    >>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)
    >>> transformer = SparsePCA(n_components=5, random_state=0)
    >>> transformer.fit(X)
    SparsePCA(...)
    >>> X_transformed = transformer.transform(X)
    >>> X_transformed.shape
    (200, 5)
    >>> # most values in the components_ are zero (sparsity)
    >>> np.mean(transformer.components_ == 0)
    0.9666...

    See also
    --------
    PCA
    MiniBatchSparsePCA
    DictionaryLearning
    """
    @_deprecate_positional_args
    def __init__(self, n_components=None, *, alpha=1, ridge_alpha=0.01,
                 max_iter=1000, tol=1e-8, method='lars', n_jobs=None,
                 U_init=None, V_init=None, verbose=False, random_state=None,
                 normalize_components='deprecated'):
        self.n_components = n_components
        self.alpha = alpha
        self.ridge_alpha = ridge_alpha
        self.max_iter = max_iter
        self.tol = tol
        self.method = method
        self.n_jobs = n_jobs
        self.U_init = U_init
        self.V_init = V_init
        self.verbose = verbose
        self.random_state = random_state
        self.normalize_components = normalize_components

    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        y : Ignored

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        random_state = check_random_state(self.random_state)
        X = self._validate_data(X)

        _check_normalize_components(
            self.normalize_components, self.__class__.__name__
        )

        self.mean_ = X.mean(axis=0)
        X = X - self.mean_

        if self.n_components is None:
            n_components = X.shape[1]
        else:
            n_components = self.n_components
        code_init = self.V_init.T if self.V_init is not None else None
        dict_init = self.U_init.T if self.U_init is not None else None
        # Sparse PCA is dictionary learning on X.T: the sparse "code" of X.T
        # becomes the components of X.
        Vt, _, E, self.n_iter_ = dict_learning(X.T, n_components,
                                               alpha=self.alpha,
                                               tol=self.tol,
                                               max_iter=self.max_iter,
                                               method=self.method,
                                               n_jobs=self.n_jobs,
                                               verbose=self.verbose,
                                               random_state=random_state,
                                               code_init=code_init,
                                               dict_init=dict_init,
                                               return_n_iter=True)
        self.components_ = Vt.T
        # Normalize each component to unit L2 norm, guarding against
        # all-zero components.
        components_norm = np.linalg.norm(
            self.components_, axis=1)[:, np.newaxis]
        components_norm[components_norm == 0] = 1
        self.components_ /= components_norm
        self.n_components_ = len(self.components_)

        self.error_ = E
        return self

    def transform(self, X):
        """Least Squares projection of the data onto the sparse components.

        To avoid instability issues in case the system is under-determined,
        regularization can be applied (Ridge regression) via the
        `ridge_alpha` parameter.

        Note that the components' orthogonality is not enforced in Sparse
        PCA as it is in PCA; hence one cannot use a simple linear projection.

        Parameters
        ----------
        X : array of shape (n_samples, n_features)
            Test data to be transformed, must have the same number of
            features as the data used to train the model.

        Returns
        -------
        X_new : array, shape (n_samples, n_components)
            Transformed data.
        """
        check_is_fitted(self)

        X = check_array(X)
        X = X - self.mean_

        # Regularized least-squares code for each sample: one ridge problem
        # per row of X against the (non-orthogonal) components.
        U = ridge_regression(self.components_.T, X.T, self.ridge_alpha,
                             solver='cholesky')

        return U

    def _more_tags(self):
        return {
            '_xfail_checks': {
                "check_methods_subset_invariance":
                    "fails for the transform method"
            }
        }
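

# A minimal sketch (not part of scikit-learn) of the ridge projection that
# SparsePCA.transform performs, written in closed form: with V = components_
# and centered data Xc, the codes U solve (V V^T + ridge_alpha * I) U^T =
# V Xc^T, which in exact arithmetic matches the 'cholesky' ridge solve above.
def _demo_ridge_projection(est, X):
    Xc = check_array(X) - est.mean_
    V = est.components_
    gram = V @ V.T + est.ridge_alpha * np.eye(V.shape[0])
    # Solve for all samples at once; U has shape (n_samples, n_components).
    return np.linalg.solve(gram, V @ Xc.T).T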


class MiniBatchSparsePCA(SparsePCA):
    """Mini-batch Sparse Principal Components Analysis

    Finds the set of sparse components that can optimally reconstruct
    the data. The amount of sparseness is controllable by the coefficient
    of the L1 penalty, given by the parameter alpha.

    Read more in the :ref:`User Guide <SparsePCA>`.

    Parameters
    ----------
    n_components : int,
        Number of sparse atoms to extract.

    alpha : float,
        Sparsity controlling parameter. Higher values lead to sparser
        components.

    ridge_alpha : float,
        Amount of ridge shrinkage to apply in order to improve
        conditioning when calling the transform method.

    n_iter : int,
        Number of iterations to perform for each mini batch.

    callback : callable or None, optional (default: None)
        Callable that gets invoked every five iterations.

    batch_size : int,
        The number of features to take in each mini batch.

    verbose : int
        Controls the verbosity; the higher, the more messages. Defaults to 0.

    shuffle : boolean,
        Whether to shuffle the data before splitting it in batches.

    n_jobs : int or None, optional (default=None)
        Number of parallel jobs to run.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    method : {'lars', 'cd'}
        lars: uses the least angle regression method to solve the lasso
        problem (linear_model.lars_path)
        cd: uses the coordinate descent method to compute the
        Lasso solution (linear_model.Lasso). Lars will be faster if
        the estimated components are sparse.

    random_state : int, RandomState instance, default=None
        Used for random shuffling when ``shuffle`` is set to ``True``,
        during online dictionary learning. Pass an int for reproducible
        results across multiple function calls.
        See :term:`Glossary <random_state>`.

    normalize_components : 'deprecated'
        This parameter does not have any effect. The components are always
        normalized.

        .. versionadded:: 0.20

        .. deprecated:: 0.22
           ``normalize_components`` is deprecated in 0.22 and will be removed
           in 0.24.

    Attributes
    ----------
    components_ : array, [n_components, n_features]
        Sparse components extracted from the data.

    n_components_ : int
        Estimated number of components.

        .. versionadded:: 0.23

    n_iter_ : int
        Number of iterations run.

    mean_ : array, shape (n_features,)
        Per-feature empirical mean, estimated from the training set.
        Equal to ``X.mean(axis=0)``.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.decomposition import MiniBatchSparsePCA
    >>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)
    >>> transformer = MiniBatchSparsePCA(n_components=5, batch_size=50,
    ...                                  random_state=0)
    >>> transformer.fit(X)
    MiniBatchSparsePCA(...)
    >>> X_transformed = transformer.transform(X)
    >>> X_transformed.shape
    (200, 5)
    >>> # most values in the components_ are zero (sparsity)
    >>> np.mean(transformer.components_ == 0)
    0.94

    See also
    --------
    PCA
    SparsePCA
    DictionaryLearning
    """
    @_deprecate_positional_args
    def __init__(self, n_components=None, *, alpha=1, ridge_alpha=0.01,
                 n_iter=100, callback=None, batch_size=3, verbose=False,
                 shuffle=True, n_jobs=None, method='lars', random_state=None,
                 normalize_components='deprecated'):
        super().__init__(
            n_components=n_components, alpha=alpha, verbose=verbose,
            ridge_alpha=ridge_alpha, n_jobs=n_jobs, method=method,
            random_state=random_state,
            normalize_components=normalize_components)
        self.n_iter = n_iter
        self.callback = callback
        self.batch_size = batch_size
        self.shuffle = shuffle

    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        y : Ignored

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        random_state = check_random_state(self.random_state)
        X = self._validate_data(X)

        _check_normalize_components(
            self.normalize_components, self.__class__.__name__
        )

        self.mean_ = X.mean(axis=0)
        X = X - self.mean_

        if self.n_components is None:
            n_components = X.shape[1]
        else:
            n_components = self.n_components
        # As in SparsePCA.fit, run (online) dictionary learning on X.T so
        # that the sparse code of X.T becomes the components of X.
        Vt, _, self.n_iter_ = dict_learning_online(
            X.T, n_components, alpha=self.alpha,
            n_iter=self.n_iter, return_code=True,
            dict_init=None, verbose=self.verbose,
            callback=self.callback,
            batch_size=self.batch_size,
            shuffle=self.shuffle,
            n_jobs=self.n_jobs, method=self.method,
            random_state=random_state,
            return_n_iter=True)
        self.components_ = Vt.T

        # Normalize each component to unit L2 norm, guarding against
        # all-zero components.
        components_norm = np.linalg.norm(
            self.components_, axis=1)[:, np.newaxis]
        components_norm[components_norm == 0] = 1
        self.components_ /= components_norm
        self.n_components_ = len(self.components_)

        return self
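

# A short usage sketch (hypothetical helper, not part of scikit-learn) of the
# U_init / V_init warm-restart parameters documented above: fit a coarse
# SparsePCA, then seed a longer run with its factorization. fit() discards the
# loadings, so the ridge codes from transform() stand in for U_init here.
def _demo_warm_restart():
    from sklearn.datasets import make_friedman1

    X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)
    coarse = SparsePCA(n_components=5, max_iter=5, random_state=0).fit(X)
    warm = SparsePCA(n_components=5, random_state=0,
                     V_init=coarse.components_,  # (n_components, n_features)
                     U_init=coarse.transform(X)  # (n_samples, n_components)
                     ).fit(X)
    return warm.components_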