Uploaded Test files

This commit is contained in:
  parent f584ad9d97
  commit 2e81cb7d99

16627 changed files with 2065359 additions and 102444 deletions
15  venv/Lib/site-packages/sklearn/neural_network/__init__.py  Normal file

@@ -0,0 +1,15 @@
"""
The :mod:`sklearn.neural_network` module includes models based on neural
networks.
"""

# License: BSD 3 clause

from ._rbm import BernoulliRBM

from ._multilayer_perceptron import MLPClassifier
from ._multilayer_perceptron import MLPRegressor

__all__ = ["BernoulliRBM",
           "MLPClassifier",
           "MLPRegressor"]
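A minimal usage sketch of the public API re-exported above (illustrative only; the toy data and calls below are not part of the commit):

# Sketch: exercising the three estimators exported by sklearn.neural_network.
import numpy as np
from sklearn.neural_network import BernoulliRBM, MLPClassifier, MLPRegressor

X = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
y = np.array([0, 1, 1, 0])

clf = MLPClassifier(hidden_layer_sizes=(8,), max_iter=2000, random_state=0).fit(X, y)
print(clf.predict(X))                      # class predictions on the toy data

reg = MLPRegressor(hidden_layer_sizes=(8,), max_iter=2000, random_state=0).fit(X, y.astype(float))
print(reg.predict(X))                      # real-valued predictions

rbm = BernoulliRBM(n_components=2, random_state=0).fit(X)
print(rbm.transform(X).shape)              # latent representation, (4, 2)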
										
Binary file not shown. (7 files)
							
								
								
									
253  venv/Lib/site-packages/sklearn/neural_network/_base.py  Normal file

@@ -0,0 +1,253 @@
"""Utilities for the neural network modules
"""

# Author: Issam H. Laradji <issam.laradji@gmail.com>
# License: BSD 3 clause

import numpy as np

from scipy.special import expit as logistic_sigmoid
from scipy.special import xlogy


def identity(X):
    """Simply return the input array.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Same as the input data.
    """
    return X


def logistic(X):
    """Compute the logistic function inplace.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The input data.

    Returns
    -------
    X_new : {array-like, sparse matrix}, shape (n_samples, n_features)
        The transformed data.
    """
    return logistic_sigmoid(X, out=X)


def tanh(X):
    """Compute the hyperbolic tan function inplace.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The input data.

    Returns
    -------
    X_new : {array-like, sparse matrix}, shape (n_samples, n_features)
        The transformed data.
    """
    return np.tanh(X, out=X)


def relu(X):
    """Compute the rectified linear unit function inplace.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The input data.

    Returns
    -------
    X_new : {array-like, sparse matrix}, shape (n_samples, n_features)
        The transformed data.
    """
    np.clip(X, 0, np.finfo(X.dtype).max, out=X)
    return X


def softmax(X):
    """Compute the K-way softmax function inplace.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The input data.

    Returns
    -------
    X_new : {array-like, sparse matrix}, shape (n_samples, n_features)
        The transformed data.
    """
    tmp = X - X.max(axis=1)[:, np.newaxis]
    np.exp(tmp, out=X)
    X /= X.sum(axis=1)[:, np.newaxis]

    return X


ACTIVATIONS = {'identity': identity, 'tanh': tanh, 'logistic': logistic,
               'relu': relu, 'softmax': softmax}


def inplace_identity_derivative(Z, delta):
    """Apply the derivative of the identity function: do nothing.

    Parameters
    ----------
    Z : {array-like, sparse matrix}, shape (n_samples, n_features)
        The data which was output from the identity activation function during
        the forward pass.

    delta : {array-like}, shape (n_samples, n_features)
         The backpropagated error signal to be modified inplace.
    """
    # Nothing to do


def inplace_logistic_derivative(Z, delta):
    """Apply the derivative of the logistic sigmoid function.

    It exploits the fact that the derivative is a simple function of the output
    value from logistic function.

    Parameters
    ----------
    Z : {array-like, sparse matrix}, shape (n_samples, n_features)
        The data which was output from the logistic activation function during
        the forward pass.

    delta : {array-like}, shape (n_samples, n_features)
         The backpropagated error signal to be modified inplace.
    """
    delta *= Z
    delta *= (1 - Z)


def inplace_tanh_derivative(Z, delta):
    """Apply the derivative of the hyperbolic tanh function.

    It exploits the fact that the derivative is a simple function of the output
    value from hyperbolic tangent.

    Parameters
    ----------
    Z : {array-like, sparse matrix}, shape (n_samples, n_features)
        The data which was output from the hyperbolic tangent activation
        function during the forward pass.

    delta : {array-like}, shape (n_samples, n_features)
         The backpropagated error signal to be modified inplace.
    """
    delta *= (1 - Z ** 2)


def inplace_relu_derivative(Z, delta):
    """Apply the derivative of the relu function.

    It exploits the fact that the derivative is a simple function of the output
    value from rectified linear units activation function.

    Parameters
    ----------
    Z : {array-like, sparse matrix}, shape (n_samples, n_features)
        The data which was output from the rectified linear units activation
        function during the forward pass.

    delta : {array-like}, shape (n_samples, n_features)
         The backpropagated error signal to be modified inplace.
    """
    delta[Z == 0] = 0


DERIVATIVES = {'identity': inplace_identity_derivative,
               'tanh': inplace_tanh_derivative,
               'logistic': inplace_logistic_derivative,
               'relu': inplace_relu_derivative}


def squared_loss(y_true, y_pred):
    """Compute the squared loss for regression.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) values.

    y_pred : array-like or label indicator matrix
        Predicted values, as returned by a regression estimator.

    Returns
    -------
    loss : float
        The degree to which the samples are correctly predicted.
    """
    return ((y_true - y_pred) ** 2).mean() / 2


def log_loss(y_true, y_prob):
    """Compute Logistic loss for classification.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels.

    y_prob : array-like of float, shape = (n_samples, n_classes)
        Predicted probabilities, as returned by a classifier's
        predict_proba method.

    Returns
    -------
    loss : float
        The degree to which the samples are correctly predicted.
    """
    eps = np.finfo(y_prob.dtype).eps
    y_prob = np.clip(y_prob, eps, 1 - eps)
    if y_prob.shape[1] == 1:
        y_prob = np.append(1 - y_prob, y_prob, axis=1)

    if y_true.shape[1] == 1:
        y_true = np.append(1 - y_true, y_true, axis=1)

    return - xlogy(y_true, y_prob).sum() / y_prob.shape[0]


def binary_log_loss(y_true, y_prob):
    """Compute binary logistic loss for classification.

    This is identical to log_loss in binary classification case,
    but is kept for its use in multilabel case.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels.

    y_prob : array-like of float, shape = (n_samples, 1)
        Predicted probabilities, as returned by a classifier's
        predict_proba method.

    Returns
    -------
    loss : float
        The degree to which the samples are correctly predicted.
    """
    eps = np.finfo(y_prob.dtype).eps
    y_prob = np.clip(y_prob, eps, 1 - eps)
    return -(xlogy(y_true, y_prob) +
             xlogy(1 - y_true, 1 - y_prob)).sum() / y_prob.shape[0]


LOSS_FUNCTIONS = {'squared_loss': squared_loss, 'log_loss': log_loss,
                  'binary_log_loss': binary_log_loss}
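A short sketch of how the module-level tables above are meant to be consumed; the arrays are invented for illustration, and the in-place semantics follow the docstrings:

# Sketch: driving the ACTIVATIONS / DERIVATIVES / LOSS_FUNCTIONS tables directly.
import numpy as np
from sklearn.neural_network._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS

Z = np.array([[0.5, -1.0], [2.0, 0.1]])

out = ACTIVATIONS['relu'](Z.copy())        # modifies (a copy of) its input in place
delta = np.ones_like(out)
DERIVATIVES['relu'](out, delta)            # zeroes delta wherever the unit was clipped

y_true = np.array([[1, 0], [0, 1]])
y_prob = ACTIVATIONS['softmax'](Z.copy())  # rows sum to 1
print(LOSS_FUNCTIONS['log_loss'](y_true, y_prob))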
										
											
File diff suppressed because it is too large
							
								
								
									
383  venv/Lib/site-packages/sklearn/neural_network/_rbm.py  Normal file

@@ -0,0 +1,383 @@
"""Restricted Boltzmann Machine
"""

# Authors: Yann N. Dauphin <dauphiya@iro.umontreal.ca>
#          Vlad Niculae
#          Gabriel Synnaeve
#          Lars Buitinck
# License: BSD 3 clause

import time

import numpy as np
import scipy.sparse as sp
from scipy.special import expit  # logistic function

from ..base import BaseEstimator
from ..base import TransformerMixin
from ..utils import check_array
from ..utils import check_random_state
from ..utils import gen_even_slices
from ..utils.extmath import safe_sparse_dot
from ..utils.extmath import log_logistic
from ..utils.validation import check_is_fitted, _deprecate_positional_args


class BernoulliRBM(TransformerMixin, BaseEstimator):
    """Bernoulli Restricted Boltzmann Machine (RBM).

    A Restricted Boltzmann Machine with binary visible units and
    binary hidden units. Parameters are estimated using Stochastic Maximum
    Likelihood (SML), also known as Persistent Contrastive Divergence (PCD)
    [2].

    The time complexity of this implementation is ``O(d ** 2)`` assuming
    d ~ n_features ~ n_components.

    Read more in the :ref:`User Guide <rbm>`.

    Parameters
    ----------
    n_components : int, default=256
        Number of binary hidden units.

    learning_rate : float, default=0.1
        The learning rate for weight updates. It is *highly* recommended
        to tune this hyper-parameter. Reasonable values are in the
        10**[0., -3.] range.

    batch_size : int, default=10
        Number of examples per minibatch.

    n_iter : int, default=10
        Number of iterations/sweeps over the training dataset to perform
        during training.

    verbose : int, default=0
        The verbosity level. The default, zero, means silent mode.

    random_state : integer or RandomState, default=None
        Determines random number generation for:

        - Gibbs sampling from visible and hidden layers.

        - Initializing components, sampling from layers during fit.

        - Corrupting the data when scoring samples.

        Pass an int for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    intercept_hidden_ : array-like, shape (n_components,)
        Biases of the hidden units.

    intercept_visible_ : array-like, shape (n_features,)
        Biases of the visible units.

    components_ : array-like, shape (n_components, n_features)
        Weight matrix, where n_features in the number of
        visible units and n_components is the number of hidden units.

    h_samples_ : array-like, shape (batch_size, n_components)
        Hidden Activation sampled from the model distribution,
        where batch_size in the number of examples per minibatch and
        n_components is the number of hidden units.

    Examples
    --------

    >>> import numpy as np
    >>> from sklearn.neural_network import BernoulliRBM
    >>> X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
    >>> model = BernoulliRBM(n_components=2)
    >>> model.fit(X)
    BernoulliRBM(n_components=2)

    References
    ----------

    [1] Hinton, G. E., Osindero, S. and Teh, Y. A fast learning algorithm for
        deep belief nets. Neural Computation 18, pp 1527-1554.
        https://www.cs.toronto.edu/~hinton/absps/fastnc.pdf

    [2] Tieleman, T. Training Restricted Boltzmann Machines using
        Approximations to the Likelihood Gradient. International Conference
        on Machine Learning (ICML) 2008
    """
    @_deprecate_positional_args
    def __init__(self, n_components=256, *, learning_rate=0.1, batch_size=10,
                 n_iter=10, verbose=0, random_state=None):
        self.n_components = n_components
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.n_iter = n_iter
        self.verbose = verbose
        self.random_state = random_state

    def transform(self, X):
        """Compute the hidden layer activation probabilities, P(h=1|v=X).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The data to be transformed.

        Returns
        -------
        h : ndarray of shape (n_samples, n_components)
            Latent representations of the data.
        """
        check_is_fitted(self)

        X = check_array(X, accept_sparse='csr', dtype=np.float64)
        return self._mean_hiddens(X)

    def _mean_hiddens(self, v):
        """Computes the probabilities P(h=1|v).

        Parameters
        ----------
        v : ndarray of shape (n_samples, n_features)
            Values of the visible layer.

        Returns
        -------
        h : ndarray of shape (n_samples, n_components)
            Corresponding mean field values for the hidden layer.
        """
        p = safe_sparse_dot(v, self.components_.T)
        p += self.intercept_hidden_
        return expit(p, out=p)

    def _sample_hiddens(self, v, rng):
        """Sample from the distribution P(h|v).

        Parameters
        ----------
        v : ndarray of shape (n_samples, n_features)
            Values of the visible layer to sample from.

        rng : RandomState
            Random number generator to use.

        Returns
        -------
        h : ndarray of shape (n_samples, n_components)
            Values of the hidden layer.
        """
        p = self._mean_hiddens(v)
        return (rng.random_sample(size=p.shape) < p)

    def _sample_visibles(self, h, rng):
        """Sample from the distribution P(v|h).

        Parameters
        ----------
        h : ndarray of shape (n_samples, n_components)
            Values of the hidden layer to sample from.

        rng : RandomState
            Random number generator to use.

        Returns
        -------
        v : ndarray of shape (n_samples, n_features)
            Values of the visible layer.
        """
        p = np.dot(h, self.components_)
        p += self.intercept_visible_
        expit(p, out=p)
        return (rng.random_sample(size=p.shape) < p)

    def _free_energy(self, v):
        """Computes the free energy F(v) = - log sum_h exp(-E(v,h)).

        Parameters
        ----------
        v : ndarray of shape (n_samples, n_features)
            Values of the visible layer.

        Returns
        -------
        free_energy : ndarray of shape (n_samples,)
            The value of the free energy.
        """
        return (- safe_sparse_dot(v, self.intercept_visible_)
                - np.logaddexp(0, safe_sparse_dot(v, self.components_.T)
                               + self.intercept_hidden_).sum(axis=1))

    def gibbs(self, v):
        """Perform one Gibbs sampling step.

        Parameters
        ----------
        v : ndarray of shape (n_samples, n_features)
            Values of the visible layer to start from.

        Returns
        -------
        v_new : ndarray of shape (n_samples, n_features)
            Values of the visible layer after one Gibbs step.
        """
        check_is_fitted(self)
        if not hasattr(self, "random_state_"):
            self.random_state_ = check_random_state(self.random_state)
        h_ = self._sample_hiddens(v, self.random_state_)
        v_ = self._sample_visibles(h_, self.random_state_)

        return v_

    def partial_fit(self, X, y=None):
        """Fit the model to the data X which should contain a partial
        segment of the data.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training data.

        Returns
        -------
        self : BernoulliRBM
            The fitted model.
        """
        X = check_array(X, accept_sparse='csr', dtype=np.float64)
        if not hasattr(self, 'random_state_'):
            self.random_state_ = check_random_state(self.random_state)
        if not hasattr(self, 'components_'):
            self.components_ = np.asarray(
                self.random_state_.normal(
                    0,
                    0.01,
                    (self.n_components, X.shape[1])
                ),
                order='F')
        if not hasattr(self, 'intercept_hidden_'):
            self.intercept_hidden_ = np.zeros(self.n_components, )
        if not hasattr(self, 'intercept_visible_'):
            self.intercept_visible_ = np.zeros(X.shape[1], )
        if not hasattr(self, 'h_samples_'):
            self.h_samples_ = np.zeros((self.batch_size, self.n_components))

        self._fit(X, self.random_state_)

    def _fit(self, v_pos, rng):
        """Inner fit for one mini-batch.

        Adjust the parameters to maximize the likelihood of v using
        Stochastic Maximum Likelihood (SML).

        Parameters
        ----------
        v_pos : ndarray of shape (n_samples, n_features)
            The data to use for training.

        rng : RandomState
            Random number generator to use for sampling.
        """
        h_pos = self._mean_hiddens(v_pos)
        v_neg = self._sample_visibles(self.h_samples_, rng)
        h_neg = self._mean_hiddens(v_neg)

        lr = float(self.learning_rate) / v_pos.shape[0]
        update = safe_sparse_dot(v_pos.T, h_pos, dense_output=True).T
        update -= np.dot(h_neg.T, v_neg)
        self.components_ += lr * update
        self.intercept_hidden_ += lr * (h_pos.sum(axis=0) - h_neg.sum(axis=0))
        self.intercept_visible_ += lr * (np.asarray(
                                         v_pos.sum(axis=0)).squeeze() -
                                         v_neg.sum(axis=0))

        h_neg[rng.uniform(size=h_neg.shape) < h_neg] = 1.0  # sample binomial
        self.h_samples_ = np.floor(h_neg, h_neg)

    def score_samples(self, X):
        """Compute the pseudo-likelihood of X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Values of the visible layer. Must be all-boolean (not checked).

        Returns
        -------
        pseudo_likelihood : ndarray of shape (n_samples,)
            Value of the pseudo-likelihood (proxy for likelihood).

        Notes
        -----
        This method is not deterministic: it computes a quantity called the
        free energy on X, then on a randomly corrupted version of X, and
        returns the log of the logistic function of the difference.
        """
        check_is_fitted(self)

        v = check_array(X, accept_sparse='csr')
        rng = check_random_state(self.random_state)

        # Randomly corrupt one feature in each sample in v.
        ind = (np.arange(v.shape[0]),
               rng.randint(0, v.shape[1], v.shape[0]))
        if sp.issparse(v):
            data = -2 * v[ind] + 1
            v_ = v + sp.csr_matrix((data.A.ravel(), ind), shape=v.shape)
        else:
            v_ = v.copy()
            v_[ind] = 1 - v_[ind]

        fe = self._free_energy(v)
        fe_ = self._free_energy(v_)
        return v.shape[1] * log_logistic(fe_ - fe)

    def fit(self, X, y=None):
        """Fit the model to the data X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        Returns
        -------
        self : BernoulliRBM
            The fitted model.
        """
        X = self._validate_data(X, accept_sparse='csr', dtype=np.float64)
        n_samples = X.shape[0]
        rng = check_random_state(self.random_state)

        self.components_ = np.asarray(
            rng.normal(0, 0.01, (self.n_components, X.shape[1])),
            order='F')
        self.intercept_hidden_ = np.zeros(self.n_components, )
        self.intercept_visible_ = np.zeros(X.shape[1], )
        self.h_samples_ = np.zeros((self.batch_size, self.n_components))

        n_batches = int(np.ceil(float(n_samples) / self.batch_size))
        batch_slices = list(gen_even_slices(n_batches * self.batch_size,
                                            n_batches, n_samples=n_samples))
        verbose = self.verbose
        begin = time.time()
        for iteration in range(1, self.n_iter + 1):
            for batch_slice in batch_slices:
                self._fit(X[batch_slice], rng)

            if verbose:
                end = time.time()
                print("[%s] Iteration %d, pseudo-likelihood = %.2f,"
                      " time = %.2fs"
                      % (type(self).__name__, iteration,
                         self.score_samples(X).mean(), end - begin))
                begin = end

        return self

    def _more_tags(self):
        return {
            '_xfail_checks': {
                'check_methods_subset_invariance':
                'fails for the decision_function method'
            }
        }
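Beyond the fit example in the class docstring, the estimator added here also exposes transform, gibbs and score_samples; a hedged sketch on toy binary data (not part of the commit):

# Sketch: the BernoulliRBM API defined above, on illustrative data.
import numpy as np
from sklearn.neural_network import BernoulliRBM

X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]], dtype=np.float64)
rbm = BernoulliRBM(n_components=2, learning_rate=0.05, n_iter=20, random_state=0).fit(X)

H = rbm.transform(X)       # P(h=1|v) per sample, shape (4, 2)
v_new = rbm.gibbs(X)       # one Gibbs sampling step from the visible layer
pl = rbm.score_samples(X)  # stochastic pseudo-likelihood proxy, shape (4,)
print(H.shape, v_new.shape, pl.shape)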
			
@@ -0,0 +1,270 @@
"""Stochastic optimization methods for MLP
"""

# Authors: Jiyuan Qian <jq401@nyu.edu>
# License: BSD 3 clause

import numpy as np


class BaseOptimizer:
    """Base (Stochastic) gradient descent optimizer

    Parameters
    ----------
    params : list, length = len(coefs_) + len(intercepts_)
        The concatenated list containing coefs_ and intercepts_ in MLP model.
        Used for initializing velocities and updating params

    learning_rate_init : float, default=0.1
        The initial learning rate used. It controls the step-size in updating
        the weights

    Attributes
    ----------
    learning_rate : float
        the current learning rate
    """

    def __init__(self, params, learning_rate_init=0.1):
        self.params = [param for param in params]
        self.learning_rate_init = learning_rate_init
        self.learning_rate = float(learning_rate_init)

    def update_params(self, grads):
        """Update parameters with given gradients

        Parameters
        ----------
        grads : list, length = len(params)
            Containing gradients with respect to coefs_ and intercepts_ in MLP
            model. So length should be aligned with params
        """
        updates = self._get_updates(grads)
        for param, update in zip(self.params, updates):
            param += update

    def iteration_ends(self, time_step):
        """Perform update to learning rate and potentially other states at the
        end of an iteration
        """
        pass

    def trigger_stopping(self, msg, verbose):
        """Decides whether it is time to stop training

        Parameters
        ----------
        msg : str
            Message passed in for verbose output

        verbose : bool
            Print message to stdin if True

        Returns
        -------
        is_stopping : bool
            True if training needs to stop
        """
        if verbose:
            print(msg + " Stopping.")
        return True


class SGDOptimizer(BaseOptimizer):
    """Stochastic gradient descent optimizer with momentum

    Parameters
    ----------
    params : list, length = len(coefs_) + len(intercepts_)
        The concatenated list containing coefs_ and intercepts_ in MLP model.
        Used for initializing velocities and updating params

    learning_rate_init : float, default=0.1
        The initial learning rate used. It controls the step-size in updating
        the weights

    lr_schedule : {'constant', 'adaptive', 'invscaling'}, default='constant'
        Learning rate schedule for weight updates.

        -'constant', is a constant learning rate given by
         'learning_rate_init'.

        -'invscaling' gradually decreases the learning rate 'learning_rate_' at
          each time step 't' using an inverse scaling exponent of 'power_t'.
          learning_rate_ = learning_rate_init / pow(t, power_t)

        -'adaptive', keeps the learning rate constant to
         'learning_rate_init' as long as the training keeps decreasing.
         Each time 2 consecutive epochs fail to decrease the training loss by
         tol, or fail to increase validation score by tol if 'early_stopping'
         is on, the current learning rate is divided by 5.

    momentum : float, default=0.9
        Value of momentum used, must be larger than or equal to 0

    nesterov : bool, default=True
        Whether to use nesterov's momentum or not. Use nesterov's if True

    power_t : float, default=0.5
        Power of time step 't' in inverse scaling. See `lr_schedule` for
        more details.

    Attributes
    ----------
    learning_rate : float
        the current learning rate

    velocities : list, length = len(params)
        velocities that are used to update params
    """

    def __init__(self, params, learning_rate_init=0.1, lr_schedule='constant',
                 momentum=0.9, nesterov=True, power_t=0.5):
        super().__init__(params, learning_rate_init)

        self.lr_schedule = lr_schedule
        self.momentum = momentum
        self.nesterov = nesterov
        self.power_t = power_t
        self.velocities = [np.zeros_like(param) for param in params]

    def iteration_ends(self, time_step):
        """Perform updates to learning rate and potential other states at the
        end of an iteration

        Parameters
        ----------
        time_step : int
            number of training samples trained on so far, used to update
            learning rate for 'invscaling'
        """
        if self.lr_schedule == 'invscaling':
            self.learning_rate = (float(self.learning_rate_init) /
                                  (time_step + 1) ** self.power_t)

    def trigger_stopping(self, msg, verbose):
        if self.lr_schedule != 'adaptive':
            if verbose:
                print(msg + " Stopping.")
            return True

        if self.learning_rate <= 1e-6:
            if verbose:
                print(msg + " Learning rate too small. Stopping.")
            return True

        self.learning_rate /= 5.
        if verbose:
            print(msg + " Setting learning rate to %f" %
                  self.learning_rate)
        return False

    def _get_updates(self, grads):
        """Get the values used to update params with given gradients

        Parameters
        ----------
        grads : list, length = len(coefs_) + len(intercepts_)
            Containing gradients with respect to coefs_ and intercepts_ in MLP
            model. So length should be aligned with params

        Returns
        -------
        updates : list, length = len(grads)
            The values to add to params
        """
        updates = [self.momentum * velocity - self.learning_rate * grad
                   for velocity, grad in zip(self.velocities, grads)]
        self.velocities = updates

        if self.nesterov:
            updates = [self.momentum * velocity - self.learning_rate * grad
                       for velocity, grad in zip(self.velocities, grads)]

        return updates


class AdamOptimizer(BaseOptimizer):
    """Stochastic gradient descent optimizer with Adam

    Note: All default values are from the original Adam paper

    Parameters
    ----------
    params : list, length = len(coefs_) + len(intercepts_)
        The concatenated list containing coefs_ and intercepts_ in MLP model.
        Used for initializing velocities and updating params

    learning_rate_init : float, default=0.001
        The initial learning rate used. It controls the step-size in updating
        the weights

    beta_1 : float, default=0.9
        Exponential decay rate for estimates of first moment vector, should be
        in [0, 1)

    beta_2 : float, default=0.999
        Exponential decay rate for estimates of second moment vector, should be
        in [0, 1)

    epsilon : float, default=1e-8
        Value for numerical stability

    Attributes
    ----------
    learning_rate : float
        The current learning rate

    t : int
        Timestep

    ms : list, length = len(params)
        First moment vectors

    vs : list, length = len(params)
        Second moment vectors

    References
    ----------
    Kingma, Diederik, and Jimmy Ba.
    "Adam: A method for stochastic optimization."
    arXiv preprint arXiv:1412.6980 (2014).
    """

    def __init__(self, params, learning_rate_init=0.001, beta_1=0.9,
                 beta_2=0.999, epsilon=1e-8):
        super().__init__(params, learning_rate_init)

        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        self.t = 0
        self.ms = [np.zeros_like(param) for param in params]
        self.vs = [np.zeros_like(param) for param in params]

    def _get_updates(self, grads):
        """Get the values used to update params with given gradients

        Parameters
        ----------
        grads : list, length = len(coefs_) + len(intercepts_)
            Containing gradients with respect to coefs_ and intercepts_ in MLP
            model. So length should be aligned with params

        Returns
        -------
        updates : list, length = len(grads)
            The values to add to params
        """
        self.t += 1
        self.ms = [self.beta_1 * m + (1 - self.beta_1) * grad
                   for m, grad in zip(self.ms, grads)]
        self.vs = [self.beta_2 * v + (1 - self.beta_2) * (grad ** 2)
                   for v, grad in zip(self.vs, grads)]
        self.learning_rate = (self.learning_rate_init *
                              np.sqrt(1 - self.beta_2 ** self.t) /
                              (1 - self.beta_1 ** self.t))
        updates = [-self.learning_rate * m / (np.sqrt(v) + self.epsilon)
                   for m, v in zip(self.ms, self.vs)]
        return updates
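These optimizers are internal helpers for the MLP estimators, but they can be exercised on their own; a sketch with made-up parameters and gradients, assuming the module lives at sklearn.neural_network._stochastic_optimizers as in current scikit-learn:

# Sketch: one update step with each optimizer defined above (toy params/grads).
import numpy as np
from sklearn.neural_network._stochastic_optimizers import SGDOptimizer, AdamOptimizer

params = [np.zeros((3, 2)), np.zeros(2)]           # stand-ins for coefs_ + intercepts_
grads = [np.full((3, 2), 0.1), np.full(2, 0.1)]    # a fake gradient

sgd = SGDOptimizer(params, learning_rate_init=0.1, momentum=0.9, nesterov=True)
sgd.update_params(grads)                           # params are modified in place

adam = AdamOptimizer(params, learning_rate_init=0.001)
adam.update_params(grads)
print(params[0][0], adam.learning_rate)            # bias-corrected step size after t=1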
			
@@ -0,0 +1,18 @@

# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _multilayer_perceptron  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.neural_network.multilayer_perceptron'
correct_import_path = 'sklearn.neural_network'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)

def __getattr__(name):
    return getattr(_multilayer_perceptron, name)

if not sys.version_info >= (3, 7):
    Pep562(__name__)
18  venv/Lib/site-packages/sklearn/neural_network/rbm.py  Normal file

@@ -0,0 +1,18 @@

# THIS FILE WAS AUTOMATICALLY GENERATED BY deprecated_modules.py
import sys
# mypy error: Module X has no attribute y (typically for C extensions)
from . import _rbm  # type: ignore
from ..externals._pep562 import Pep562
from ..utils.deprecation import _raise_dep_warning_if_not_pytest

deprecated_path = 'sklearn.neural_network.rbm'
correct_import_path = 'sklearn.neural_network'

_raise_dep_warning_if_not_pytest(deprecated_path, correct_import_path)

def __getattr__(name):
    return getattr(_rbm, name)

if not sys.version_info >= (3, 7):
    Pep562(__name__)
										
Binary file not shown. (5 files)
@@ -0,0 +1,26 @@
import pytest
import numpy as np

from sklearn.neural_network._base import binary_log_loss
from sklearn.neural_network._base import log_loss


def test_binary_log_loss_1_prob_finite():
    # y_proba is equal to one should result in a finite logloss
    y_true = np.array([[0, 0, 1]]).T
    y_prob = np.array([[0.9, 1.0, 1.0]]).T

    loss = binary_log_loss(y_true, y_prob)
    assert np.isfinite(loss)


@pytest.mark.parametrize("y_true, y_prob", [
    (np.array([[1, 0, 0], [0, 1, 0]]),
     np.array([[0., 1., 0.], [0.9, 0.05, 0.05]])),
    (np.array([[0, 0, 1]]).T,
     np.array([[0.9, 1.0, 1.0]]).T),
])
def test_log_loss_1_prob_finite(y_true, y_prob):
    # y_proba is equal to 1 should result in a finite logloss
    loss = log_loss(y_true, y_prob)
    assert np.isfinite(loss)
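These tests lean on the eps-clipping in _base.py: a predicted probability of exactly 1.0 is clipped to 1 - eps (about 2.2e-16 away from 1 for float64) before the log, so the loss stays finite. A quick numeric check (illustrative, not part of the commit):

# Sketch: why a probability of exactly 1.0 still yields a finite loss.
import numpy as np
from sklearn.neural_network._base import binary_log_loss

y_true = np.array([[0, 0, 1]]).T
y_prob = np.array([[0.9, 1.0, 1.0]]).T
print(np.finfo(np.float64).eps)         # ~2.22e-16, the clipping margin
print(binary_log_loss(y_true, y_prob))  # finite despite p == 1.0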
							
								
								
									
718  venv/Lib/site-packages/sklearn/neural_network/tests/test_mlp.py  Normal file

@@ -0,0 +1,718 @@
 | 
			
		|||
"""
 | 
			
		||||
Testing for Multi-layer Perceptron module (sklearn.neural_network)
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
# Author: Issam H. Laradji
 | 
			
		||||
# License: BSD 3 clause
 | 
			
		||||
 | 
			
		||||
import pytest
 | 
			
		||||
import sys
 | 
			
		||||
import warnings
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
import numpy as np
 | 
			
		||||
 | 
			
		||||
from numpy.testing import assert_almost_equal, assert_array_equal
 | 
			
		||||
 | 
			
		||||
from sklearn.datasets import load_digits, load_boston, load_iris
 | 
			
		||||
from sklearn.datasets import make_regression, make_multilabel_classification
 | 
			
		||||
from sklearn.exceptions import ConvergenceWarning
 | 
			
		||||
from io import StringIO
 | 
			
		||||
from sklearn.metrics import roc_auc_score
 | 
			
		||||
from sklearn.neural_network import MLPClassifier
 | 
			
		||||
from sklearn.neural_network import MLPRegressor
 | 
			
		||||
from sklearn.preprocessing import LabelBinarizer
 | 
			
		||||
from sklearn.preprocessing import StandardScaler, MinMaxScaler
 | 
			
		||||
from scipy.sparse import csr_matrix
 | 
			
		||||
from sklearn.utils._testing import ignore_warnings
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
ACTIVATION_TYPES = ["identity", "logistic", "tanh", "relu"]
 | 
			
		||||
 | 
			
		||||
X_digits, y_digits = load_digits(n_class=3, return_X_y=True)
 | 
			
		||||
 | 
			
		||||
X_digits_multi = MinMaxScaler().fit_transform(X_digits[:200])
 | 
			
		||||
y_digits_multi = y_digits[:200]
 | 
			
		||||
 | 
			
		||||
X_digits, y_digits = load_digits(n_class=2, return_X_y=True)
 | 
			
		||||
 | 
			
		||||
X_digits_binary = MinMaxScaler().fit_transform(X_digits[:200])
 | 
			
		||||
y_digits_binary = y_digits[:200]
 | 
			
		||||
 | 
			
		||||
classification_datasets = [(X_digits_multi, y_digits_multi),
 | 
			
		||||
                           (X_digits_binary, y_digits_binary)]
 | 
			
		||||
 | 
			
		||||
boston = load_boston()
 | 
			
		||||
 | 
			
		||||
Xboston = StandardScaler().fit_transform(boston.data)[: 200]
 | 
			
		||||
yboston = boston.target[:200]
 | 
			
		||||
 | 
			
		||||
regression_datasets = [(Xboston, yboston)]
 | 
			
		||||
 | 
			
		||||
iris = load_iris()
 | 
			
		||||
 | 
			
		||||
X_iris = iris.data
 | 
			
		||||
y_iris = iris.target
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_alpha():
 | 
			
		||||
    # Test that larger alpha yields weights closer to zero
 | 
			
		||||
    X = X_digits_binary[:100]
 | 
			
		||||
    y = y_digits_binary[:100]
 | 
			
		||||
 | 
			
		||||
    alpha_vectors = []
 | 
			
		||||
    alpha_values = np.arange(2)
 | 
			
		||||
    absolute_sum = lambda x: np.sum(np.abs(x))
 | 
			
		||||
 | 
			
		||||
    for alpha in alpha_values:
 | 
			
		||||
        mlp = MLPClassifier(hidden_layer_sizes=10, alpha=alpha, random_state=1)
 | 
			
		||||
        with ignore_warnings(category=ConvergenceWarning):
 | 
			
		||||
            mlp.fit(X, y)
 | 
			
		||||
        alpha_vectors.append(np.array([absolute_sum(mlp.coefs_[0]),
 | 
			
		||||
                                       absolute_sum(mlp.coefs_[1])]))
 | 
			
		||||
 | 
			
		||||
    for i in range(len(alpha_values) - 1):
 | 
			
		||||
        assert (alpha_vectors[i] > alpha_vectors[i + 1]).all()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_fit():
 | 
			
		||||
    # Test that the algorithm solution is equal to a worked out example.
 | 
			
		||||
    X = np.array([[0.6, 0.8, 0.7]])
 | 
			
		||||
    y = np.array([0])
 | 
			
		||||
    mlp = MLPClassifier(solver='sgd', learning_rate_init=0.1, alpha=0.1,
 | 
			
		||||
                        activation='logistic', random_state=1, max_iter=1,
 | 
			
		||||
                        hidden_layer_sizes=2, momentum=0)
 | 
			
		||||
    # set weights
 | 
			
		||||
    mlp.coefs_ = [0] * 2
 | 
			
		||||
    mlp.intercepts_ = [0] * 2
    mlp.n_outputs_ = 1
    mlp.coefs_[0] = np.array([[0.1, 0.2], [0.3, 0.1], [0.5, 0]])
    mlp.coefs_[1] = np.array([[0.1], [0.2]])
    mlp.intercepts_[0] = np.array([0.1, 0.1])
    mlp.intercepts_[1] = np.array([1.0])
    mlp._coef_grads = [] * 2
    mlp._intercept_grads = [] * 2

    # Initialize parameters
    mlp.n_iter_ = 0
    mlp.learning_rate_ = 0.1

    # Compute the number of layers
    mlp.n_layers_ = 3

    # Pre-allocate gradient matrices
    mlp._coef_grads = [0] * (mlp.n_layers_ - 1)
    mlp._intercept_grads = [0] * (mlp.n_layers_ - 1)

    mlp.out_activation_ = 'logistic'
    mlp.t_ = 0
    mlp.best_loss_ = np.inf
    mlp.loss_curve_ = []
    mlp._no_improvement_count = 0
    mlp._intercept_velocity = [np.zeros_like(intercepts) for
                               intercepts in
                               mlp.intercepts_]
    mlp._coef_velocity = [np.zeros_like(coefs) for coefs in
                          mlp.coefs_]

    mlp.partial_fit(X, y, classes=[0, 1])
    # Manually worked out example
    # h1 = g(X1 * W_i1 + b11) = g(0.6 * 0.1 + 0.8 * 0.3 + 0.7 * 0.5 + 0.1)
    #       =  0.679178699175393
    # h2 = g(X2 * W_i2 + b12) = g(0.6 * 0.2 + 0.8 * 0.1 + 0.7 * 0 + 0.1)
    #         = 0.574442516811659
    # o1 = g(h * W2 + b21) = g(0.679 * 0.1 + 0.574 * 0.2 + 1)
    #       = 0.7654329236196236
    # d21 = -(0 - 0.765) = 0.765
    # d11 = (1 - 0.679) * 0.679 * 0.765 * 0.1 = 0.01667
    # d12 = (1 - 0.574) * 0.574 * 0.765 * 0.2 = 0.0374
    # W1grad11 = X1 * d11 + alpha * W11 = 0.6 * 0.01667 + 0.1 * 0.1 = 0.0200
    # W1grad12 = X1 * d12 + alpha * W12 = 0.6 * 0.0374 + 0.1 * 0.2 = 0.04244
    # W1grad21 = X2 * d11 + alpha * W13 = 0.8 * 0.01667 + 0.1 * 0.3 = 0.043336
    # W1grad22 = X2 * d12 + alpha * W14 = 0.8 * 0.0374 + 0.1 * 0.1 = 0.03992
    # W1grad31 = X3 * d11 + alpha * W15 = 0.6 * 0.01667 + 0.1 * 0.5 = 0.060002
    # W1grad32 = X3 * d12 + alpha * W16 = 0.6 * 0.0374 + 0.1 * 0 = 0.02244
    # W2grad1 = h1 * d21 + alpha * W21 = 0.679 * 0.765 + 0.1 * 0.1 = 0.5294
    # W2grad2 = h2 * d21 + alpha * W22 = 0.574 * 0.765 + 0.1 * 0.2 = 0.45911
    # b1grad1 = d11 = 0.01667
    # b1grad2 = d12 = 0.0374
    # b2grad = d21 = 0.765
    # W1 = W1 - eta * [W1grad11, .., W1grad32] = [[0.1, 0.2], [0.3, 0.1],
    #          [0.5, 0]] - 0.1 * [[0.0200, 0.04244], [0.043336, 0.03992],
    #          [0.060002, 0.02244]] = [[0.098, 0.195756], [0.2956664,
    #          0.096008], [0.4939998, -0.002244]]
    # W2 = W2 - eta * [W2grad1, W2grad2] = [[0.1], [0.2]] - 0.1 *
    #        [[0.5294], [0.45911]] = [[0.04706], [0.154089]]
    # b1 = b1 - eta * [b1grad1, b1grad2] = 0.1 - 0.1 * [0.01667, 0.0374]
    #         = [0.098333, 0.09626]
    # b2 = b2 - eta * b2grad = 1.0 - 0.1 * 0.765 = 0.9235
    assert_almost_equal(mlp.coefs_[0], np.array([[0.098, 0.195756],
                                                 [0.2956664, 0.096008],
                                                 [0.4939998, -0.002244]]),
                        decimal=3)
    assert_almost_equal(mlp.coefs_[1], np.array([[0.04706], [0.154089]]),
                        decimal=3)
    assert_almost_equal(mlp.intercepts_[0],
                        np.array([0.098333, 0.09626]), decimal=3)
    assert_almost_equal(mlp.intercepts_[1], np.array(0.9235), decimal=3)
    # Testing output
    #  h1 = g(X1 * W_i1 + b11) = g(0.6 * 0.098 + 0.8 * 0.2956664 +
    #               0.7 * 0.4939998 + 0.098333) = 0.677
    #  h2 = g(X2 * W_i2 + b12) = g(0.6 * 0.195756 + 0.8 * 0.096008 +
    #            0.7 * -0.002244 + 0.09626) = 0.572
    #  o1 = h * W2 + b21 = 0.677 * 0.04706 +
    #             0.572 * 0.154089 + 0.9235 = 1.043
    #  prob = sigmoid(o1) = 0.739
    assert_almost_equal(mlp.predict_proba(X)[0, 1], 0.739, decimal=3)


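# Editor's sketch (not part of the original test suite): the hand-computed
# forward pass above can be reproduced with plain NumPy. The helper below is
# hypothetical and is not called by any test; it only illustrates how the
# 0.739 probability asserted above follows from the updated weights.
def _manual_forward_pass_sketch():
    import numpy as np
    from scipy.special import expit  # logistic sigmoid g(.)

    X = np.array([0.6, 0.8, 0.7])
    W1 = np.array([[0.098, 0.195756],
                   [0.2956664, 0.096008],
                   [0.4939998, -0.002244]])
    b1 = np.array([0.098333, 0.09626])
    W2 = np.array([[0.04706], [0.154089]])
    b2 = np.array([0.9235])

    h = expit(X @ W1 + b1)       # hidden activations, roughly [0.677, 0.572]
    prob = expit(h @ W2 + b2)    # output probability, roughly 0.739
    return prob

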
def test_gradient():
    # Test gradient.

    # This makes sure that the activation functions and their derivatives
    # are correct. The numerical and analytical computation of the gradient
    # should be close.
    for n_labels in [2, 3]:
        n_samples = 5
        n_features = 10
        random_state = np.random.RandomState(seed=42)
        X = random_state.rand(n_samples, n_features)
        y = 1 + np.mod(np.arange(n_samples) + 1, n_labels)
        Y = LabelBinarizer().fit_transform(y)

        for activation in ACTIVATION_TYPES:
            mlp = MLPClassifier(activation=activation, hidden_layer_sizes=10,
                                solver='lbfgs', alpha=1e-5,
                                learning_rate_init=0.2, max_iter=1,
                                random_state=1)
            mlp.fit(X, y)

            theta = np.hstack([l.ravel() for l in mlp.coefs_ +
                               mlp.intercepts_])

            layer_units = ([X.shape[1]] + [mlp.hidden_layer_sizes] +
                           [mlp.n_outputs_])

            activations = []
            deltas = []
            coef_grads = []
            intercept_grads = []

            activations.append(X)
            for i in range(mlp.n_layers_ - 1):
                activations.append(np.empty((X.shape[0],
                                             layer_units[i + 1])))
                deltas.append(np.empty((X.shape[0],
                                        layer_units[i + 1])))

                fan_in = layer_units[i]
                fan_out = layer_units[i + 1]
                coef_grads.append(np.empty((fan_in, fan_out)))
                intercept_grads.append(np.empty(fan_out))

            # analytically compute the gradients
            def loss_grad_fun(t):
                return mlp._loss_grad_lbfgs(t, X, Y, activations, deltas,
                                            coef_grads, intercept_grads)

            [value, grad] = loss_grad_fun(theta)
            numgrad = np.zeros(np.size(theta))
            n = np.size(theta, 0)
            E = np.eye(n)
            epsilon = 1e-5
            # numerically compute the gradients
            for i in range(n):
                dtheta = E[:, i] * epsilon
                numgrad[i] = ((loss_grad_fun(theta + dtheta)[0] -
                              loss_grad_fun(theta - dtheta)[0]) /
                              (epsilon * 2.0))
            assert_almost_equal(numgrad, grad)


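# Editor's sketch (assumption, not part of the original tests): the numerical
# gradient computed in test_gradient above is the standard central difference
# (f(theta + eps * e_i) - f(theta - eps * e_i)) / (2 * eps). A minimal,
# generic version of that check for any scalar-valued function f:
def _numerical_gradient_sketch(f, theta, epsilon=1e-5):
    import numpy as np

    num_grad = np.zeros_like(theta, dtype=float)
    for i in range(theta.size):
        e_i = np.zeros_like(theta, dtype=float)
        e_i[i] = epsilon
        # symmetric (central) finite difference around theta
        num_grad[i] = (f(theta + e_i) - f(theta - e_i)) / (2.0 * epsilon)
    return num_grad

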
@pytest.mark.parametrize('X,y', classification_datasets)
def test_lbfgs_classification(X, y):
    # Test lbfgs on classification.
    # It should achieve a score higher than 0.95 for the binary and multi-class
    # versions of the digits dataset.
    X_train = X[:150]
    y_train = y[:150]
    X_test = X[150:]
    expected_shape_dtype = (X_test.shape[0], y_train.dtype.kind)

    for activation in ACTIVATION_TYPES:
        mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50,
                            max_iter=150, shuffle=True, random_state=1,
                            activation=activation)
        mlp.fit(X_train, y_train)
        y_predict = mlp.predict(X_test)
        assert mlp.score(X_train, y_train) > 0.95
        assert ((y_predict.shape[0], y_predict.dtype.kind) ==
                expected_shape_dtype)


@pytest.mark.parametrize('X,y', regression_datasets)
def test_lbfgs_regression(X, y):
    # Test lbfgs on the boston dataset, a regression problem.
    for activation in ACTIVATION_TYPES:
        mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
                           max_iter=150, shuffle=True, random_state=1,
                           activation=activation)
        mlp.fit(X, y)
        if activation == 'identity':
            assert mlp.score(X, y) > 0.84
        else:
            # Non-linear models perform much better than the linear bottleneck:
            assert mlp.score(X, y) > 0.95


@pytest.mark.parametrize('X,y', classification_datasets)
def test_lbfgs_classification_maxfun(X, y):
    # Test lbfgs parameter max_fun.
    # It should independently limit the number of iterations for lbfgs.
    max_fun = 10
    # classification tests
    for activation in ACTIVATION_TYPES:
        mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50,
                            max_iter=150, max_fun=max_fun, shuffle=True,
                            random_state=1, activation=activation)
        with pytest.warns(ConvergenceWarning):
            mlp.fit(X, y)
            assert max_fun >= mlp.n_iter_


@pytest.mark.parametrize('X,y', regression_datasets)
def test_lbfgs_regression_maxfun(X, y):
    # Test lbfgs parameter max_fun.
    # It should independently limit the number of iterations for lbfgs.
    max_fun = 10
    # regression tests
    for activation in ACTIVATION_TYPES:
        mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
                           max_iter=150, max_fun=max_fun, shuffle=True,
                           random_state=1, activation=activation)
        with pytest.warns(ConvergenceWarning):
            mlp.fit(X, y)
            assert max_fun >= mlp.n_iter_

    mlp.max_fun = -1
    with pytest.raises(ValueError):
        mlp.fit(X, y)


def test_learning_rate_warmstart():
    # Tests that warm_start reuses past solutions.
    X = [[3, 2], [1, 6], [5, 6], [-2, -4]]
    y = [1, 1, 1, 0]
    for learning_rate in ["invscaling", "constant"]:
        mlp = MLPClassifier(solver='sgd', hidden_layer_sizes=4,
                            learning_rate=learning_rate, max_iter=1,
                            power_t=0.25, warm_start=True)
        with ignore_warnings(category=ConvergenceWarning):
            mlp.fit(X, y)
            prev_eta = mlp._optimizer.learning_rate
            mlp.fit(X, y)
            post_eta = mlp._optimizer.learning_rate

        if learning_rate == 'constant':
            assert prev_eta == post_eta
        elif learning_rate == 'invscaling':
            assert (mlp.learning_rate_init / pow(8 + 1, mlp.power_t) ==
                    post_eta)


def test_multilabel_classification():
    # Test that multi-label classification works as expected.
    # test fit method
    X, y = make_multilabel_classification(n_samples=50, random_state=0,
                                          return_indicator=True)
    mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=50, alpha=1e-5,
                        max_iter=150, random_state=0, activation='logistic',
                        learning_rate_init=0.2)
    mlp.fit(X, y)
    assert mlp.score(X, y) > 0.97

    # test partial fit method
    mlp = MLPClassifier(solver='sgd', hidden_layer_sizes=50, max_iter=150,
                        random_state=0, activation='logistic', alpha=1e-5,
                        learning_rate_init=0.2)
    for i in range(100):
        mlp.partial_fit(X, y, classes=[0, 1, 2, 3, 4])
    assert mlp.score(X, y) > 0.9

    # Make sure early stopping still works now that splitting is stratified by
    # default (it is disabled for multilabel classification)
    mlp = MLPClassifier(early_stopping=True)
    mlp.fit(X, y).predict(X)


def test_multioutput_regression():
    # Test that multi-output regression works as expected
    X, y = make_regression(n_samples=200, n_targets=5)
    mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50, max_iter=200,
                       random_state=1)
    mlp.fit(X, y)
    assert mlp.score(X, y) > 0.9


def test_partial_fit_classes_error():
    # Tests that passing different classes to partial_fit raises an error
    X = [[3, 2]]
    y = [0]
    clf = MLPClassifier(solver='sgd')
    clf.partial_fit(X, y, classes=[0, 1])
    with pytest.raises(ValueError):
        clf.partial_fit(X, y, classes=[1, 2])


def test_partial_fit_classification():
    # Test partial_fit on classification.
    # `partial_fit` should yield the same results as 'fit' for binary and
    # multi-class classification.
    for X, y in classification_datasets:
        mlp = MLPClassifier(solver='sgd', max_iter=100, random_state=1,
                            tol=0, alpha=1e-5, learning_rate_init=0.2)

        with ignore_warnings(category=ConvergenceWarning):
            mlp.fit(X, y)
        pred1 = mlp.predict(X)
        mlp = MLPClassifier(solver='sgd', random_state=1, alpha=1e-5,
                            learning_rate_init=0.2)
        for i in range(100):
            mlp.partial_fit(X, y, classes=np.unique(y))
        pred2 = mlp.predict(X)
        assert_array_equal(pred1, pred2)
        assert mlp.score(X, y) > 0.95


def test_partial_fit_unseen_classes():
    # Non-regression test for bug 6994
    # Tests for labeling errors in partial fit

    clf = MLPClassifier(random_state=0)
    clf.partial_fit([[1], [2], [3]], ["a", "b", "c"],
                    classes=["a", "b", "c", "d"])
    clf.partial_fit([[4]], ["d"])
    assert clf.score([[1], [2], [3], [4]], ["a", "b", "c", "d"]) > 0


def test_partial_fit_regression():
    # Test partial_fit on regression.
    # `partial_fit` should yield the same results as 'fit' for regression.
    X = Xboston
    y = yboston

    for momentum in [0, .9]:
        mlp = MLPRegressor(solver='sgd', max_iter=100, activation='relu',
                           random_state=1, learning_rate_init=0.01,
                           batch_size=X.shape[0], momentum=momentum)
        with warnings.catch_warnings(record=True):
            # catch convergence warning
            mlp.fit(X, y)
        pred1 = mlp.predict(X)
        mlp = MLPRegressor(solver='sgd', activation='relu',
                           learning_rate_init=0.01, random_state=1,
                           batch_size=X.shape[0], momentum=momentum)
        for i in range(100):
            mlp.partial_fit(X, y)

        pred2 = mlp.predict(X)
        assert_almost_equal(pred1, pred2, decimal=2)
        score = mlp.score(X, y)
        assert score > 0.75


def test_partial_fit_errors():
    # Test partial_fit error handling.
    X = [[3, 2], [1, 6]]
    y = [1, 0]

    # no classes passed
    with pytest.raises(ValueError):
        MLPClassifier(solver='sgd').partial_fit(X, y, classes=[2])

    # lbfgs doesn't support partial_fit
    assert not hasattr(MLPClassifier(solver='lbfgs'), 'partial_fit')


@pytest.mark.parametrize(
        "args",
        [{'hidden_layer_sizes': -1},
         {'max_iter': -1},
         {'shuffle': 'true'},
         {'alpha': -1},
         {'learning_rate_init': -1},
         {'momentum': 2},
         {'momentum': -0.5},
         {'nesterovs_momentum': 'invalid'},
         {'early_stopping': 'invalid'},
         {'validation_fraction': 1},
         {'validation_fraction': -0.5},
         {'beta_1': 1},
         {'beta_1': -0.5},
         {'beta_2': 1},
         {'beta_2': -0.5},
         {'epsilon': -0.5},
         {'n_iter_no_change': -1},
         {'solver': 'hadoken'},
         {'learning_rate': 'converge'},
         {'activation': 'cloak'}]
)
def test_params_errors(args):
    # Test that invalid parameters raise value error
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier

    with pytest.raises(ValueError):
        clf(**args).fit(X, y)


def test_predict_proba_binary():
    # Test that predict_proba works as expected for binary class.
    X = X_digits_binary[:50]
    y = y_digits_binary[:50]

    clf = MLPClassifier(hidden_layer_sizes=5, activation='logistic',
                        random_state=1)
    with ignore_warnings(category=ConvergenceWarning):
        clf.fit(X, y)
    y_proba = clf.predict_proba(X)
    y_log_proba = clf.predict_log_proba(X)

    (n_samples, n_classes) = y.shape[0], 2

    proba_max = y_proba.argmax(axis=1)
    proba_log_max = y_log_proba.argmax(axis=1)

    assert y_proba.shape == (n_samples, n_classes)
    assert_array_equal(proba_max, proba_log_max)
    assert_array_equal(y_log_proba, np.log(y_proba))

    assert roc_auc_score(y, y_proba[:, 1]) == 1.0


def test_predict_proba_multiclass():
    # Test that predict_proba works as expected for the multi-class case.
    X = X_digits_multi[:10]
    y = y_digits_multi[:10]

    clf = MLPClassifier(hidden_layer_sizes=5)
    with ignore_warnings(category=ConvergenceWarning):
        clf.fit(X, y)
    y_proba = clf.predict_proba(X)
    y_log_proba = clf.predict_log_proba(X)

    (n_samples, n_classes) = y.shape[0], np.unique(y).size

    proba_max = y_proba.argmax(axis=1)
    proba_log_max = y_log_proba.argmax(axis=1)

    assert y_proba.shape == (n_samples, n_classes)
    assert_array_equal(proba_max, proba_log_max)
    assert_array_equal(y_log_proba, np.log(y_proba))


def test_predict_proba_multilabel():
    # Test that predict_proba works as expected for multilabel.
    # Multilabel should not use softmax, which makes probabilities sum to 1
    X, Y = make_multilabel_classification(n_samples=50, random_state=0,
                                          return_indicator=True)
    n_samples, n_classes = Y.shape

    clf = MLPClassifier(solver='lbfgs', hidden_layer_sizes=30,
                        random_state=0)
    clf.fit(X, Y)
    y_proba = clf.predict_proba(X)

    assert y_proba.shape == (n_samples, n_classes)
    assert_array_equal(y_proba > 0.5, Y)

    y_log_proba = clf.predict_log_proba(X)
    proba_max = y_proba.argmax(axis=1)
    proba_log_max = y_log_proba.argmax(axis=1)

    assert (y_proba.sum(1) - 1).dot(y_proba.sum(1) - 1) > 1e-10
    assert_array_equal(proba_max, proba_log_max)
    assert_array_equal(y_log_proba, np.log(y_proba))


def test_shuffle():
    # Test that the shuffle parameter affects the training process (it should)
    X, y = make_regression(n_samples=50, n_features=5, n_targets=1,
                           random_state=0)

    # The coefficients will be identical if both models use the same shuffle
    # setting
    for shuffle in [True, False]:
        mlp1 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                            random_state=0, shuffle=shuffle)
        mlp2 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                            random_state=0, shuffle=shuffle)
        mlp1.fit(X, y)
        mlp2.fit(X, y)

        assert np.array_equal(mlp1.coefs_[0], mlp2.coefs_[0])

    # The coefficients will differ when one model shuffles and the other
    # does not
    mlp1 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                        random_state=0, shuffle=True)
    mlp2 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                        random_state=0, shuffle=False)
    mlp1.fit(X, y)
    mlp2.fit(X, y)

    assert not np.array_equal(mlp1.coefs_[0], mlp2.coefs_[0])


def test_sparse_matrices():
    # Test that sparse and dense input matrices output the same results.
    X = X_digits_binary[:50]
    y = y_digits_binary[:50]
    X_sparse = csr_matrix(X)
    mlp = MLPClassifier(solver='lbfgs', hidden_layer_sizes=15,
                        random_state=1)
    mlp.fit(X, y)
    pred1 = mlp.predict(X)
    mlp.fit(X_sparse, y)
    pred2 = mlp.predict(X_sparse)
    assert_almost_equal(pred1, pred2)
    pred1 = mlp.predict(X)
    pred2 = mlp.predict(X_sparse)
    assert_array_equal(pred1, pred2)


def test_tolerance():
    # Test tolerance.
    # It should force the solver to exit the loop when it converges.
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd')
    clf.fit(X, y)
    assert clf.max_iter > clf.n_iter_


def test_verbose_sgd():
    # Test verbose.
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(solver='sgd', max_iter=2, verbose=10,
                        hidden_layer_sizes=2)
    old_stdout = sys.stdout
    sys.stdout = output = StringIO()

    with ignore_warnings(category=ConvergenceWarning):
        clf.fit(X, y)
    clf.partial_fit(X, y)

    sys.stdout = old_stdout
    assert 'Iteration' in output.getvalue()


def test_early_stopping():
    X = X_digits_binary[:100]
    y = y_digits_binary[:100]
    tol = 0.2
    clf = MLPClassifier(tol=tol, max_iter=3000, solver='sgd',
                        early_stopping=True)
    clf.fit(X, y)
    assert clf.max_iter > clf.n_iter_

    valid_scores = clf.validation_scores_
    best_valid_score = clf.best_validation_score_
    assert max(valid_scores) == best_valid_score
    assert best_valid_score + tol > valid_scores[-2]
    assert best_valid_score + tol > valid_scores[-1]


def test_adaptive_learning_rate():
    X = [[3, 2], [1, 6]]
    y = [1, 0]
    clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd',
                        learning_rate='adaptive')
    clf.fit(X, y)
    assert clf.max_iter > clf.n_iter_
    assert 1e-6 > clf._optimizer.learning_rate


@ignore_warnings(category=RuntimeWarning)
def test_warm_start():
    X = X_iris
    y = y_iris

    y_2classes = np.array([0] * 75 + [1] * 75)
    y_3classes = np.array([0] * 40 + [1] * 40 + [2] * 70)
    y_3classes_alt = np.array([0] * 50 + [1] * 50 + [3] * 50)
    y_4classes = np.array([0] * 37 + [1] * 37 + [2] * 38 + [3] * 38)
    y_5classes = np.array([0] * 30 + [1] * 30 + [2] * 30 + [3] * 30 + [4] * 30)

    # No error raised
    clf = MLPClassifier(hidden_layer_sizes=2, solver='lbfgs',
                        warm_start=True).fit(X, y)
    clf.fit(X, y)
    clf.fit(X, y_3classes)

    for y_i in (y_2classes, y_3classes_alt, y_4classes, y_5classes):
        clf = MLPClassifier(hidden_layer_sizes=2, solver='lbfgs',
                            warm_start=True).fit(X, y)
        message = ('warm_start can only be used where `y` has the same '
                   'classes as in the previous call to fit.'
                   ' Previously got [0 1 2], `y` has %s' % np.unique(y_i))
        with pytest.raises(ValueError, match=re.escape(message)):
            clf.fit(X, y_i)


def test_n_iter_no_change():
    # test n_iter_no_change using a binary data set;
    # the classifier fitting process is not prone to loss-curve fluctuations
    X = X_digits_binary[:100]
    y = y_digits_binary[:100]
    tol = 0.01
    max_iter = 3000

    # test multiple n_iter_no_change
    for n_iter_no_change in [2, 5, 10, 50, 100]:
        clf = MLPClassifier(tol=tol, max_iter=max_iter, solver='sgd',
                            n_iter_no_change=n_iter_no_change)
        clf.fit(X, y)

        # validate n_iter_no_change
        assert clf._no_improvement_count == n_iter_no_change + 1
        assert max_iter > clf.n_iter_


@ignore_warnings(category=ConvergenceWarning)
def test_n_iter_no_change_inf():
    # test n_iter_no_change using binary data set
    # the fitting process should go to max_iter iterations
    X = X_digits_binary[:100]
    y = y_digits_binary[:100]

    # set a ridiculous tolerance
    # this should always trigger _update_no_improvement_count()
    tol = 1e9

    # fit
    n_iter_no_change = np.inf
    max_iter = 3000
    clf = MLPClassifier(tol=tol, max_iter=max_iter, solver='sgd',
                        n_iter_no_change=n_iter_no_change)
    clf.fit(X, y)

    # validate n_iter_no_change doesn't cause early stopping
    assert clf.n_iter_ == max_iter

    # validate _update_no_improvement_count() was always triggered
    assert clf._no_improvement_count == clf.n_iter_ - 1


def test_early_stopping_stratified():
    # Make sure data splitting for early stopping is stratified
    X = [[1, 2], [2, 3], [3, 4], [4, 5]]
    y = [0, 0, 0, 1]

    mlp = MLPClassifier(early_stopping=True)
    with pytest.raises(
            ValueError,
            match='The least populated class in y has only 1 member'):
        mlp.fit(X, y)
191	venv/Lib/site-packages/sklearn/neural_network/tests/test_rbm.py	Normal file
@ -0,0 +1,191 @@
import sys
import re

import numpy as np
from scipy.sparse import csc_matrix, csr_matrix, lil_matrix
from sklearn.utils._testing import (assert_almost_equal, assert_array_equal)

from sklearn.datasets import load_digits
from io import StringIO
from sklearn.neural_network import BernoulliRBM
from sklearn.utils.validation import assert_all_finite

Xdigits, _ = load_digits(return_X_y=True)
Xdigits -= Xdigits.min()
Xdigits /= Xdigits.max()


def test_fit():
    X = Xdigits.copy()

    rbm = BernoulliRBM(n_components=64, learning_rate=0.1,
                       batch_size=10, n_iter=7, random_state=9)
    rbm.fit(X)

    assert_almost_equal(rbm.score_samples(X).mean(), -21., decimal=0)

    # in-place tricks shouldn't have modified X
    assert_array_equal(X, Xdigits)


def test_partial_fit():
    X = Xdigits.copy()
    rbm = BernoulliRBM(n_components=64, learning_rate=0.1,
                       batch_size=20, random_state=9)
    n_samples = X.shape[0]
    n_batches = int(np.ceil(float(n_samples) / rbm.batch_size))
    batch_slices = np.array_split(X, n_batches)

    for i in range(7):
        for batch in batch_slices:
            rbm.partial_fit(batch)

    assert_almost_equal(rbm.score_samples(X).mean(), -21., decimal=0)
    assert_array_equal(X, Xdigits)


def test_transform():
    X = Xdigits[:100]
    rbm1 = BernoulliRBM(n_components=16, batch_size=5,
                        n_iter=5, random_state=42)
    rbm1.fit(X)

    Xt1 = rbm1.transform(X)
    Xt2 = rbm1._mean_hiddens(X)

    assert_array_equal(Xt1, Xt2)


def test_small_sparse():
    # BernoulliRBM should work on small sparse matrices.
    X = csr_matrix(Xdigits[:4])
    BernoulliRBM().fit(X)       # no exception


def test_small_sparse_partial_fit():
    for sparse in [csc_matrix, csr_matrix]:
        X_sparse = sparse(Xdigits[:100])
        X = Xdigits[:100].copy()

        rbm1 = BernoulliRBM(n_components=64, learning_rate=0.1,
                            batch_size=10, random_state=9)
        rbm2 = BernoulliRBM(n_components=64, learning_rate=0.1,
                            batch_size=10, random_state=9)

        rbm1.partial_fit(X_sparse)
        rbm2.partial_fit(X)

        assert_almost_equal(rbm1.score_samples(X).mean(),
                            rbm2.score_samples(X).mean(),
                            decimal=0)


def test_sample_hiddens():
    rng = np.random.RandomState(0)
    X = Xdigits[:100]
    rbm1 = BernoulliRBM(n_components=2, batch_size=5,
                        n_iter=5, random_state=42)
    rbm1.fit(X)

    h = rbm1._mean_hiddens(X[0])
    hs = np.mean([rbm1._sample_hiddens(X[0], rng) for i in range(100)], 0)

    assert_almost_equal(h, hs, decimal=1)


def test_fit_gibbs():
    # Gibbs on the RBM hidden layer should be able to recreate [[0], [1]]
    # from the same input
    rng = np.random.RandomState(42)
    X = np.array([[0.], [1.]])
    rbm1 = BernoulliRBM(n_components=2, batch_size=2,
                        n_iter=42, random_state=rng)
    # this many iterations are needed
    rbm1.fit(X)
    assert_almost_equal(rbm1.components_,
                        np.array([[0.02649814], [0.02009084]]), decimal=4)
    assert_almost_equal(rbm1.gibbs(X), X)
    return rbm1


def test_fit_gibbs_sparse():
    # Gibbs on the RBM hidden layer should be able to recreate [[0], [1]] from
    # the same input even when the input is sparse, and test against non-sparse
    rbm1 = test_fit_gibbs()
    rng = np.random.RandomState(42)
    from scipy.sparse import csc_matrix
    X = csc_matrix([[0.], [1.]])
    rbm2 = BernoulliRBM(n_components=2, batch_size=2,
                        n_iter=42, random_state=rng)
    rbm2.fit(X)
    assert_almost_equal(rbm2.components_,
                        np.array([[0.02649814], [0.02009084]]), decimal=4)
    assert_almost_equal(rbm2.gibbs(X), X.toarray())
    assert_almost_equal(rbm1.components_, rbm2.components_)


def test_gibbs_smoke():
    # Check that we don't get NaNs when sampling the full digits dataset.
    # Also check that sampling again will yield different results.
    X = Xdigits
    rbm1 = BernoulliRBM(n_components=42, batch_size=40,
                        n_iter=20, random_state=42)
    rbm1.fit(X)
    X_sampled = rbm1.gibbs(X)
    assert_all_finite(X_sampled)
    X_sampled2 = rbm1.gibbs(X)
    assert np.all((X_sampled != X_sampled2).max(axis=1))


def test_score_samples():
    # Test score_samples (pseudo-likelihood) method.
    # Assert that pseudo-likelihood is computed without clipping.
    # See Fabian's blog, http://bit.ly/1iYefRk
    rng = np.random.RandomState(42)
    X = np.vstack([np.zeros(1000), np.ones(1000)])
    rbm1 = BernoulliRBM(n_components=10, batch_size=2,
                        n_iter=10, random_state=rng)
    rbm1.fit(X)
    assert (rbm1.score_samples(X) < -300).all()

    # Sparse vs. dense should not affect the output. Also test sparse input
    # validation.
    rbm1.random_state = 42
    d_score = rbm1.score_samples(X)
    rbm1.random_state = 42
    s_score = rbm1.score_samples(lil_matrix(X))
    assert_almost_equal(d_score, s_score)

    # Test numerical stability (#2785): would previously generate infinities
    # and crash with an exception.
    with np.errstate(under='ignore'):
        rbm1.score_samples([np.arange(1000) * 100])


def test_rbm_verbose():
    rbm = BernoulliRBM(n_iter=2, verbose=10)
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        rbm.fit(Xdigits)
    finally:
        sys.stdout = old_stdout


def test_sparse_and_verbose():
    # Make sure RBM works with sparse input when verbose=True
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    from scipy.sparse import csc_matrix
    X = csc_matrix([[0.], [1.]])
    rbm = BernoulliRBM(n_components=2, batch_size=2, n_iter=1,
                       random_state=42, verbose=True)
    try:
        rbm.fit(X)
        s = sys.stdout.getvalue()
        # make sure output is sound
        assert re.match(r"\[BernoulliRBM\] Iteration 1,"
                        r" pseudo-likelihood = -?(\d)+(\.\d+)?,"
                        r" time = (\d|\.)+s", s)
    finally:
        sys.stdout = old_stdout
@ -0,0 +1,108 @@
import numpy as np

from sklearn.neural_network._stochastic_optimizers import (BaseOptimizer,
                                                           SGDOptimizer,
                                                           AdamOptimizer)
from sklearn.utils._testing import assert_array_equal


shapes = [(4, 6), (6, 8), (7, 8, 9)]


def test_base_optimizer():
    params = [np.zeros(shape) for shape in shapes]

    for lr in [10 ** i for i in range(-3, 4)]:
        optimizer = BaseOptimizer(params, lr)
        assert optimizer.trigger_stopping('', False)


def test_sgd_optimizer_no_momentum():
    params = [np.zeros(shape) for shape in shapes]

    for lr in [10 ** i for i in range(-3, 4)]:
        optimizer = SGDOptimizer(params, lr, momentum=0, nesterov=False)
        grads = [np.random.random(shape) for shape in shapes]
        expected = [param - lr * grad for param, grad in zip(params, grads)]
        optimizer.update_params(grads)

        for exp, param in zip(expected, optimizer.params):
            assert_array_equal(exp, param)


def test_sgd_optimizer_momentum():
    params = [np.zeros(shape) for shape in shapes]
    lr = 0.1

    for momentum in np.arange(0.5, 0.9, 0.1):
        optimizer = SGDOptimizer(params, lr, momentum=momentum, nesterov=False)
        velocities = [np.random.random(shape) for shape in shapes]
        optimizer.velocities = velocities
        grads = [np.random.random(shape) for shape in shapes]
        updates = [momentum * velocity - lr * grad
                   for velocity, grad in zip(velocities, grads)]
        expected = [param + update for param, update in zip(params, updates)]
        optimizer.update_params(grads)

        for exp, param in zip(expected, optimizer.params):
            assert_array_equal(exp, param)


def test_sgd_optimizer_trigger_stopping():
    params = [np.zeros(shape) for shape in shapes]
    lr = 2e-6
    optimizer = SGDOptimizer(params, lr, lr_schedule='adaptive')
    assert not optimizer.trigger_stopping('', False)
    assert lr / 5 == optimizer.learning_rate
    assert optimizer.trigger_stopping('', False)


def test_sgd_optimizer_nesterovs_momentum():
    params = [np.zeros(shape) for shape in shapes]
    lr = 0.1

    for momentum in np.arange(0.5, 0.9, 0.1):
        optimizer = SGDOptimizer(params, lr, momentum=momentum, nesterov=True)
        velocities = [np.random.random(shape) for shape in shapes]
        optimizer.velocities = velocities
        grads = [np.random.random(shape) for shape in shapes]
        updates = [momentum * velocity - lr * grad
                   for velocity, grad in zip(velocities, grads)]
        updates = [momentum * update - lr * grad
                   for update, grad in zip(updates, grads)]
        expected = [param + update for param, update in zip(params, updates)]
        optimizer.update_params(grads)

        for exp, param in zip(expected, optimizer.params):
            assert_array_equal(exp, param)


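# Editor's sketch (assumption): the expected values computed in the Nesterov
# test above correspond to updating the velocity first and then taking a
# "look-ahead" step with it. This hypothetical helper mirrors that arithmetic;
# it is not the SGDOptimizer implementation itself and is not called by tests.
def _nesterov_step_sketch(param, velocity, grad, lr=0.1, momentum=0.9):
    velocity = momentum * velocity - lr * grad         # velocity update
    param = param + momentum * velocity - lr * grad    # look-ahead step
    return param, velocity

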
def test_adam_optimizer():
    params = [np.zeros(shape) for shape in shapes]
    lr = 0.001
    epsilon = 1e-8

    for beta_1 in np.arange(0.9, 1.0, 0.05):
        for beta_2 in np.arange(0.995, 1.0, 0.001):
            optimizer = AdamOptimizer(params, lr, beta_1, beta_2, epsilon)
            ms = [np.random.random(shape) for shape in shapes]
            vs = [np.random.random(shape) for shape in shapes]
            t = 10
            optimizer.ms = ms
            optimizer.vs = vs
            optimizer.t = t - 1
            grads = [np.random.random(shape) for shape in shapes]

            ms = [beta_1 * m + (1 - beta_1) * grad
                  for m, grad in zip(ms, grads)]
            vs = [beta_2 * v + (1 - beta_2) * (grad ** 2)
                  for v, grad in zip(vs, grads)]
            learning_rate = lr * np.sqrt(1 - beta_2 ** t) / (1 - beta_1**t)
            updates = [-learning_rate * m / (np.sqrt(v) + epsilon)
                       for m, v in zip(ms, vs)]
            expected = [param + update
                        for param, update in zip(params, updates)]

            optimizer.update_params(grads)
            for exp, param in zip(expected, optimizer.params):
                assert_array_equal(exp, param)


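# Editor's sketch (assumption): one bias-corrected Adam step matching the
# expected values computed in test_adam_optimizer above. This hypothetical
# helper only restates that arithmetic; it is not the AdamOptimizer API and
# is not called by any test.
def _adam_step_sketch(param, m, v, grad, t, lr=0.001,
                      beta_1=0.9, beta_2=0.999, epsilon=1e-8):
    import numpy as np

    m = beta_1 * m + (1 - beta_1) * grad         # first-moment estimate
    v = beta_2 * v + (1 - beta_2) * grad ** 2    # second-moment estimate
    step = lr * np.sqrt(1 - beta_2 ** t) / (1 - beta_1 ** t)
    param = param - step * m / (np.sqrt(v) + epsilon)
    return param, m, v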