Fixed database typo and removed unnecessary class identifier.

2020-10-14 10:10:37 -04:00 · 2020-10-14 10:10:37 -04:00 · 45fb349a7d
commit 45fb349a7d
parent 00ad49a143
5098 changed files with 952558 additions and 85 deletions
--- a/venv/Lib/site-packages/scipy/optimize/_hessian_update_strategy.py
+++ b/venv/Lib/site-packages/scipy/optimize/_hessian_update_strategy.py
@ -0,0 +1,429 @@
+"""Hessian update strategies for quasi-Newton optimization methods."""
+import numpy as np
+from numpy.linalg import norm
+from scipy.linalg import get_blas_funcs
+from warnings import warn
+
+
+__all__ = ['HessianUpdateStrategy', 'BFGS', 'SR1']
+
+
+class HessianUpdateStrategy(object):
+    """Interface for implementing Hessian update strategies.
+
+    Many optimization methods make use of Hessian (or inverse Hessian)
+    approximations, such as the quasi-Newton methods BFGS, SR1, L-BFGS.
+    Some of these approximations, however, do not actually need to store
+    the entire matrix or can compute the internal matrix product with a
+    given vector in a very efficient manner. This class serves as an
+    abstract interface between the optimization algorithm and the
+    quasi-Newton update strategies, giving freedom of implementation
+    to store and update the internal matrix as efficiently as possible.
+    Different choices of initialization and update procedure will result
+    in different quasi-Newton strategies.
+
+    Four methods should be implemented in derived classes: ``initialize``,
+    ``update``, ``dot`` and ``get_matrix``. Every method of this base
+    class raises ``NotImplementedError``.
+
+    Notes
+    -----
+    Any instance of a class that implements this interface,
+    can be accepted by the method ``minimize`` and used by
+    the compatible solvers to approximate the Hessian (or
+    inverse Hessian) used by the optimization algorithms.
+    """
+
+    def initialize(self, n, approx_type):
+        """Initialize internal matrix.
+
+        Allocate internal memory for storing and updating
+        the Hessian or its inverse.
+
+        Parameters
+        ----------
+        n : int
+            Problem dimension.
+        approx_type : {'hess', 'inv_hess'}
+            Selects either the Hessian or the inverse Hessian.
+            When set to 'hess' the Hessian will be stored and updated.
+            When set to 'inv_hess' its inverse will be used instead.
+
+        Raises
+        ------
+        NotImplementedError
+            Always, in this base class.
+        """
+        raise NotImplementedError("The method ``initialize(n, approx_type)``"
+                                  " is not implemented.")
+
+    def update(self, delta_x, delta_grad):
+        """Update internal matrix.
+
+        Update Hessian matrix or its inverse (depending on how 'approx_type'
+        is defined) using information about the last evaluated points.
+
+        Parameters
+        ----------
+        delta_x : ndarray
+            The difference between two points the gradient
+            function has been evaluated at: ``delta_x = x2 - x1``.
+        delta_grad : ndarray
+            The difference between the gradients:
+            ``delta_grad = grad(x2) - grad(x1)``.
+
+        Raises
+        ------
+        NotImplementedError
+            Always, in this base class.
+        """
+        raise NotImplementedError("The method ``update(delta_x, delta_grad)``"
+                                  " is not implemented.")
+
+    def dot(self, p):
+        """Compute the product of the internal matrix with the given vector.
+
+        Parameters
+        ----------
+        p : array_like
+            1-D array representing a vector.
+
+        Returns
+        -------
+        Hp : array
+            1-D array representing the result of multiplying the
+            approximation matrix by vector p.
+
+        Raises
+        ------
+        NotImplementedError
+            Always, in this base class.
+        """
+        raise NotImplementedError("The method ``dot(p)``"
+                                  " is not implemented.")
+
+    def get_matrix(self):
+        """Return current internal matrix.
+
+        Returns
+        -------
+        H : ndarray, shape (n, n)
+            Dense matrix containing either the Hessian
+            or its inverse (depending on how 'approx_type'
+            is defined).
+
+        Raises
+        ------
+        NotImplementedError
+            Always, in this base class.
+        """
+        # The method takes no argument, so the message must not
+        # advertise a ``p`` parameter.
+        raise NotImplementedError("The method ``get_matrix()``"
+                                  " is not implemented.")
+
+
+class FullHessianUpdateStrategy(HessianUpdateStrategy):
+    """Hessian update strategy with full dimensional internal representation.
+
+    The approximation is kept as a dense ``(n, n)`` array: ``self.B`` when
+    ``approx_type == 'hess'`` and ``self.H`` when it is ``'inv_hess'``.
+    Subclasses provide the actual quasi-Newton formula by overriding
+    ``_update_implementation``.
+    """
+    _syr = get_blas_funcs('syr', dtype='d')  # Symmetric rank 1 update
+    _syr2 = get_blas_funcs('syr2', dtype='d')  # Symmetric rank 2 update
+    # Symmetric matrix-vector product
+    _symv = get_blas_funcs('symv', dtype='d')
+
+    def __init__(self, init_scale='auto'):
+        """Store the initial scale; matrices are allocated by ``initialize``.
+
+        Parameters
+        ----------
+        init_scale : {float, 'auto'}, optional
+            Scale applied to the identity matrix on the first update.
+            ``'auto'`` selects the heuristic in ``_auto_scale``.
+        """
+        self.init_scale = init_scale
+        # Until initialize is called we can't really use the class,
+        # so it makes sense to set everything to None.
+        self.first_iteration = None
+        self.approx_type = None
+        self.B = None
+        self.H = None
+
+    def initialize(self, n, approx_type):
+        """Initialize internal matrix.
+
+        Allocate internal memory for storing and updating
+        the Hessian or its inverse.
+
+        Parameters
+        ----------
+        n : int
+            Problem dimension.
+        approx_type : {'hess', 'inv_hess'}
+            Selects either the Hessian or the inverse Hessian.
+            When set to 'hess' the Hessian will be stored and updated.
+            When set to 'inv_hess' its inverse will be used instead.
+
+        Raises
+        ------
+        ValueError
+            If `approx_type` is neither 'hess' nor 'inv_hess'. The
+            instance is left untouched in that case.
+        """
+        # Validate before touching any attribute so that a rejected call
+        # does not leave the instance flagged as initialized with an
+        # invalid ``approx_type`` and no matrix allocated.
+        if approx_type not in ('hess', 'inv_hess'):
+            raise ValueError("`approx_type` must be 'hess' or 'inv_hess'.")
+        self.first_iteration = True
+        self.n = n
+        self.approx_type = approx_type
+        # Create matrix
+        if self.approx_type == 'hess':
+            self.B = np.eye(n, dtype=float)
+        else:
+            self.H = np.eye(n, dtype=float)
+
+    def _auto_scale(self, delta_x, delta_grad):
+        """Return the heuristic scale for the initial matrix.
+
+        Returns ``1`` when any of the involved inner products is zero,
+        ``y.y / |y.s|`` for 'hess' and ``|y.s| / y.y`` otherwise, with
+        ``s = delta_x`` and ``y = delta_grad``.
+        """
+        # Heuristic to scale matrix at first iteration.
+        # Described in Nocedal and Wright "Numerical Optimization"
+        # p.143 formula (6.20).
+        s_norm2 = np.dot(delta_x, delta_x)
+        y_norm2 = np.dot(delta_grad, delta_grad)
+        ys = np.abs(np.dot(delta_grad, delta_x))
+        # Fall back to the unscaled identity rather than dividing by zero.
+        if ys == 0.0 or y_norm2 == 0 or s_norm2 == 0:
+            return 1
+        if self.approx_type == 'hess':
+            return y_norm2 / ys
+        else:
+            return ys / y_norm2
+
+    def _update_implementation(self, delta_x, delta_grad):
+        """Apply the strategy-specific update; overridden by subclasses."""
+        raise NotImplementedError("The method ``_update_implementation``"
+                                  " is not implemented.")
+
+    def update(self, delta_x, delta_grad):
+        """Update internal matrix.
+
+        Update Hessian matrix or its inverse (depending on how 'approx_type'
+        is defined) using information about the last evaluated points.
+        Nothing happens when `delta_x` is all zeros; when `delta_grad` is
+        all zeros a ``UserWarning`` is issued and the update is skipped.
+
+        Parameters
+        ----------
+        delta_x : ndarray
+            The difference between two points the gradient
+            function has been evaluated at: ``delta_x = x2 - x1``.
+        delta_grad : ndarray
+            The difference between the gradients:
+            ``delta_grad = grad(x2) - grad(x1)``.
+        """
+        if np.all(delta_x == 0.0):
+            return
+        if np.all(delta_grad == 0.0):
+            warn('delta_grad == 0.0. Check if the approximated '
+                 'function is linear. If the function is linear '
+                 'better results can be obtained by defining the '
+                 'Hessian as zero instead of using quasi-Newton '
+                 'approximations.', UserWarning)
+            return
+        if self.first_iteration:
+            # Get user specific scale
+            if self.init_scale == "auto":
+                scale = self._auto_scale(delta_x, delta_grad)
+            else:
+                scale = float(self.init_scale)
+            # Scale initial matrix with ``scale * np.eye(n)``
+            if self.approx_type == 'hess':
+                self.B *= scale
+            else:
+                self.H *= scale
+            self.first_iteration = False
+        self._update_implementation(delta_x, delta_grad)
+
+    def dot(self, p):
+        """Compute the product of the internal matrix with the given vector.
+
+        Parameters
+        ----------
+        p : array_like
+            1-D array representing a vector.
+
+        Returns
+        -------
+        Hp : array
+            1-D array representing the result of multiplying the
+            approximation matrix by vector p.
+        """
+        if self.approx_type == 'hess':
+            return self._symv(1, self.B, p)
+        else:
+            return self._symv(1, self.H, p)
+
+    def get_matrix(self):
+        """Return the current internal matrix.
+
+        Returns
+        -------
+        M : ndarray, shape (n, n)
+            Dense matrix containing either the Hessian or its inverse
+            (depending on how `approx_type` was defined).
+        """
+        if self.approx_type == 'hess':
+            M = np.copy(self.B)
+        else:
+            M = np.copy(self.H)
+        # Mirror the strictly upper triangle into the strictly lower one so
+        # the returned copy is symmetric; presumably the BLAS rank updates
+        # only maintain the upper triangle -- confirm against BLAS docs.
+        li = np.tril_indices_from(M, k=-1)
+        M[li] = M.T[li]
+        return M
+
+
+class BFGS(FullHessianUpdateStrategy):
+    """Broyden-Fletcher-Goldfarb-Shanno (BFGS) Hessian update strategy.
+
+    Parameters
+    ----------
+    exception_strategy : {'skip_update', 'damp_update'}, optional
+        What to do when the curvature condition is violated.
+        'skip_update' leaves the matrix untouched, while 'damp_update'
+        interpolates between the actual BFGS result and the unmodified
+        matrix. Both exception strategies are explained in [1]_,
+        p.536-537.
+    min_curvature : float
+        This number, scaled by a normalization factor, defines the
+        minimum curvature ``dot(delta_grad, delta_x)`` allowed to go
+        unaffected by the exception strategy. Defaults to 1e-8 for
+        ``exception_strategy = 'skip_update'`` and to 0.2 for
+        ``exception_strategy = 'damp_update'``.
+    init_scale : {float, 'auto'}
+        Matrix scale at first iteration. At the first
+        iteration the Hessian matrix or its inverse will be initialized
+        with ``init_scale*np.eye(n)``, where ``n`` is the problem dimension.
+        Set it to 'auto' in order to use an automatic heuristic for choosing
+        the initial scale. The heuristic is described in [1]_, p.143.
+        By default uses 'auto'.
+
+    Notes
+    -----
+    The update is based on the description in [1]_, p.140.
+
+    References
+    ----------
+    .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
+           Second Edition (2006).
+    """
+
+    def __init__(self, exception_strategy='skip_update', min_curvature=None,
+                 init_scale='auto'):
+        # Pick the strategy-specific default threshold first; an unknown
+        # strategy is rejected before any attribute is set.
+        if exception_strategy == 'skip_update':
+            fallback_curvature = 1e-8
+        elif exception_strategy == 'damp_update':
+            fallback_curvature = 0.2
+        else:
+            raise ValueError("`exception_strategy` must be 'skip_update' "
+                             "or 'damp_update'.")
+        if min_curvature is None:
+            self.min_curvature = fallback_curvature
+        else:
+            self.min_curvature = min_curvature
+
+        super(BFGS, self).__init__(init_scale)
+        self.exception_strategy = exception_strategy
+
+    def _update_inverse_hessian(self, ys, Hy, yHy, s):
+        """Apply the BFGS formula to the stored inverse Hessian.
+
+        The update performed is:
+
+            ``H <- H + ((H*y).T*y + s.T*y)/(s.T*y)^2 * (s*s.T)
+                     - 1/(s.T*y) * ((H*y)*s.T + s*(H*y).T)``
+
+        with ``s = delta_x`` and ``y = delta_grad``. It is equivalent
+        to (6.17) in [1]_, rearranged so that it can be carried out as
+        one symmetric rank-2 and one symmetric rank-1 update.
+
+        References
+        ----------
+        .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
+               Second Edition (2006).
+        """
+        # Rank-2 term: -(Hy s^T + s Hy^T) / ys
+        cross_coeff = -1.0 / ys
+        self.H = self._syr2(cross_coeff, s, Hy, a=self.H)
+        # Rank-1 term: (ys + yHy) / ys^2 * s s^T
+        outer_coeff = (ys+yHy)/ys**2
+        self.H = self._syr(outer_coeff, s, a=self.H)
+
+    def _update_hessian(self, ys, Bs, sBs, y):
+        """Apply the BFGS formula to the stored Hessian.
+
+        The update performed is:
+
+            ``B <- B - (B*s)*(B*s).T/s.T*(B*s) + y*y^T/s.T*y``
+
+        with ``s`` short for ``delta_x`` and ``y`` short for
+        ``delta_grad``. This is formula (6.19) in [1]_.
+
+        References
+        ----------
+        .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
+               Second Edition (2006).
+        """
+        # Add y y^T / ys, then subtract (Bs)(Bs)^T / sBs.
+        gain_coeff = 1.0 / ys
+        self.B = self._syr(gain_coeff, y, a=self.B)
+        loss_coeff = -1.0 / sBs
+        self.B = self._syr(loss_coeff, Bs, a=self.B)
+
+    def _update_implementation(self, delta_x, delta_grad):
+        # The Hessian and inverse-Hessian updates are dual to each other,
+        # so the roles of the two difference vectors are simply swapped.
+        if self.approx_type == 'hess':
+            w, z = delta_x, delta_grad
+        else:
+            w, z = delta_grad, delta_x
+        # Quantities shared by both update formulas
+        wz = np.dot(w, z)
+        Mw = self.dot(w)
+        wMw = Mw.dot(w)
+        # In exact arithmetic wMw > 0 always holds, but roundoff errors
+        # may produce an indefinite matrix. In that case restart from a
+        # freshly scaled identity.
+        if wMw <= 0.0:
+            scale = self._auto_scale(delta_x, delta_grad)
+            restarted = scale * np.eye(self.n, dtype=float)
+            if self.approx_type == 'hess':
+                self.B = restarted
+            else:
+                self.H = restarted
+            # Recompute the shared quantities for the new matrix
+            Mw = self.dot(w)
+            wMw = Mw.dot(w)
+        # Curvature condition violated?
+        if wz <= self.min_curvature * wMw:
+            # 'skip_update': leave the matrix as it is.
+            if self.exception_strategy == 'skip_update':
+                return
+            # 'damp_update': interpolate between the actual BFGS
+            # result and the unmodified matrix.
+            if self.exception_strategy == 'damp_update':
+                update_factor = (1-self.min_curvature) / (1 - wz/wMw)
+                z = update_factor*z + (1-update_factor)*Mw
+                wz = np.dot(w, z)
+        # Apply the update to whichever matrix is being tracked
+        if self.approx_type == 'hess':
+            self._update_hessian(wz, Mw, wMw, z)
+        else:
+            self._update_inverse_hessian(wz, Mw, wMw, z)
+
+
+class SR1(FullHessianUpdateStrategy):
+    """Symmetric-rank-1 Hessian update strategy.
+
+    Parameters
+    ----------
+    min_denominator : float
+        This number, scaled by a normalization factor, is the smallest
+        denominator magnitude accepted by the update. Whenever the
+        denominator falls at or below it, the update is skipped.
+        By default uses ``1e-8``.
+    init_scale : {float, 'auto'}, optional
+        Matrix scale at first iteration. At the first
+        iteration the Hessian matrix or its inverse will be initialized
+        with ``init_scale*np.eye(n)``, where ``n`` is the problem dimension.
+        Set it to 'auto' in order to use an automatic heuristic for choosing
+        the initial scale. The heuristic is described in [1]_, p.143.
+        By default uses 'auto'.
+
+    Notes
+    -----
+    The update is based on the description in [1]_, p.144-146.
+
+    References
+    ----------
+    .. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
+           Second Edition (2006).
+    """
+
+    def __init__(self, min_denominator=1e-8, init_scale='auto'):
+        self.min_denominator = min_denominator
+        super(SR1, self).__init__(init_scale)
+
+    def _update_implementation(self, delta_x, delta_grad):
+        # The Hessian and inverse-Hessian updates share one formula with
+        # the roles of the two difference vectors swapped.
+        if self.approx_type == 'hess':
+            w, z = delta_x, delta_grad
+        else:
+            w, z = delta_grad, delta_x
+        # Residual of the secant equation for the current matrix
+        Mw = self.dot(w)
+        residual = z - Mw
+        denominator = np.dot(w, residual)
+        # Skip the update when the denominator is too small relative
+        # to the norms of the vectors that form it.
+        threshold = self.min_denominator*norm(w)*norm(residual)
+        if np.abs(denominator) <= threshold:
+            return
+        # Rank-1 correction: residual * residual^T / denominator
+        if self.approx_type == 'hess':
+            self.B = self._syr(1/denominator, residual, a=self.B)
+        else:
+            self.H = self._syr(1/denominator, residual, a=self.H)