Fixed database typo and removed unnecessary class identifier.

2020-10-14 10:10:37 -04:00 · 2020-10-14 10:10:37 -04:00 · 45fb349a7d
commit 45fb349a7d
parent 00ad49a143
5098 changed files with 952558 additions and 85 deletions
--- a/venv/Lib/site-packages/scipy/optimize/_trustregion_exact.py
+++ b/venv/Lib/site-packages/scipy/optimize/_trustregion_exact.py
@ -0,0 +1,430 @@
+"""Nearly exact trust-region optimization subproblem."""
+import numpy as np
+from scipy.linalg import (norm, get_lapack_funcs, solve_triangular,
+                          cho_solve)
+from ._trustregion import (_minimize_trust_region, BaseQuadraticSubproblem)
+
+# Names exported by a star-import of this module. ``gershgorin_bounds``
+# is defined further down in this file but is not part of this list.
+__all__ = ['_minimize_trustregion_exact',
+           'estimate_smallest_singular_value',
+           'singular_leading_submatrix',
+           'IterativeSubproblem']
+
+
+def _minimize_trustregion_exact(fun, x0, args=(), jac=None, hess=None,
+                                **trust_region_options):
+    """
+    Minimization of scalar function of one or more variables using
+    a nearly exact trust-region algorithm.
+
+    Options
+    -------
+    initial_tr_radius : float
+        Initial trust-region radius.
+    max_tr_radius : float
+        Maximum value of the trust-region radius. No steps that are longer
+        than this value will be proposed.
+    eta : float
+        Trust region related acceptance stringency for proposed steps.
+    gtol : float
+        Gradient norm must be less than ``gtol`` before successful
+        termination.
+    """
+
+    if jac is None:
+        raise ValueError('Jacobian is required for trust region '
+                         'exact minimization.')
+    if hess is None:
+        raise ValueError('Hessian matrix is required for trust region '
+                         'exact minimization.')
+    return _minimize_trust_region(fun, x0, args=args, jac=jac, hess=hess,
+                                  subproblem=IterativeSubproblem,
+                                  **trust_region_options)
+
+
+def estimate_smallest_singular_value(U):
+    """Given upper triangular matrix ``U`` estimate the smallest singular
+    value and the correspondent right singular vector in O(n**2) operations.
+
+    Parameters
+    ----------
+    U : ndarray
+        Square upper triangular matrix.
+
+    Returns
+    -------
+    s_min : float
+        Estimated smallest singular value of the provided matrix.
+    z_min : ndarray
+        Estimatied right singular vector.
+
+    Notes
+    -----
+    The procedure is based on [1]_ and is done in two steps. First, it finds
+    a vector ``e`` with components selected from {+1, -1} such that the
+    solution ``w`` from the system ``U.T w = e`` is as large as possible.
+    Next it estimate ``U v = w``. The smallest singular value is close
+    to ``norm(w)/norm(v)`` and the right singular vector is close
+    to ``v/norm(v)``.
+
+    The estimation will be better more ill-conditioned is the matrix.
+
+    References
+    ----------
+    .. [1] Cline, A. K., Moler, C. B., Stewart, G. W., Wilkinson, J. H.
+           An estimate for the condition number of a matrix.  1979.
+           SIAM Journal on Numerical Analysis, 16(2), 368-375.
+    """
+
+    U = np.atleast_2d(U)
+    m, n = U.shape
+
+    if m != n:
+        raise ValueError("A square triangular matrix should be provided.")
+
+    # A vector `e` with components selected from {+1, -1}
+    # is selected so that the solution `w` to the system
+    # `U.T w = e` is as large as possible. Implementation
+    # based on algorithm 3.5.1, p. 142, from reference [2]
+    # adapted for lower triangular matrix.
+
+    p = np.zeros(n)
+    w = np.empty(n)
+
+    # Implemented according to:  Golub, G. H., Van Loan, C. F. (2013).
+    # "Matrix computations". Forth Edition. JHU press. pp. 140-142.
+    for k in range(n):
+        wp = (1-p[k]) / U.T[k, k]
+        wm = (-1-p[k]) / U.T[k, k]
+        pp = p[k+1:] + U.T[k+1:, k]*wp
+        pm = p[k+1:] + U.T[k+1:, k]*wm
+
+        if abs(wp) + norm(pp, 1) >= abs(wm) + norm(pm, 1):
+            w[k] = wp
+            p[k+1:] = pp
+        else:
+            w[k] = wm
+            p[k+1:] = pm
+
+    # The system `U v = w` is solved using backward substitution.
+    v = solve_triangular(U, w)
+
+    v_norm = norm(v)
+    w_norm = norm(w)
+
+    # Smallest singular value
+    s_min = w_norm / v_norm
+
+    # Associated vector
+    z_min = v / v_norm
+
+    return s_min, z_min
+
+
+def gershgorin_bounds(H):
+    """
+    Given a square matrix ``H`` compute upper
+    and lower bounds for its eigenvalues (Gregoshgorin Bounds).
+    Defined ref. [1].
+
+    References
+    ----------
+    .. [1] Conn, A. R., Gould, N. I., & Toint, P. L.
+           Trust region methods. 2000. Siam. pp. 19.
+    """
+
+    H_diag = np.diag(H)
+    H_diag_abs = np.abs(H_diag)
+    H_row_sums = np.sum(np.abs(H), axis=1)
+    lb = np.min(H_diag + H_diag_abs - H_row_sums)
+    ub = np.max(H_diag - H_diag_abs + H_row_sums)
+
+    return lb, ub
+
+
+def singular_leading_submatrix(A, U, k):
+    """
+    Compute term that makes the leading ``k`` by ``k``
+    submatrix from ``A`` singular.
+
+    Parameters
+    ----------
+    A : ndarray
+        Symmetric matrix that is not positive definite.
+    U : ndarray
+        Upper triangular matrix resulting of an incomplete
+        Cholesky decomposition of matrix ``A``.
+    k : int
+        Positive integer such that the leading k by k submatrix from
+        `A` is the first non-positive definite leading submatrix.
+
+    Returns
+    -------
+    delta : float
+        Amount that should be added to the element (k, k) of the
+        leading k by k submatrix of ``A`` to make it singular.
+    v : ndarray
+        A vector such that ``v.T B v = 0``. Where B is the matrix A after
+        ``delta`` is added to its element (k, k).
+    """
+
+    # Compute delta
+    delta = np.sum(U[:k-1, k-1]**2) - A[k-1, k-1]
+
+    n = len(A)
+
+    # Inicialize v
+    v = np.zeros(n)
+    v[k-1] = 1
+
+    # Compute the remaining values of v by solving a triangular system.
+    if k != 1:
+        v[:k-1] = solve_triangular(U[:k-1, :k-1], -U[:k-1, k-1])
+
+    return delta, v
+
+
+class IterativeSubproblem(BaseQuadraticSubproblem):
+    """Quadratic subproblem solved by nearly exact iterative method.
+
+    Notes
+    -----
+    This subproblem solver was based on [1]_, [2]_ and [3]_,
+    which implement similar algorithms. The algorithm is basically
+    that of [1]_ but ideas from [2]_ and [3]_ were also used.
+
+    References
+    ----------
+    .. [1] A.R. Conn, N.I. Gould, and P.L. Toint, "Trust region methods",
+           Siam, pp. 169-200, 2000.
+    .. [2] J. Nocedal and  S. Wright, "Numerical optimization",
+           Springer Science & Business Media. pp. 83-91, 2006.
+    .. [3] J.J. More and D.C. Sorensen, "Computing a trust region step",
+           SIAM Journal on Scientific and Statistical Computing, vol. 4(3),
+           pp. 553-572, 1983.
+    """
+
+    # UPDATE_COEFF appears in reference [1]_
+    # in formula 7.3.14 (p. 190) named as "theta".
+    # As recommended there it value is fixed in 0.01.
+    UPDATE_COEFF = 0.01
+
+    EPS = np.finfo(float).eps
+
+    def __init__(self, x, fun, jac, hess, hessp=None,
+                 k_easy=0.1, k_hard=0.2):
+
+        super(IterativeSubproblem, self).__init__(x, fun, jac, hess)
+
+        # When the trust-region shrinks in two consecutive
+        # calculations (``tr_radius < previous_tr_radius``)
+        # the lower bound ``lambda_lb`` may be reused,
+        # facilitating  the convergence. To indicate no
+        # previous value is known at first ``previous_tr_radius``
+        # is set to -1  and ``lambda_lb`` to None.
+        self.previous_tr_radius = -1
+        self.lambda_lb = None
+
+        self.niter = 0
+
+        # ``k_easy`` and ``k_hard`` are parameters used
+        # to determine the stop criteria to the iterative
+        # subproblem solver. Take a look at pp. 194-197
+        # from reference _[1] for a more detailed description.
+        self.k_easy = k_easy
+        self.k_hard = k_hard
+
+        # Get Lapack function for cholesky decomposition.
+        # The implemented SciPy wrapper does not return
+        # the incomplete factorization needed by the method.
+        self.cholesky, = get_lapack_funcs(('potrf',), (self.hess,))
+
+        # Get info about Hessian
+        self.dimension = len(self.hess)
+        self.hess_gershgorin_lb,\
+            self.hess_gershgorin_ub = gershgorin_bounds(self.hess)
+        self.hess_inf = norm(self.hess, np.Inf)
+        self.hess_fro = norm(self.hess, 'fro')
+
+        # A constant such that for vectors smaler than that
+        # backward substituition is not reliable. It was stabilished
+        # based on Golub, G. H., Van Loan, C. F. (2013).
+        # "Matrix computations". Forth Edition. JHU press., p.165.
+        self.CLOSE_TO_ZERO = self.dimension * self.EPS * self.hess_inf
+
+    def _initial_values(self, tr_radius):
+        """Given a trust radius, return a good initial guess for
+        the damping factor, the lower bound and the upper bound.
+        The values were chosen accordingly to the guidelines on
+        section 7.3.8 (p. 192) from [1]_.
+        """
+
+        # Upper bound for the damping factor
+        lambda_ub = max(0, self.jac_mag/tr_radius + min(-self.hess_gershgorin_lb,
+                                                        self.hess_fro,
+                                                        self.hess_inf))
+
+        # Lower bound for the damping factor
+        lambda_lb = max(0, -min(self.hess.diagonal()),
+                        self.jac_mag/tr_radius - min(self.hess_gershgorin_ub,
+                                                     self.hess_fro,
+                                                     self.hess_inf))
+
+        # Improve bounds with previous info
+        if tr_radius < self.previous_tr_radius:
+            lambda_lb = max(self.lambda_lb, lambda_lb)
+
+        # Initial guess for the damping factor
+        if lambda_lb == 0:
+            lambda_initial = 0
+        else:
+            lambda_initial = max(np.sqrt(lambda_lb * lambda_ub),
+                                 lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))
+
+        return lambda_initial, lambda_lb, lambda_ub
+
+    def solve(self, tr_radius):
+        """Solve quadratic subproblem"""
+
+        lambda_current, lambda_lb, lambda_ub = self._initial_values(tr_radius)
+        n = self.dimension
+        hits_boundary = True
+        already_factorized = False
+        self.niter = 0
+
+        while True:
+
+            # Compute Cholesky factorization
+            if already_factorized:
+                already_factorized = False
+            else:
+                H = self.hess+lambda_current*np.eye(n)
+                U, info = self.cholesky(H, lower=False,
+                                        overwrite_a=False,
+                                        clean=True)
+
+            self.niter += 1
+
+            # Check if factorization succeeded
+            if info == 0 and self.jac_mag > self.CLOSE_TO_ZERO:
+                # Successful factorization
+
+                # Solve `U.T U p = s`
+                p = cho_solve((U, False), -self.jac)
+
+                p_norm = norm(p)
+
+                # Check for interior convergence
+                if p_norm <= tr_radius and lambda_current == 0:
+                    hits_boundary = False
+                    break
+
+                # Solve `U.T w = p`
+                w = solve_triangular(U, p, trans='T')
+
+                w_norm = norm(w)
+
+                # Compute Newton step accordingly to
+                # formula (4.44) p.87 from ref [2]_.
+                delta_lambda = (p_norm/w_norm)**2 * (p_norm-tr_radius)/tr_radius
+                lambda_new = lambda_current + delta_lambda
+
+                if p_norm < tr_radius:  # Inside boundary
+                    s_min, z_min = estimate_smallest_singular_value(U)
+
+                    ta, tb = self.get_boundaries_intersections(p, z_min,
+                                                               tr_radius)
+
+                    # Choose `step_len` with the smallest magnitude.
+                    # The reason for this choice is explained at
+                    # ref [3]_, p. 6 (Immediately before the formula
+                    # for `tau`).
+                    step_len = min([ta, tb], key=abs)
+
+                    # Compute the quadratic term  (p.T*H*p)
+                    quadratic_term = np.dot(p, np.dot(H, p))
+
+                    # Check stop criteria
+                    relative_error = (step_len**2 * s_min**2) / (quadratic_term + lambda_current*tr_radius**2)
+                    if relative_error <= self.k_hard:
+                        p += step_len * z_min
+                        break
+
+                    # Update uncertanty bounds
+                    lambda_ub = lambda_current
+                    lambda_lb = max(lambda_lb, lambda_current - s_min**2)
+
+                    # Compute Cholesky factorization
+                    H = self.hess + lambda_new*np.eye(n)
+                    c, info = self.cholesky(H, lower=False,
+                                            overwrite_a=False,
+                                            clean=True)
+
+                    # Check if the factorization have succeeded
+                    #
+                    if info == 0:  # Successful factorization
+                        # Update damping factor
+                        lambda_current = lambda_new
+                        already_factorized = True
+                    else:  # Unsuccessful factorization
+                        # Update uncertanty bounds
+                        lambda_lb = max(lambda_lb, lambda_new)
+
+                        # Update damping factor
+                        lambda_current = max(np.sqrt(lambda_lb * lambda_ub),
+                                             lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))
+
+                else:  # Outside boundary
+                    # Check stop criteria
+                    relative_error = abs(p_norm - tr_radius) / tr_radius
+                    if relative_error <= self.k_easy:
+                        break
+
+                    # Update uncertanty bounds
+                    lambda_lb = lambda_current
+
+                    # Update damping factor
+                    lambda_current = lambda_new
+
+            elif info == 0 and self.jac_mag <= self.CLOSE_TO_ZERO:
+                # jac_mag very close to zero
+
+                # Check for interior convergence
+                if lambda_current == 0:
+                    p = np.zeros(n)
+                    hits_boundary = False
+                    break
+
+                s_min, z_min = estimate_smallest_singular_value(U)
+                step_len = tr_radius
+
+                # Check stop criteria
+                if step_len**2 * s_min**2 <= self.k_hard * lambda_current * tr_radius**2:
+                    p = step_len * z_min
+                    break
+
+                # Update uncertanty bounds
+                lambda_ub = lambda_current
+                lambda_lb = max(lambda_lb, lambda_current - s_min**2)
+
+                # Update damping factor
+                lambda_current = max(np.sqrt(lambda_lb * lambda_ub),
+                                     lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))
+
+            else:  # Unsuccessful factorization
+
+                # Compute auxiliary terms
+                delta, v = singular_leading_submatrix(H, U, info)
+                v_norm = norm(v)
+
+                # Update uncertanty interval
+                lambda_lb = max(lambda_lb, lambda_current + delta/v_norm**2)
+
+                # Update damping factor
+                lambda_current = max(np.sqrt(lambda_lb * lambda_ub),
+                                     lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))
+
+        self.lambda_lb = lambda_lb
+        self.lambda_current = lambda_current
+        self.previous_tr_radius = tr_radius
+
+        return p, hits_boundary