Fixed database typo and removed unnecessary class identifier.
This commit is contained in:
parent 00ad49a143
commit 45fb349a7d
5098 changed files with 952558 additions and 85 deletions
329	venv/Lib/site-packages/scipy/optimize/_lsq/dogbox.py	Normal file
@@ -0,0 +1,329 @@
"""
|
||||
Dogleg algorithm with rectangular trust regions for least-squares minimization.
|
||||
|
||||
The description of the algorithm can be found in [Voglis]_. The algorithm does
|
||||
trust-region iterations, but the shape of trust regions is rectangular as
|
||||
opposed to conventional elliptical. The intersection of a trust region and
|
||||
an initial feasible region is again some rectangle. Thus, on each iteration a
|
||||
bound-constrained quadratic optimization problem is solved.
|
||||
|
||||
A quadratic problem is solved by well-known dogleg approach, where the
|
||||
function is minimized along piecewise-linear "dogleg" path [NumOpt]_,
|
||||
Chapter 4. If Jacobian is not rank-deficient then the function is decreasing
|
||||
along this path, and optimization amounts to simply following along this
|
||||
path as long as a point stays within the bounds. A constrained Cauchy step
|
||||
(along the anti-gradient) is considered for safety in rank deficient cases,
|
||||
in this situations the convergence might be slow.
|
||||
|
||||
If during iterations some variable hit the initial bound and the component
|
||||
of anti-gradient points outside the feasible region, then a next dogleg step
|
||||
won't make any progress. At this state such variables satisfy first-order
|
||||
optimality conditions and they are excluded before computing a next dogleg
|
||||
step.
|
||||
|
||||
Gauss-Newton step can be computed exactly by `numpy.linalg.lstsq` (for dense
|
||||
Jacobian matrices) or by iterative procedure `scipy.sparse.linalg.lsmr` (for
|
||||
dense and sparse matrices, or Jacobian being LinearOperator). The second
|
||||
option allows to solve very large problems (up to couple of millions of
|
||||
residuals on a regular PC), provided the Jacobian matrix is sufficiently
|
||||
sparse. But note that dogbox is not very good for solving problems with
|
||||
large number of constraints, because of variables exclusion-inclusion on each
|
||||
iteration (a required number of function evaluations might be high or accuracy
|
||||
of a solution will be poor), thus its large-scale usage is probably limited
|
||||
to unconstrained problems.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region Dogleg
|
||||
Approach for Unconstrained and Bound Constrained Nonlinear
|
||||
Optimization", WSEAS International Conference on Applied
|
||||
Mathematics, Corfu, Greece, 2004.
|
||||
.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization, 2nd edition".
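
Examples
--------
This is an internal module; the solver is normally reached through the
public `scipy.optimize.least_squares` interface. A minimal sketch (the
residual function below is a made-up example):

>>> import numpy as np
>>> from scipy.optimize import least_squares
>>> def fun(x):
...     return np.array([10 * (x[1] - x[0]**2), 1 - x[0]])
>>> res = least_squares(fun, np.array([2.0, 2.0]), bounds=(0.0, 3.0),
...                     method='dogbox')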
"""
import numpy as np
from numpy.linalg import lstsq, norm

from scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmr
from scipy.optimize import OptimizeResult

from .common import (
    step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic,
    build_quadratic_1d, minimize_quadratic_1d, compute_grad,
    compute_jac_scale, check_termination, scale_for_robust_loss_function,
    print_header_nonlinear, print_iteration_nonlinear)


def lsmr_operator(Jop, d, active_set):
    """Compute LinearOperator to use in LSMR by dogbox algorithm.

    `active_set` mask is used to exclude active variables from computations
    of matrix-vector products.
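
    Examples
    --------
    A small sketch with made-up inputs; the second variable is marked active,
    so its contribution to the forward product is dropped:

    >>> import numpy as np
    >>> from scipy.sparse.linalg import aslinearoperator
    >>> J = aslinearoperator(np.array([[1.0, 2.0], [3.0, 4.0]]))
    >>> op = lsmr_operator(J, np.ones(2), np.array([False, True]))
    >>> op.matvec(np.ones(2))
    array([1., 3.])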
    """
    m, n = Jop.shape

    def matvec(x):
        x_free = x.ravel().copy()
        x_free[active_set] = 0
        # Active components were zeroed above and must not contribute
        # to the product.
        return Jop.matvec(x_free * d)

    def rmatvec(x):
        r = d * Jop.rmatvec(x)
        r[active_set] = 0
        return r

    return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float)


def find_intersection(x, tr_bounds, lb, ub):
    """Find intersection of trust-region bounds and initial bounds.

    Returns
    -------
    lb_total, ub_total : ndarray with shape of x
        Lower and upper bounds of the intersection region.
    orig_l, orig_u : ndarray of bool with shape of x
        True means that an original bound is taken as a corresponding bound
        in the intersection region.
    tr_l, tr_u : ndarray of bool with shape of x
        True means that a trust-region bound is taken as a corresponding bound
        in the intersection region.
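
    Examples
    --------
    A one-variable sketch with made-up numbers; the lower bound of the
    intersection comes from the original bounds, while the upper bound
    comes from the trust region:

    >>> import numpy as np
    >>> lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
    ...     np.array([0.5]), np.array([1.0]), np.array([0.0]), np.array([10.0]))
    >>> lb_total, ub_total
    (array([-0.5]), array([1.]))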
    """
    lb_centered = lb - x
    ub_centered = ub - x

    lb_total = np.maximum(lb_centered, -tr_bounds)
    ub_total = np.minimum(ub_centered, tr_bounds)

    orig_l = np.equal(lb_total, lb_centered)
    orig_u = np.equal(ub_total, ub_centered)

    tr_l = np.equal(lb_total, -tr_bounds)
    tr_u = np.equal(ub_total, tr_bounds)

    return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u


def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub):
    """Find dogleg step in a rectangular region.

    Returns
    -------
    step : ndarray, shape (n,)
        Computed dogleg step.
    bound_hits : ndarray of int, shape (n,)
        Each component shows whether a corresponding variable hits the
        initial bound after the step is taken:

            * 0 - a variable doesn't hit the bound.
            * -1 - lower bound is hit.
            * 1 - upper bound is hit.
    tr_hit : bool
        Whether the step hit the boundary of the trust region.
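
    Examples
    --------
    A sketch with made-up numbers: the Newton step already lies inside the
    intersection of the trust region and the bounds, so it is returned
    unchanged (the quadratic coefficients `a` and `b` are not used on this
    path):

    >>> import numpy as np
    >>> step, bound_hits, tr_hit = dogleg_step(
    ...     np.zeros(1), np.array([0.1]), np.array([-1.0]), 1.0, -1.0,
    ...     np.array([1.0]), np.array([-5.0]), np.array([5.0]))
    >>> step, tr_hit
    (array([0.1]), False)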
    """
    lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
        x, tr_bounds, lb, ub
    )
    bound_hits = np.zeros_like(x, dtype=int)

    if in_bounds(newton_step, lb_total, ub_total):
        return newton_step, bound_hits, False

    to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total)

    # The classical dogleg algorithm would check whether the Cauchy step
    # fits into the bounds, and just return its constrained version if not.
    # But in a rectangular trust region it makes sense to try to improve the
    # constrained Cauchy step too. Thus, we don't distinguish these two cases.
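
    # The dogleg path is p(t) = cauchy_step + t * (newton_step - cauchy_step),
    # t >= 0; step_size_to_bound below finds how far along it we can go
    # before first hitting a bound of the intersection region.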
    cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g

    step_diff = newton_step - cauchy_step
    step_size, hits = step_size_to_bound(cauchy_step, step_diff,
                                         lb_total, ub_total)
    bound_hits[(hits < 0) & orig_l] = -1
    bound_hits[(hits > 0) & orig_u] = 1
    tr_hit = np.any((hits < 0) & tr_l | (hits > 0) & tr_u)

    return cauchy_step + step_size * step_diff, bound_hits, tr_hit


def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
           loss_function, tr_solver, tr_options, verbose):
    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    Delta = norm(x0 * scale_inv, ord=np.inf)
    if Delta == 0:
        Delta = 1.0

    on_bound = np.zeros_like(x0, dtype=int)
    on_bound[np.equal(x0, lb)] = -1
    on_bound[np.equal(x0, ub)] = 1

    x = x0
    step = np.empty_like(x0)

    if max_nfev is None:
        max_nfev = x0.size * 100

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
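        # A variable is "active" when it sits on a bound and the
        # anti-gradient points out of the feasible region; such variables
        # are frozen for the current iteration.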
        active_set = on_bound * g < 0
        free_set = ~active_set

        g_free = g[free_set]
        g_full = g.copy()
        g[active_set] = 0

        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        x_free = x[free_set]
        lb_free = lb[free_set]
        ub_free = ub[free_set]
        scale_free = scale[free_set]

        # Compute the (Gauss-)Newton step and build a quadratic model for
        # the Cauchy step.
        if tr_solver == 'exact':
            J_free = J[:, free_set]
            newton_step = lstsq(J_free, -f, rcond=-1)[0]

            # Coefficients for the quadratic model along the anti-gradient.
            a, b = build_quadratic_1d(J_free, g_free, -g_free)
        elif tr_solver == 'lsmr':
            Jop = aslinearoperator(J)

            # We compute the lsmr step in scaled variables and then
            # transform it back to normal variables. If lsmr gave the exact
            # least-squares solution, this would be equivalent to not doing
            # any transformations, but from experience it's better this way.

            # We pass active_set to make computations as if we selected
            # the free subset of J columns, but without actually doing any
            # slicing, which is expensive for sparse matrices and impossible
            # for LinearOperator.

            lsmr_op = lsmr_operator(Jop, scale, active_set)
            newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
            newton_step *= scale_free

            # Components of g for active variables were zeroed, so this call
            # is correct and equivalent to using J_free and g_free.
            a, b = build_quadratic_1d(Jop, g, -g)

        actual_reduction = -1.0
        while actual_reduction <= 0 and nfev < max_nfev:
            tr_bounds = Delta * scale_free

            step_free, on_bound_free, tr_hit = dogleg_step(
                x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)

            step.fill(0.0)
            step[free_set] = step_free

            if tr_solver == 'exact':
                predicted_reduction = -evaluate_quadratic(J_free, g_free,
                                                          step_free)
            elif tr_solver == 'lsmr':
                predicted_reduction = -evaluate_quadratic(Jop, g, step)

            x_new = x + step
            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step * scale_inv, ord=np.inf)

            if not np.all(np.isfinite(f_new)):
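                # Non-finite residuals: shrink the trust region and retry.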
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, tr_hit
            )

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)

            if termination_status is not None:
                break
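
        # Update the iterate and recompute the model only if the step
        # actually reduced the cost.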
        if actual_reduction > 0:
            on_bound[free_set] = on_bound_free

            x = x_new
            # Set variables exactly at the boundary.
            mask = on_bound == -1
            x[mask] = lb[mask]
            mask = on_bound == 1
            x[mask] = ub[mask]

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,
        active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)