Fixed database typo and removed unnecessary class identifier.
This commit is contained in:
parent
00ad49a143
commit
45fb349a7d
5098 changed files with 952558 additions and 85 deletions
5
venv/Lib/site-packages/scipy/optimize/_lsq/__init__.py
Normal file
@@ -0,0 +1,5 @@
"""This module contains least-squares algorithms."""
from .least_squares import least_squares
from .lsq_linear import lsq_linear

__all__ = ['least_squares', 'lsq_linear']
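For orientation, a minimal sketch of how the two entry points re-exported here are typically called (illustrative only, not part of this commit; the data values are made up):

import numpy as np
from scipy.optimize import least_squares, lsq_linear

# Nonlinear fit: residuals r(x) = x0 * exp(x1 * t) - y (made-up data).
t = np.array([0.0, 1.0, 2.0, 3.0])
y = np.array([2.0, 2.7, 3.7, 5.0])
res = least_squares(lambda x: x[0] * np.exp(x[1] * t) - y, x0=[1.0, 0.1])

# Linear fit with box constraints: min ||A z - b||**2 s.t. 0 <= z <= 1.
A = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
b = np.array([1.0, 2.0, 2.5])
lin = lsq_linear(A, b, bounds=(0.0, 1.0))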
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
178
venv/Lib/site-packages/scipy/optimize/_lsq/bvls.py
Normal file
@@ -0,0 +1,178 @@
"""Bounded-variable least-squares algorithm."""
import numpy as np
from numpy.linalg import norm, lstsq
from scipy.optimize import OptimizeResult

from .common import print_header_linear, print_iteration_linear


def compute_kkt_optimality(g, on_bound):
    """Compute the maximum violation of KKT conditions."""
    g_kkt = g * on_bound
    free_set = on_bound == 0
    g_kkt[free_set] = np.abs(g[free_set])
    return np.max(g_kkt)


def bvls(A, b, x_lsq, lb, ub, tol, max_iter, verbose):
    m, n = A.shape

    x = x_lsq.copy()
    on_bound = np.zeros(n)

    mask = x < lb
    x[mask] = lb[mask]
    on_bound[mask] = -1

    mask = x > ub
    x[mask] = ub[mask]
    on_bound[mask] = 1

    free_set = on_bound == 0
    active_set = ~free_set
    free_set, = np.nonzero(free_set)

    r = A.dot(x) - b
    cost = 0.5 * np.dot(r, r)
    initial_cost = cost
    g = A.T.dot(r)

    cost_change = None
    step_norm = None
    iteration = 0

    if verbose == 2:
        print_header_linear()

    # This is the initialization loop. The requirement is that the
    # least-squares solution on free variables is feasible before BVLS starts.
    # One possible initialization is to set all variables to lower or upper
    # bounds, but many iterations may be required from this state later on.
    # The implemented ad-hoc procedure which intuitively should give a better
    # initial state: find the least-squares solution on current free
    # variables; if it's feasible then stop, otherwise set violating variables
    # to corresponding bounds and continue on the reduced set of free
    # variables.

    while free_set.size > 0:
        if verbose == 2:
            optimality = compute_kkt_optimality(g, on_bound)
            print_iteration_linear(iteration, cost, cost_change, step_norm,
                                   optimality)

        iteration += 1
        x_free_old = x[free_set].copy()

        A_free = A[:, free_set]
        b_free = b - A.dot(x * active_set)
        z = lstsq(A_free, b_free, rcond=-1)[0]

        lbv = z < lb[free_set]
        ubv = z > ub[free_set]
        v = lbv | ubv

        if np.any(lbv):
            ind = free_set[lbv]
            x[ind] = lb[ind]
            active_set[ind] = True
            on_bound[ind] = -1

        if np.any(ubv):
            ind = free_set[ubv]
            x[ind] = ub[ind]
            active_set[ind] = True
            on_bound[ind] = 1

        ind = free_set[~v]
        x[ind] = z[~v]

        r = A.dot(x) - b
        cost_new = 0.5 * np.dot(r, r)
        cost_change = cost - cost_new
        cost = cost_new
        g = A.T.dot(r)
        step_norm = norm(x[free_set] - x_free_old)

        if np.any(v):
            free_set = free_set[~v]
        else:
            break

    if max_iter is None:
        max_iter = n
    max_iter += iteration

    termination_status = None

    # Main BVLS loop.

    optimality = compute_kkt_optimality(g, on_bound)
    for iteration in range(iteration, max_iter):
        if verbose == 2:
            print_iteration_linear(iteration, cost, cost_change,
                                   step_norm, optimality)

        if optimality < tol:
            termination_status = 1

        if termination_status is not None:
            break

        move_to_free = np.argmax(g * on_bound)
        on_bound[move_to_free] = 0
        free_set = on_bound == 0
        active_set = ~free_set
        free_set, = np.nonzero(free_set)

        x_free = x[free_set]
        x_free_old = x_free.copy()
        lb_free = lb[free_set]
        ub_free = ub[free_set]

        A_free = A[:, free_set]
        b_free = b - A.dot(x * active_set)
        z = lstsq(A_free, b_free, rcond=-1)[0]

        lbv, = np.nonzero(z < lb_free)
        ubv, = np.nonzero(z > ub_free)
        v = np.hstack((lbv, ubv))

        if v.size > 0:
            alphas = np.hstack((
                lb_free[lbv] - x_free[lbv],
                ub_free[ubv] - x_free[ubv])) / (z[v] - x_free[v])

            i = np.argmin(alphas)
            i_free = v[i]
            alpha = alphas[i]

            x_free *= 1 - alpha
            x_free += alpha * z

            if i < lbv.size:
                on_bound[free_set[i_free]] = -1
            else:
                on_bound[free_set[i_free]] = 1
        else:
            x_free = z

        x[free_set] = x_free
        step_norm = norm(x_free - x_free_old)

        r = A.dot(x) - b
        cost_new = 0.5 * np.dot(r, r)
        cost_change = cost - cost_new

        if cost_change < tol * cost:
            termination_status = 2
        cost = cost_new

        g = A.T.dot(r)
        optimality = compute_kkt_optimality(g, on_bound)

    if termination_status is None:
        termination_status = 0

    return OptimizeResult(
        x=x, fun=r, cost=cost, optimality=optimality, active_mask=on_bound,
        nit=iteration + 1, status=termination_status,
        initial_cost=initial_cost)
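`bvls` above is an internal routine; the documented way to reach it is `scipy.optimize.lsq_linear` with ``method='bvls'``. A minimal sketch with made-up data:

import numpy as np
from scipy.optimize import lsq_linear

rng = np.random.default_rng(0)
A = rng.standard_normal((20, 5))
b = rng.standard_normal(20)

# method='bvls' runs the active-set algorithm implemented above.
res = lsq_linear(A, b, bounds=(-0.1, 0.1), method='bvls', tol=1e-10)
print(res.x, res.status)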
734
venv/Lib/site-packages/scipy/optimize/_lsq/common.py
Normal file
@@ -0,0 +1,734 @@
"""Functions used by least-squares algorithms."""
from math import copysign

import numpy as np
from numpy.linalg import norm

from scipy.linalg import cho_factor, cho_solve, LinAlgError
from scipy.sparse import issparse
from scipy.sparse.linalg import LinearOperator, aslinearoperator


EPS = np.finfo(float).eps


# Functions related to a trust-region problem.


def intersect_trust_region(x, s, Delta):
    """Find the intersection of a line with the boundary of a trust region.

    This function solves the quadratic equation with respect to t
    ||(x + s*t)||**2 = Delta**2.

    Returns
    -------
    t_neg, t_pos : tuple of float
        Negative and positive roots.

    Raises
    ------
    ValueError
        If `s` is zero or `x` is not within the trust region.
    """
    a = np.dot(s, s)
    if a == 0:
        raise ValueError("`s` is zero.")

    b = np.dot(x, s)

    c = np.dot(x, x) - Delta**2
    if c > 0:
        raise ValueError("`x` is not within the trust region.")

    d = np.sqrt(b*b - a*c)  # Root from one fourth of the discriminant.

    # Computations below avoid loss of significance, see "Numerical Recipes".
    q = -(b + copysign(d, b))
    t1 = q / a
    t2 = c / q

    if t1 < t2:
        return t1, t2
    else:
        return t2, t1


def solve_lsq_trust_region(n, m, uf, s, V, Delta, initial_alpha=None,
                           rtol=0.01, max_iter=10):
    """Solve a trust-region problem arising in least-squares minimization.

    This function implements a method described by J. J. More [1]_ and used
    in MINPACK, but it relies on a single SVD of the Jacobian instead of a
    series of Cholesky decompositions. Before running this function, compute:
    ``U, s, VT = svd(J, full_matrices=False)``.

    Parameters
    ----------
    n : int
        Number of variables.
    m : int
        Number of residuals.
    uf : ndarray
        Computed as U.T.dot(f).
    s : ndarray
        Singular values of J.
    V : ndarray
        Transpose of VT.
    Delta : float
        Radius of a trust region.
    initial_alpha : float, optional
        Initial guess for alpha, which might be available from a previous
        iteration. If None, determined automatically.
    rtol : float, optional
        Stopping tolerance for the root-finding procedure. Namely, the
        solution ``p`` will satisfy ``abs(norm(p) - Delta) < rtol * Delta``.
    max_iter : int, optional
        Maximum allowed number of iterations for the root-finding procedure.

    Returns
    -------
    p : ndarray, shape (n,)
        Found solution of a trust-region problem.
    alpha : float
        Positive value such that (J.T*J + alpha*I)*p = -J.T*f.
        Sometimes called the Levenberg-Marquardt parameter.
    n_iter : int
        Number of iterations made by the root-finding procedure. Zero means
        that the Gauss-Newton step was selected as the solution.

    References
    ----------
    .. [1] More, J. J., "The Levenberg-Marquardt Algorithm: Implementation
           and Theory," Numerical Analysis, ed. G. A. Watson, Lecture Notes
           in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
    """
    def phi_and_derivative(alpha, suf, s, Delta):
        """Function of which to find zero.

        It is defined as "norm of regularized (by alpha) least-squares
        solution minus `Delta`". Refer to [1]_.
        """
        denom = s**2 + alpha
        p_norm = norm(suf / denom)
        phi = p_norm - Delta
        phi_prime = -np.sum(suf ** 2 / denom**3) / p_norm
        return phi, phi_prime

    suf = s * uf

    # Check if J has full rank and try Gauss-Newton step.
    if m >= n:
        threshold = EPS * m * s[0]
        full_rank = s[-1] > threshold
    else:
        full_rank = False

    if full_rank:
        p = -V.dot(uf / s)
        if norm(p) <= Delta:
            return p, 0.0, 0

    alpha_upper = norm(suf) / Delta

    if full_rank:
        phi, phi_prime = phi_and_derivative(0.0, suf, s, Delta)
        alpha_lower = -phi / phi_prime
    else:
        alpha_lower = 0.0

    if initial_alpha is None or not full_rank and initial_alpha == 0:
        alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5)
    else:
        alpha = initial_alpha

    for it in range(max_iter):
        if alpha < alpha_lower or alpha > alpha_upper:
            alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5)

        phi, phi_prime = phi_and_derivative(alpha, suf, s, Delta)

        if phi < 0:
            alpha_upper = alpha

        ratio = phi / phi_prime
        alpha_lower = max(alpha_lower, alpha - ratio)
        alpha -= (phi + Delta) * ratio / Delta

        if np.abs(phi) < rtol * Delta:
            break

    p = -V.dot(suf / (s**2 + alpha))

    # Make the norm of p equal to Delta; p is changed only slightly during
    # this. It is done to prevent p from lying outside the trust region
    # (which can cause problems later).
    p *= Delta / norm(p)

    return p, alpha, it + 1


def solve_trust_region_2d(B, g, Delta):
    """Solve a general trust-region problem in 2 dimensions.

    The problem is reformulated as a 4th order algebraic equation,
    the solution of which is found by numpy.roots.

    Parameters
    ----------
    B : ndarray, shape (2, 2)
        Symmetric matrix, defines a quadratic term of the function.
    g : ndarray, shape (2,)
        Defines a linear term of the function.
    Delta : float
        Radius of a trust region.

    Returns
    -------
    p : ndarray, shape (2,)
        Found solution.
    newton_step : bool
        Whether the returned solution is the Newton step which lies within
        the trust region.
    """
    try:
        R, lower = cho_factor(B)
        p = -cho_solve((R, lower), g)
        if np.dot(p, p) <= Delta**2:
            return p, True
    except LinAlgError:
        pass

    a = B[0, 0] * Delta**2
    b = B[0, 1] * Delta**2
    c = B[1, 1] * Delta**2

    d = g[0] * Delta
    f = g[1] * Delta

    coeffs = np.array(
        [-b + d, 2 * (a - c + f), 6 * b, 2 * (-a + c + f), -b - d])
    t = np.roots(coeffs)  # Can handle leading zeros.
    t = np.real(t[np.isreal(t)])

    p = Delta * np.vstack((2 * t / (1 + t**2), (1 - t**2) / (1 + t**2)))
    value = 0.5 * np.sum(p * B.dot(p), axis=0) + np.dot(g, p)
    i = np.argmin(value)
    p = p[:, i]

    return p, False


def update_tr_radius(Delta, actual_reduction, predicted_reduction,
                     step_norm, bound_hit):
    """Update the radius of a trust region based on the cost reduction.

    Returns
    -------
    Delta : float
        New radius.
    ratio : float
        Ratio between actual and predicted reductions.
    """
    if predicted_reduction > 0:
        ratio = actual_reduction / predicted_reduction
    elif predicted_reduction == actual_reduction == 0:
        ratio = 1
    else:
        ratio = 0

    if ratio < 0.25:
        Delta = 0.25 * step_norm
    elif ratio > 0.75 and bound_hit:
        Delta *= 2.0

    return Delta, ratio


# Construction and minimization of quadratic functions.


def build_quadratic_1d(J, g, s, diag=None, s0=None):
    """Parameterize a multivariate quadratic function along a line.

    The resulting univariate quadratic function is given as follows:
    ::
        f(t) = 0.5 * (s0 + s*t).T * (J.T*J + diag) * (s0 + s*t) +
               g.T * (s0 + s*t)

    Parameters
    ----------
    J : ndarray, sparse matrix or LinearOperator, shape (m, n)
        Jacobian matrix, affects the quadratic term.
    g : ndarray, shape (n,)
        Gradient, defines the linear term.
    s : ndarray, shape (n,)
        Direction vector of a line.
    diag : None or ndarray with shape (n,), optional
        Additional diagonal part, affects the quadratic term.
        If None, assumed to be 0.
    s0 : None or ndarray with shape (n,), optional
        Initial point. If None, assumed to be 0.

    Returns
    -------
    a : float
        Coefficient for t**2.
    b : float
        Coefficient for t.
    c : float
        Free term. Returned only if `s0` is provided.
    """
    v = J.dot(s)
    a = np.dot(v, v)
    if diag is not None:
        a += np.dot(s * diag, s)
    a *= 0.5

    b = np.dot(g, s)

    if s0 is not None:
        u = J.dot(s0)
        b += np.dot(u, v)
        c = 0.5 * np.dot(u, u) + np.dot(g, s0)
        if diag is not None:
            b += np.dot(s0 * diag, s)
            c += 0.5 * np.dot(s0 * diag, s0)
        return a, b, c
    else:
        return a, b


def minimize_quadratic_1d(a, b, lb, ub, c=0):
    """Minimize a 1-D quadratic function subject to bounds.

    The free term `c` is 0 by default. Bounds must be finite.

    Returns
    -------
    t : float
        Minimum point.
    y : float
        Minimum value.
    """
    t = [lb, ub]
    if a != 0:
        extremum = -0.5 * b / a
        if lb < extremum < ub:
            t.append(extremum)
    t = np.asarray(t)
    y = t * (a * t + b) + c
    min_index = np.argmin(y)
    return t[min_index], y[min_index]


def evaluate_quadratic(J, g, s, diag=None):
    """Compute values of a quadratic function arising in least squares.

    The function is 0.5 * s.T * (J.T * J + diag) * s + g.T * s.

    Parameters
    ----------
    J : ndarray, sparse matrix or LinearOperator, shape (m, n)
        Jacobian matrix, affects the quadratic term.
    g : ndarray, shape (n,)
        Gradient, defines the linear term.
    s : ndarray, shape (k, n) or (n,)
        Array containing steps as rows.
    diag : ndarray, shape (n,), optional
        Additional diagonal part, affects the quadratic term.
        If None, assumed to be 0.

    Returns
    -------
    values : ndarray with shape (k,) or float
        Values of the function. If `s` was 2-D, then an ndarray is
        returned, otherwise a float is returned.
    """
    if s.ndim == 1:
        Js = J.dot(s)
        q = np.dot(Js, Js)
        if diag is not None:
            q += np.dot(s * diag, s)
    else:
        Js = J.dot(s.T)
        q = np.sum(Js**2, axis=0)
        if diag is not None:
            q += np.sum(diag * s**2, axis=1)

    l = np.dot(s, g)

    return 0.5 * q + l


# Utility functions to work with bound constraints.


def in_bounds(x, lb, ub):
    """Check if a point lies within bounds."""
    return np.all((x >= lb) & (x <= ub))


def step_size_to_bound(x, s, lb, ub):
    """Compute the minimum step size required to reach a bound.

    The function computes a positive scalar t, such that x + s * t is on
    the bound.

    Returns
    -------
    step : float
        Computed step. Non-negative value.
    hits : ndarray of int with shape of x
        Each element indicates whether a corresponding variable reaches the
        bound:

        * 0 - the bound was not hit.
        * -1 - the lower bound was hit.
        * 1 - the upper bound was hit.
    """
    non_zero = np.nonzero(s)
    s_non_zero = s[non_zero]
    steps = np.empty_like(x)
    steps.fill(np.inf)
    with np.errstate(over='ignore'):
        steps[non_zero] = np.maximum((lb - x)[non_zero] / s_non_zero,
                                     (ub - x)[non_zero] / s_non_zero)
    min_step = np.min(steps)
    return min_step, np.equal(steps, min_step) * np.sign(s).astype(int)


def find_active_constraints(x, lb, ub, rtol=1e-10):
    """Determine which constraints are active at a given point.

    The threshold is computed using `rtol` and the absolute value of the
    closest bound.

    Returns
    -------
    active : ndarray of int with shape of x
        Each component shows whether the corresponding constraint is active:

        * 0 - a constraint is not active.
        * -1 - a lower bound is active.
        * 1 - an upper bound is active.
    """
    active = np.zeros_like(x, dtype=int)

    if rtol == 0:
        active[x <= lb] = -1
        active[x >= ub] = 1
        return active

    lower_dist = x - lb
    upper_dist = ub - x

    lower_threshold = rtol * np.maximum(1, np.abs(lb))
    upper_threshold = rtol * np.maximum(1, np.abs(ub))

    lower_active = (np.isfinite(lb) &
                    (lower_dist <= np.minimum(upper_dist, lower_threshold)))
    active[lower_active] = -1

    upper_active = (np.isfinite(ub) &
                    (upper_dist <= np.minimum(lower_dist, upper_threshold)))
    active[upper_active] = 1

    return active


def make_strictly_feasible(x, lb, ub, rstep=1e-10):
    """Shift a point to the interior of a feasible region.

    Each element of the returned vector is at least at a relative distance
    `rstep` from the closest bound. If ``rstep=0`` then `np.nextafter` is
    used.
    """
    x_new = x.copy()

    active = find_active_constraints(x, lb, ub, rstep)
    lower_mask = np.equal(active, -1)
    upper_mask = np.equal(active, 1)

    if rstep == 0:
        x_new[lower_mask] = np.nextafter(lb[lower_mask], ub[lower_mask])
        x_new[upper_mask] = np.nextafter(ub[upper_mask], lb[upper_mask])
    else:
        x_new[lower_mask] = (lb[lower_mask] +
                             rstep * np.maximum(1, np.abs(lb[lower_mask])))
        x_new[upper_mask] = (ub[upper_mask] -
                             rstep * np.maximum(1, np.abs(ub[upper_mask])))

    tight_bounds = (x_new < lb) | (x_new > ub)
    x_new[tight_bounds] = 0.5 * (lb[tight_bounds] + ub[tight_bounds])

    return x_new


def CL_scaling_vector(x, g, lb, ub):
    """Compute Coleman-Li scaling vector and its derivatives.

    Components of a vector v are defined as follows:
    ::
               | ub[i] - x[i], if g[i] < 0 and ub[i] < np.inf
        v[i] = | x[i] - lb[i], if g[i] > 0 and lb[i] > -np.inf
               | 1,            otherwise

    According to this definition v[i] >= 0 for all i. It differs from the
    definition in paper [1]_ (eq. (2.2)), where the absolute value of v is
    used. Both definitions are equivalent down the line.
    Derivatives of v with respect to x take value 1, -1 or 0 depending on a
    case.

    Returns
    -------
    v : ndarray with shape of x
        Scaling vector.
    dv : ndarray with shape of x
        Derivatives of v[i] with respect to x[i], diagonal elements of v's
        Jacobian.

    References
    ----------
    .. [1] M.A. Branch, T.F. Coleman, and Y. Li, "A Subspace, Interior,
           and Conjugate Gradient Method for Large-Scale Bound-Constrained
           Minimization Problems," SIAM Journal on Scientific Computing,
           Vol. 21, Number 1, pp 1-23, 1999.
    """
    v = np.ones_like(x)
    dv = np.zeros_like(x)

    mask = (g < 0) & np.isfinite(ub)
    v[mask] = ub[mask] - x[mask]
    dv[mask] = -1

    mask = (g > 0) & np.isfinite(lb)
    v[mask] = x[mask] - lb[mask]
    dv[mask] = 1

    return v, dv


def reflective_transformation(y, lb, ub):
    """Compute reflective transformation and its gradient."""
    if in_bounds(y, lb, ub):
        return y, np.ones_like(y)

    lb_finite = np.isfinite(lb)
    ub_finite = np.isfinite(ub)

    x = y.copy()
    g_negative = np.zeros_like(y, dtype=bool)

    mask = lb_finite & ~ub_finite
    x[mask] = np.maximum(y[mask], 2 * lb[mask] - y[mask])
    g_negative[mask] = y[mask] < lb[mask]

    mask = ~lb_finite & ub_finite
    x[mask] = np.minimum(y[mask], 2 * ub[mask] - y[mask])
    g_negative[mask] = y[mask] > ub[mask]

    mask = lb_finite & ub_finite
    d = ub - lb
    t = np.remainder(y[mask] - lb[mask], 2 * d[mask])
    x[mask] = lb[mask] + np.minimum(t, 2 * d[mask] - t)
    g_negative[mask] = t > d[mask]

    g = np.ones_like(y)
    g[g_negative] = -1

    return x, g


# Functions to display algorithm's progress.


def print_header_nonlinear():
    print("{0:^15}{1:^15}{2:^15}{3:^15}{4:^15}{5:^15}"
          .format("Iteration", "Total nfev", "Cost", "Cost reduction",
                  "Step norm", "Optimality"))


def print_iteration_nonlinear(iteration, nfev, cost, cost_reduction,
                              step_norm, optimality):
    if cost_reduction is None:
        cost_reduction = " " * 15
    else:
        cost_reduction = "{0:^15.2e}".format(cost_reduction)

    if step_norm is None:
        step_norm = " " * 15
    else:
        step_norm = "{0:^15.2e}".format(step_norm)

    print("{0:^15}{1:^15}{2:^15.4e}{3}{4}{5:^15.2e}"
          .format(iteration, nfev, cost, cost_reduction,
                  step_norm, optimality))


def print_header_linear():
    print("{0:^15}{1:^15}{2:^15}{3:^15}{4:^15}"
          .format("Iteration", "Cost", "Cost reduction", "Step norm",
                  "Optimality"))


def print_iteration_linear(iteration, cost, cost_reduction, step_norm,
                           optimality):
    if cost_reduction is None:
        cost_reduction = " " * 15
    else:
        cost_reduction = "{0:^15.2e}".format(cost_reduction)

    if step_norm is None:
        step_norm = " " * 15
    else:
        step_norm = "{0:^15.2e}".format(step_norm)

    print("{0:^15}{1:^15.4e}{2}{3}{4:^15.2e}".format(
        iteration, cost, cost_reduction, step_norm, optimality))


# Simple helper functions.


def compute_grad(J, f):
    """Compute gradient of the least-squares cost function."""
    if isinstance(J, LinearOperator):
        return J.rmatvec(f)
    else:
        return J.T.dot(f)


def compute_jac_scale(J, scale_inv_old=None):
    """Compute variables scale based on the Jacobian matrix."""
    if issparse(J):
        scale_inv = np.asarray(J.power(2).sum(axis=0)).ravel()**0.5
    else:
        scale_inv = np.sum(J**2, axis=0)**0.5

    if scale_inv_old is None:
        scale_inv[scale_inv == 0] = 1
    else:
        scale_inv = np.maximum(scale_inv, scale_inv_old)

    return 1 / scale_inv, scale_inv


def left_multiplied_operator(J, d):
    """Return diag(d) J as LinearOperator."""
    J = aslinearoperator(J)

    def matvec(x):
        return d * J.matvec(x)

    def matmat(X):
        return d[:, np.newaxis] * J.matmat(X)

    def rmatvec(x):
        return J.rmatvec(x.ravel() * d)

    return LinearOperator(J.shape, matvec=matvec, matmat=matmat,
                          rmatvec=rmatvec)


def right_multiplied_operator(J, d):
    """Return J diag(d) as LinearOperator."""
    J = aslinearoperator(J)

    def matvec(x):
        return J.matvec(np.ravel(x) * d)

    def matmat(X):
        return J.matmat(X * d[:, np.newaxis])

    def rmatvec(x):
        return d * J.rmatvec(x)

    return LinearOperator(J.shape, matvec=matvec, matmat=matmat,
                          rmatvec=rmatvec)


def regularized_lsq_operator(J, diag):
    """Return a matrix arising in regularized least squares as LinearOperator.

    The matrix is
        [ J ]
        [ D ]
    where D is a diagonal matrix with elements from `diag`.
    """
    J = aslinearoperator(J)
    m, n = J.shape

    def matvec(x):
        return np.hstack((J.matvec(x), diag * x))

    def rmatvec(x):
        x1 = x[:m]
        x2 = x[m:]
        return J.rmatvec(x1) + diag * x2

    return LinearOperator((m + n, n), matvec=matvec, rmatvec=rmatvec)


def right_multiply(J, d, copy=True):
    """Compute J diag(d).

    If `copy` is False, `J` is modified in place (unless being LinearOperator).
    """
    if copy and not isinstance(J, LinearOperator):
        J = J.copy()

    if issparse(J):
        J.data *= d.take(J.indices, mode='clip')  # scikit-learn recipe.
    elif isinstance(J, LinearOperator):
        J = right_multiplied_operator(J, d)
    else:
        J *= d

    return J


def left_multiply(J, d, copy=True):
    """Compute diag(d) J.

    If `copy` is False, `J` is modified in place (unless being LinearOperator).
    """
    if copy and not isinstance(J, LinearOperator):
        J = J.copy()

    if issparse(J):
        J.data *= np.repeat(d, np.diff(J.indptr))  # scikit-learn recipe.
    elif isinstance(J, LinearOperator):
        J = left_multiplied_operator(J, d)
    else:
        J *= d[:, np.newaxis]

    return J


def check_termination(dF, F, dx_norm, x_norm, ratio, ftol, xtol):
    """Check termination condition for nonlinear least squares."""
    ftol_satisfied = dF < ftol * F and ratio > 0.25
    xtol_satisfied = dx_norm < xtol * (xtol + x_norm)

    if ftol_satisfied and xtol_satisfied:
        return 4
    elif ftol_satisfied:
        return 2
    elif xtol_satisfied:
        return 3
    else:
        return None


def scale_for_robust_loss_function(J, f, rho):
    """Scale Jacobian and residuals for a robust loss function.

    Arrays are modified in place.
    """
    J_scale = rho[1] + 2 * rho[2] * f**2
    J_scale[J_scale < EPS] = EPS
    J_scale **= 0.5

    f *= rho[1] / J_scale

    return left_multiply(J, J_scale, copy=False), f
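Two of the helpers above in isolation, as a small illustrative sketch (it imports from a private module, so this is subject to change; the numbers are made up):

import numpy as np
from scipy.optimize._lsq.common import (
    minimize_quadratic_1d, step_size_to_bound)

# Minimize f(t) = t**2 - 4*t on [0, 3]; the interior extremum t = 2 wins.
t_min, f_min = minimize_quadratic_1d(1.0, -4.0, 0.0, 3.0)  # -> (2.0, -4.0)

# Starting at x = 0.5 and moving along s = +1, the upper bound 1.0 is
# reached after a step of 0.5; hits marks which bound was hit (+1 = upper).
step, hits = step_size_to_bound(np.array([0.5]), np.array([1.0]),
                                np.array([0.0]), np.array([1.0]))
# -> step == 0.5, hits == array([1])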
329
venv/Lib/site-packages/scipy/optimize/_lsq/dogbox.py
Normal file
@@ -0,0 +1,329 @@
"""
Dogleg algorithm with rectangular trust regions for least-squares minimization.

The description of the algorithm can be found in [Voglis]_. The algorithm does
trust-region iterations, but the shape of trust regions is rectangular as
opposed to the conventional elliptical shape. The intersection of a trust
region and an initial feasible region is again some rectangle. Thus, on each
iteration a bound-constrained quadratic optimization problem is solved.

A quadratic problem is solved by the well-known dogleg approach, where the
function is minimized along a piecewise-linear "dogleg" path [NumOpt]_,
Chapter 4. If the Jacobian is not rank-deficient then the function is
decreasing along this path, and optimization amounts to simply following
along this path as long as a point stays within the bounds. A constrained
Cauchy step (along the anti-gradient) is considered for safety in
rank-deficient cases; in these situations the convergence might be slow.

If during iterations some variable hits the initial bound and the component
of the anti-gradient points outside the feasible region, then the next dogleg
step won't make any progress. At this state such variables satisfy
first-order optimality conditions and are excluded before computing the next
dogleg step.

The Gauss-Newton step can be computed exactly by `numpy.linalg.lstsq` (for
dense Jacobian matrices) or by the iterative procedure
`scipy.sparse.linalg.lsmr` (for dense and sparse matrices, or for a Jacobian
given as a LinearOperator). The second option allows solving very large
problems (up to a couple of million residuals on a regular PC), provided the
Jacobian matrix is sufficiently sparse. But note that dogbox is not very good
for solving problems with a large number of constraints, because of the
variable exclusion-inclusion on each iteration (the required number of
function evaluations might be high, or the accuracy of a solution will be
poor); thus its large-scale usage is probably limited to unconstrained
problems.

References
----------
.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region Dogleg
            Approach for Unconstrained and Bound Constrained Nonlinear
            Optimization", WSEAS International Conference on Applied
            Mathematics, Corfu, Greece, 2004.
.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization, 2nd
            edition".
"""
import numpy as np
from numpy.linalg import lstsq, norm

from scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmr
from scipy.optimize import OptimizeResult

from .common import (
    step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic,
    build_quadratic_1d, minimize_quadratic_1d, compute_grad,
    compute_jac_scale, check_termination, scale_for_robust_loss_function,
    print_header_nonlinear, print_iteration_nonlinear)


def lsmr_operator(Jop, d, active_set):
    """Compute LinearOperator to use in LSMR by dogbox algorithm.

    The `active_set` mask is used to exclude active variables from
    computations of matrix-vector products.
    """
    m, n = Jop.shape

    def matvec(x):
        x_free = x.ravel().copy()
        x_free[active_set] = 0
        return Jop.matvec(x_free * d)

    def rmatvec(x):
        r = d * Jop.rmatvec(x)
        r[active_set] = 0
        return r

    return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float)


def find_intersection(x, tr_bounds, lb, ub):
    """Find intersection of trust-region bounds and initial bounds.

    Returns
    -------
    lb_total, ub_total : ndarray with shape of x
        Lower and upper bounds of the intersection region.
    orig_l, orig_u : ndarray of bool with shape of x
        True means that an original bound is taken as a corresponding bound
        in the intersection region.
    tr_l, tr_u : ndarray of bool with shape of x
        True means that a trust-region bound is taken as a corresponding
        bound in the intersection region.
    """
    lb_centered = lb - x
    ub_centered = ub - x

    lb_total = np.maximum(lb_centered, -tr_bounds)
    ub_total = np.minimum(ub_centered, tr_bounds)

    orig_l = np.equal(lb_total, lb_centered)
    orig_u = np.equal(ub_total, ub_centered)

    tr_l = np.equal(lb_total, -tr_bounds)
    tr_u = np.equal(ub_total, tr_bounds)

    return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u


def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub):
    """Find dogleg step in a rectangular region.

    Returns
    -------
    step : ndarray, shape (n,)
        Computed dogleg step.
    bound_hits : ndarray of int, shape (n,)
        Each component shows whether a corresponding variable hits the
        initial bound after the step is taken:

        * 0 - a variable doesn't hit the bound.
        * -1 - lower bound is hit.
        * 1 - upper bound is hit.
    tr_hit : bool
        Whether the step hit the boundary of the trust-region.
    """
    lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
        x, tr_bounds, lb, ub
    )
    bound_hits = np.zeros_like(x, dtype=int)

    if in_bounds(newton_step, lb_total, ub_total):
        return newton_step, bound_hits, False

    to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total)

    # The classical dogleg algorithm would check if the Cauchy step fits into
    # the bounds, and just return its constrained version if not. But in a
    # rectangular trust region it makes sense to try to improve the
    # constrained Cauchy step too. Thus, we don't distinguish these two cases.

    cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g

    step_diff = newton_step - cauchy_step
    step_size, hits = step_size_to_bound(cauchy_step, step_diff,
                                         lb_total, ub_total)
    bound_hits[(hits < 0) & orig_l] = -1
    bound_hits[(hits > 0) & orig_u] = 1
    tr_hit = np.any((hits < 0) & tr_l | (hits > 0) & tr_u)

    return cauchy_step + step_size * step_diff, bound_hits, tr_hit


def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
           loss_function, tr_solver, tr_options, verbose):
    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    Delta = norm(x0 * scale_inv, ord=np.inf)
    if Delta == 0:
        Delta = 1.0

    on_bound = np.zeros_like(x0, dtype=int)
    on_bound[np.equal(x0, lb)] = -1
    on_bound[np.equal(x0, ub)] = 1

    x = x0
    step = np.empty_like(x0)

    if max_nfev is None:
        max_nfev = x0.size * 100

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        active_set = on_bound * g < 0
        free_set = ~active_set

        g_free = g[free_set]
        g_full = g.copy()
        g[active_set] = 0

        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        x_free = x[free_set]
        lb_free = lb[free_set]
        ub_free = ub[free_set]
        scale_free = scale[free_set]

        # Compute (Gauss-)Newton and build quadratic model for Cauchy step.
        if tr_solver == 'exact':
            J_free = J[:, free_set]
            newton_step = lstsq(J_free, -f, rcond=-1)[0]

            # Coefficients for the quadratic model along the anti-gradient.
            a, b = build_quadratic_1d(J_free, g_free, -g_free)
        elif tr_solver == 'lsmr':
            Jop = aslinearoperator(J)

            # We compute lsmr step in scaled variables and then
            # transform back to normal variables, if lsmr would give exact lsq
            # solution, this would be equivalent to not doing any
            # transformations, but from experience it's better this way.

            # We pass active_set to make computations as if we selected
            # the free subset of J columns, but without actually doing any
            # slicing, which is expensive for sparse matrices and impossible
            # for LinearOperator.

            lsmr_op = lsmr_operator(Jop, scale, active_set)
            newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
            newton_step *= scale_free

            # Components of g for active variables were zeroed, so this call
            # is correct and equivalent to using J_free and g_free.
            a, b = build_quadratic_1d(Jop, g, -g)

        actual_reduction = -1.0
        while actual_reduction <= 0 and nfev < max_nfev:
            tr_bounds = Delta * scale_free

            step_free, on_bound_free, tr_hit = dogleg_step(
                x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)

            step.fill(0.0)
            step[free_set] = step_free

            if tr_solver == 'exact':
                predicted_reduction = -evaluate_quadratic(J_free, g_free,
                                                          step_free)
            elif tr_solver == 'lsmr':
                predicted_reduction = -evaluate_quadratic(Jop, g, step)

            x_new = x + step
            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step * scale_inv, ord=np.inf)

            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, tr_hit
            )

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)

            if termination_status is not None:
                break

        if actual_reduction > 0:
            on_bound[free_set] = on_bound_free

            x = x_new
            # Set variables exactly at the boundary.
            mask = on_bound == -1
            x[mask] = lb[mask]
            mask = on_bound == 1
            x[mask] = ub[mask]

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,
        active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)
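A minimal sketch of driving this solver through the public API (``method='dogbox'`` selects the function above); the Rosenbrock-style residuals and bounds are illustrative only:

import numpy as np
from scipy.optimize import least_squares

def residuals(x):
    # Rosenbrock residuals: cost = 0.5 * ||r(x)||**2.
    return np.array([10.0 * (x[1] - x[0]**2), 1.0 - x[0]])

res = least_squares(residuals, x0=np.array([2.0, 2.0]),
                    bounds=([-np.inf, 1.5], np.inf), method='dogbox')
print(res.x, res.status)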
Binary file not shown.
940
venv/Lib/site-packages/scipy/optimize/_lsq/least_squares.py
Normal file
@@ -0,0 +1,940 @@
"""Generic interface for least-squares minimization."""
from warnings import warn

import numpy as np
from numpy.linalg import norm

from scipy.sparse import issparse, csr_matrix
from scipy.sparse.linalg import LinearOperator
from scipy.optimize import _minpack, OptimizeResult
from scipy.optimize._numdiff import approx_derivative, group_columns

from .trf import trf
from .dogbox import dogbox
from .common import EPS, in_bounds, make_strictly_feasible


TERMINATION_MESSAGES = {
    -1: "Improper input parameters status returned from `leastsq`",
    0: "The maximum number of function evaluations is exceeded.",
    1: "`gtol` termination condition is satisfied.",
    2: "`ftol` termination condition is satisfied.",
    3: "`xtol` termination condition is satisfied.",
    4: "Both `ftol` and `xtol` termination conditions are satisfied."
}


FROM_MINPACK_TO_COMMON = {
    0: -1,  # Improper input parameters from MINPACK.
    1: 2,
    2: 3,
    3: 4,
    4: 1,
    5: 0
    # There are 6, 7, 8 for too small tolerance parameters,
    # but we guard against it by checking ftol, xtol, gtol beforehand.
}


def call_minpack(fun, x0, jac, ftol, xtol, gtol, max_nfev, x_scale, diff_step):
    n = x0.size

    if diff_step is None:
        epsfcn = EPS
    else:
        epsfcn = diff_step**2

    # Compute MINPACK's `diag`, which is the inverse of our `x_scale`;
    # ``x_scale='jac'`` corresponds to ``diag=None``.
    if isinstance(x_scale, str) and x_scale == 'jac':
        diag = None
    else:
        diag = 1 / x_scale

    full_output = True
    col_deriv = False
    factor = 100.0

    if jac is None:
        if max_nfev is None:
            # n squared to account for Jacobian evaluations.
            max_nfev = 100 * n * (n + 1)
        x, info, status = _minpack._lmdif(
            fun, x0, (), full_output, ftol, xtol, gtol,
            max_nfev, epsfcn, factor, diag)
    else:
        if max_nfev is None:
            max_nfev = 100 * n
        x, info, status = _minpack._lmder(
            fun, jac, x0, (), full_output, col_deriv,
            ftol, xtol, gtol, max_nfev, factor, diag)

    f = info['fvec']

    if callable(jac):
        J = jac(x)
    else:
        J = np.atleast_2d(approx_derivative(fun, x))

    cost = 0.5 * np.dot(f, f)
    g = J.T.dot(f)
    g_norm = norm(g, ord=np.inf)

    nfev = info['nfev']
    njev = info.get('njev', None)

    status = FROM_MINPACK_TO_COMMON[status]
    active_mask = np.zeros_like(x0, dtype=int)

    return OptimizeResult(
        x=x, cost=cost, fun=f, jac=J, grad=g, optimality=g_norm,
        active_mask=active_mask, nfev=nfev, njev=njev, status=status)


def prepare_bounds(bounds, n):
    lb, ub = [np.asarray(b, dtype=float) for b in bounds]
    if lb.ndim == 0:
        lb = np.resize(lb, n)

    if ub.ndim == 0:
        ub = np.resize(ub, n)

    return lb, ub


def check_tolerance(ftol, xtol, gtol, method):
    def check(tol, name):
        if tol is None:
            tol = 0
        elif tol < EPS:
            warn("Setting `{}` below the machine epsilon ({:.2e}) effectively "
                 "disables the corresponding termination condition."
                 .format(name, EPS))
        return tol

    ftol = check(ftol, "ftol")
    xtol = check(xtol, "xtol")
    gtol = check(gtol, "gtol")

    if method == "lm" and (ftol < EPS or xtol < EPS or gtol < EPS):
        raise ValueError("All tolerances must be higher than machine epsilon "
                         "({:.2e}) for method 'lm'.".format(EPS))
    elif ftol < EPS and xtol < EPS and gtol < EPS:
        raise ValueError("At least one of the tolerances must be higher than "
                         "machine epsilon ({:.2e}).".format(EPS))

    return ftol, xtol, gtol


def check_x_scale(x_scale, x0):
    if isinstance(x_scale, str) and x_scale == 'jac':
        return x_scale

    try:
        x_scale = np.asarray(x_scale, dtype=float)
        valid = np.all(np.isfinite(x_scale)) and np.all(x_scale > 0)
    except (ValueError, TypeError):
        valid = False

    if not valid:
        raise ValueError("`x_scale` must be 'jac' or array_like with "
                         "positive numbers.")

    if x_scale.ndim == 0:
        x_scale = np.resize(x_scale, x0.shape)

    if x_scale.shape != x0.shape:
        raise ValueError("Inconsistent shapes between `x_scale` and `x0`.")

    return x_scale


def check_jac_sparsity(jac_sparsity, m, n):
    if jac_sparsity is None:
        return None

    if not issparse(jac_sparsity):
        jac_sparsity = np.atleast_2d(jac_sparsity)

    if jac_sparsity.shape != (m, n):
        raise ValueError("`jac_sparsity` has wrong shape.")

    return jac_sparsity, group_columns(jac_sparsity)


# Loss functions.


def huber(z, rho, cost_only):
    mask = z <= 1
    rho[0, mask] = z[mask]
    rho[0, ~mask] = 2 * z[~mask]**0.5 - 1
    if cost_only:
        return
    rho[1, mask] = 1
    rho[1, ~mask] = z[~mask]**-0.5
    rho[2, mask] = 0
    rho[2, ~mask] = -0.5 * z[~mask]**-1.5


def soft_l1(z, rho, cost_only):
    t = 1 + z
    rho[0] = 2 * (t**0.5 - 1)
    if cost_only:
        return
    rho[1] = t**-0.5
    rho[2] = -0.5 * t**-1.5


def cauchy(z, rho, cost_only):
    rho[0] = np.log1p(z)
    if cost_only:
        return
    t = 1 + z
    rho[1] = 1 / t
    rho[2] = -1 / t**2


def arctan(z, rho, cost_only):
    rho[0] = np.arctan(z)
    if cost_only:
        return
    t = 1 + z**2
    rho[1] = 1 / t
    rho[2] = -2 * z / t**2


IMPLEMENTED_LOSSES = dict(linear=None, huber=huber, soft_l1=soft_l1,
                          cauchy=cauchy, arctan=arctan)


def construct_loss_function(m, loss, f_scale):
    if loss == 'linear':
        return None

    if not callable(loss):
        loss = IMPLEMENTED_LOSSES[loss]
        rho = np.empty((3, m))

        def loss_function(f, cost_only=False):
            z = (f / f_scale) ** 2
            loss(z, rho, cost_only=cost_only)
            if cost_only:
                return 0.5 * f_scale ** 2 * np.sum(rho[0])
            rho[0] *= f_scale ** 2
            rho[2] /= f_scale ** 2
            return rho
    else:
        def loss_function(f, cost_only=False):
            z = (f / f_scale) ** 2
            rho = loss(z)
            if cost_only:
                return 0.5 * f_scale ** 2 * np.sum(rho[0])
            rho[0] *= f_scale ** 2
            rho[2] /= f_scale ** 2
            return rho

    return loss_function


def least_squares(
        fun, x0, jac='2-point', bounds=(-np.inf, np.inf), method='trf',
        ftol=1e-8, xtol=1e-8, gtol=1e-8, x_scale=1.0, loss='linear',
        f_scale=1.0, diff_step=None, tr_solver=None, tr_options={},
        jac_sparsity=None, max_nfev=None, verbose=0, args=(), kwargs={}):
    """Solve a nonlinear least-squares problem with bounds on the variables.

    Given the residuals f(x) (an m-D real function of n real
    variables) and the loss function rho(s) (a scalar function),
    `least_squares` finds a local minimum of the cost function F(x)::

        minimize F(x) = 0.5 * sum(rho(f_i(x)**2), i = 0, ..., m - 1)
        subject to lb <= x <= ub

    The purpose of the loss function rho(s) is to reduce the influence of
    outliers on the solution.

    Parameters
    ----------
    fun : callable
        Function which computes the vector of residuals, with the signature
        ``fun(x, *args, **kwargs)``, i.e., the minimization proceeds with
        respect to its first argument. The argument ``x`` passed to this
        function is an ndarray of shape (n,) (never a scalar, even for n=1).
        It must allocate and return a 1-D array_like of shape (m,) or a
        scalar. If the argument ``x`` is complex or the function ``fun``
        returns complex residuals, it must be wrapped in a real function of
        real arguments, as shown at the end of the Examples section.
    x0 : array_like with shape (n,) or float
        Initial guess on independent variables. If float, it will be treated
        as a 1-D array with one element.
    jac : {'2-point', '3-point', 'cs', callable}, optional
        Method of computing the Jacobian matrix (an m-by-n matrix, where
        element (i, j) is the partial derivative of f[i] with respect to
        x[j]). The keywords select a finite difference scheme for numerical
        estimation. The scheme '3-point' is more accurate, but requires
        twice as many operations as '2-point' (default). The scheme 'cs'
        uses complex steps, and while potentially the most accurate, it is
        applicable only when `fun` correctly handles complex inputs and
        can be analytically continued to the complex plane. Method 'lm'
        always uses the '2-point' scheme. If callable, it is used as
        ``jac(x, *args, **kwargs)`` and should return a good approximation
        (or the exact value) for the Jacobian as an array_like (np.atleast_2d
        is applied), a sparse matrix or a `scipy.sparse.linalg.LinearOperator`.
    bounds : 2-tuple of array_like, optional
        Lower and upper bounds on independent variables. Defaults to no
        bounds. Each array must match the size of `x0` or be a scalar; in the
        latter case a bound will be the same for all variables. Use
        ``np.inf`` with an appropriate sign to disable bounds on all or some
        variables.
    method : {'trf', 'dogbox', 'lm'}, optional
        Algorithm to perform minimization.

            * 'trf' : Trust Region Reflective algorithm, particularly
              suitable for large sparse problems with bounds. Generally
              robust method.
            * 'dogbox' : dogleg algorithm with rectangular trust regions,
              typical use case is small problems with bounds. Not recommended
              for problems with rank-deficient Jacobian.
            * 'lm' : Levenberg-Marquardt algorithm as implemented in MINPACK.
              Doesn't handle bounds and sparse Jacobians. Usually the most
              efficient method for small unconstrained problems.

        Default is 'trf'. See Notes for more information.
    ftol : float or None, optional
        Tolerance for termination by the change of the cost function. Default
        is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
        and there was an adequate agreement between a local quadratic model
        and the true model in the last step. If None, the termination by this
        condition is disabled.
    xtol : float or None, optional
        Tolerance for termination by the change of the independent variables.
        Default is 1e-8. The exact condition depends on the `method` used:

            * For 'trf' and 'dogbox' : ``norm(dx) < xtol * (xtol + norm(x))``.
            * For 'lm' : ``Delta < xtol * norm(xs)``, where ``Delta`` is
              a trust-region radius and ``xs`` is the value of ``x``
              scaled according to the `x_scale` parameter (see below).

        If None, the termination by this condition is disabled.
    gtol : float or None, optional
        Tolerance for termination by the norm of the gradient. Default is
        1e-8. The exact condition depends on the `method` used:

            * For 'trf' : ``norm(g_scaled, ord=np.inf) < gtol``, where
              ``g_scaled`` is the value of the gradient scaled to account for
              the presence of the bounds [STIR]_.
            * For 'dogbox' : ``norm(g_free, ord=np.inf) < gtol``, where
              ``g_free`` is the gradient with respect to the variables which
              are not in the optimal state on the boundary.
            * For 'lm' : the maximum absolute value of the cosine of angles
              between columns of the Jacobian and the residual vector is less
              than `gtol`, or the residual vector is zero.

        If None, the termination by this condition is disabled.
    x_scale : array_like or 'jac', optional
        Characteristic scale of each variable. Setting `x_scale` is equivalent
        to reformulating the problem in scaled variables ``xs = x / x_scale``.
        An alternative view is that the size of a trust region along the jth
        dimension is proportional to ``x_scale[j]``. Improved convergence may
        be achieved by setting `x_scale` such that a step of a given size
        along any of the scaled variables has a similar effect on the cost
        function. If set to 'jac', the scale is iteratively updated using the
        inverse norms of the columns of the Jacobian matrix (as described in
        [JJMore]_).
    loss : str or callable, optional
        Determines the loss function. The following keyword values are
        allowed:

            * 'linear' (default) : ``rho(z) = z``. Gives a standard
              least-squares problem.
            * 'soft_l1' : ``rho(z) = 2 * ((1 + z)**0.5 - 1)``. The smooth
              approximation of l1 (absolute value) loss. Usually a good
              choice for robust least squares.
            * 'huber' : ``rho(z) = z if z <= 1 else 2*z**0.5 - 1``. Works
              similarly to 'soft_l1'.
            * 'cauchy' : ``rho(z) = ln(1 + z)``. Severely weakens the
              influence of outliers, but may cause difficulties in the
              optimization process.
            * 'arctan' : ``rho(z) = arctan(z)``. Limits a maximum loss on
              a single residual, has properties similar to 'cauchy'.

        If callable, it must take a 1-D ndarray ``z=f**2`` and return an
        array_like with shape (3, m) where row 0 contains function values,
        row 1 contains first derivatives and row 2 contains second
        derivatives. Method 'lm' supports only 'linear' loss.
    f_scale : float, optional
        Value of the soft margin between inlier and outlier residuals,
        default is 1.0. The loss function is evaluated as follows
        ``rho_(f**2) = C**2 * rho(f**2 / C**2)``, where ``C`` is `f_scale`,
        and ``rho`` is determined by the `loss` parameter. This parameter has
        no effect with ``loss='linear'``, but for other `loss` values it is
        of crucial importance.
    max_nfev : None or int, optional
        Maximum number of function evaluations before the termination.
        If None (default), the value is chosen automatically:

            * For 'trf' and 'dogbox' : 100 * n.
            * For 'lm' : 100 * n if `jac` is callable and 100 * n * (n + 1)
              otherwise (because 'lm' counts function calls in Jacobian
              estimation).

    diff_step : None or array_like, optional
        Determines the relative step size for the finite difference
        approximation of the Jacobian. The actual step is computed as
        ``x * diff_step``. If None (default), then `diff_step` is taken to be
        a conventional "optimal" power of machine epsilon for the finite
        difference scheme used [NR]_.
    tr_solver : {None, 'exact', 'lsmr'}, optional
        Method for solving trust-region subproblems, relevant only for 'trf'
        and 'dogbox' methods.

            * 'exact' is suitable for not very large problems with dense
              Jacobian matrices. The computational complexity per iteration
              is comparable to a singular value decomposition of the Jacobian
              matrix.
            * 'lsmr' is suitable for problems with sparse and large Jacobian
              matrices. It uses the iterative procedure
              `scipy.sparse.linalg.lsmr` for finding a solution of a linear
              least-squares problem and only requires matrix-vector product
              evaluations.

        If None (default), the solver is chosen based on the type of Jacobian
        returned on the first iteration.
    tr_options : dict, optional
        Keyword options passed to the trust-region solver.

            * ``tr_solver='exact'``: `tr_options` are ignored.
            * ``tr_solver='lsmr'``: options for `scipy.sparse.linalg.lsmr`.
              Additionally, ``method='trf'`` supports the 'regularize' option
              (bool, default is True), which adds a regularization term to
              the normal equation, which improves convergence if the Jacobian
              is rank-deficient [Byrd]_ (eq. 3.4).

    jac_sparsity : {None, array_like, sparse matrix}, optional
        Defines the sparsity structure of the Jacobian matrix for finite
        difference estimation; its shape must be (m, n). If the Jacobian has
        only a few non-zero elements in *each* row, providing the sparsity
        structure will greatly speed up the computations [Curtis]_. A zero
        entry means that a corresponding element in the Jacobian is
        identically zero. If provided, forces the use of the 'lsmr'
        trust-region solver. If None (default), then dense differencing will
        be used. Has no effect for the 'lm' method.
    verbose : {0, 1, 2}, optional
        Level of algorithm's verbosity:

            * 0 (default) : work silently.
            * 1 : display a termination report.
            * 2 : display progress during iterations (not supported by 'lm'
              method).

    args, kwargs : tuple and dict, optional
        Additional arguments passed to `fun` and `jac`. Both empty by default.
        The calling signature is ``fun(x, *args, **kwargs)`` and the same for
        `jac`.

    Returns
    -------
    `OptimizeResult` with the following fields defined:
    x : ndarray, shape (n,)
        Solution found.
    cost : float
        Value of the cost function at the solution.
    fun : ndarray, shape (m,)
        Vector of residuals at the solution.
    jac : ndarray, sparse matrix or LinearOperator, shape (m, n)
        Modified Jacobian matrix at the solution, in the sense that J^T J
        is a Gauss-Newton approximation of the Hessian of the cost function.
        The type is the same as the one used by the algorithm.
    grad : ndarray, shape (n,)
        Gradient of the cost function at the solution.
    optimality : float
        First-order optimality measure. In unconstrained problems, it is
        always the uniform norm of the gradient. In constrained problems, it
        is the quantity which was compared with `gtol` during iterations.
    active_mask : ndarray of int, shape (n,)
        Each component shows whether a corresponding constraint is active
        (that is, whether a variable is at the bound):

            * 0 : a constraint is not active.
            * -1 : a lower bound is active.
            * 1 : an upper bound is active.

        Might be somewhat arbitrary for the 'trf' method as it generates a
        sequence of strictly feasible iterates and `active_mask` is
        determined within a tolerance threshold.
    nfev : int
        Number of function evaluations done. Methods 'trf' and 'dogbox' do
        not count function calls for numerical Jacobian approximation, as
        opposed to the 'lm' method.
    njev : int or None
        Number of Jacobian evaluations done. If numerical Jacobian
        approximation is used in the 'lm' method, it is set to None.
    status : int
        The reason for algorithm termination:

            * -1 : improper input parameters status returned from MINPACK.
            * 0 : the maximum number of function evaluations is exceeded.
            * 1 : `gtol` termination condition is satisfied.
            * 2 : `ftol` termination condition is satisfied.
            * 3 : `xtol` termination condition is satisfied.
* 4 : Both `ftol` and `xtol` termination conditions are satisfied.
|
||||
|
||||
message : str
|
||||
Verbal description of the termination reason.
|
||||
success : bool
|
||||
True if one of the convergence criteria is satisfied (`status` > 0).
|
||||
|
||||
See Also
|
||||
--------
|
||||
leastsq : A legacy wrapper for the MINPACK implementation of the
|
||||
Levenberg-Marquadt algorithm.
|
||||
curve_fit : Least-squares minimization applied to a curve-fitting problem.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Method 'lm' (Levenberg-Marquardt) calls a wrapper over least-squares
|
||||
algorithms implemented in MINPACK (lmder, lmdif). It runs the
|
||||
Levenberg-Marquardt algorithm formulated as a trust-region type algorithm.
|
||||
The implementation is based on paper [JJMore]_, it is very robust and
|
||||
efficient with a lot of smart tricks. It should be your first choice
|
||||
for unconstrained problems. Note that it doesn't support bounds. Also,
|
||||
it doesn't work when m < n.
|
||||
|
||||
Method 'trf' (Trust Region Reflective) is motivated by the process of
|
||||
solving a system of equations, which constitute the first-order optimality
|
||||
condition for a bound-constrained minimization problem as formulated in
|
||||
[STIR]_. The algorithm iteratively solves trust-region subproblems
|
||||
augmented by a special diagonal quadratic term and with trust-region shape
|
||||
determined by the distance from the bounds and the direction of the
|
||||
gradient. This enhancements help to avoid making steps directly into bounds
|
||||
and efficiently explore the whole space of variables. To further improve
|
||||
convergence, the algorithm considers search directions reflected from the
|
||||
bounds. To obey theoretical requirements, the algorithm keeps iterates
|
||||
strictly feasible. With dense Jacobians trust-region subproblems are
|
||||
solved by an exact method very similar to the one described in [JJMore]_
|
||||
(and implemented in MINPACK). The difference from the MINPACK
|
||||
implementation is that a singular value decomposition of a Jacobian
|
||||
matrix is done once per iteration, instead of a QR decomposition and series
|
||||
of Givens rotation eliminations. For large sparse Jacobians a 2-D subspace
|
||||
approach of solving trust-region subproblems is used [STIR]_, [Byrd]_.
|
||||
The subspace is spanned by a scaled gradient and an approximate
|
||||
Gauss-Newton solution delivered by `scipy.sparse.linalg.lsmr`. When no
|
||||
constraints are imposed the algorithm is very similar to MINPACK and has
|
||||
generally comparable performance. The algorithm works quite robust in
|
||||
unbounded and bounded problems, thus it is chosen as a default algorithm.
|
||||
|
||||
Method 'dogbox' operates in a trust-region framework, but considers
|
||||
rectangular trust regions as opposed to conventional ellipsoids [Voglis]_.
|
||||
The intersection of a current trust region and initial bounds is again
|
||||
rectangular, so on each iteration a quadratic minimization problem subject
|
||||
to bound constraints is solved approximately by Powell's dogleg method
|
||||
[NumOpt]_. The required Gauss-Newton step can be computed exactly for
|
||||
dense Jacobians or approximately by `scipy.sparse.linalg.lsmr` for large
|
||||
sparse Jacobians. The algorithm is likely to exhibit slow convergence when
|
||||
the rank of Jacobian is less than the number of variables. The algorithm
|
||||
often outperforms 'trf' in bounded problems with a small number of
|
||||
variables.
|
||||
|
||||
Robust loss functions are implemented as described in [BA]_. The idea
|
||||
is to modify a residual vector and a Jacobian matrix on each iteration
|
||||
such that computed gradient and Gauss-Newton Hessian approximation match
|
||||
the true gradient and Hessian approximation of the cost function. Then
|
||||
the algorithm proceeds in a normal way, i.e., robust loss functions are
|
||||
implemented as a simple wrapper over standard least-squares algorithms.
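
    The callable form of `loss` can be illustrated with a hand-written
    equivalent of the built-in 'soft_l1' option (a sketch; the name
    ``loss_soft_l1`` is hypothetical and not part of this module). It follows
    the contract described above: take ``z = f**2`` and return an array of
    shape (3, m) holding ``rho(z)`` and its first two derivatives:

    >>> import numpy as np
    >>> def loss_soft_l1(z):
    ...     t = 1 + z
    ...     rho = np.empty((3,) + z.shape)
    ...     rho[0] = 2 * (t**0.5 - 1)   # rho(z)
    ...     rho[1] = t**-0.5            # rho'(z)
    ...     rho[2] = -0.5 * t**-1.5     # rho''(z)
    ...     return rho

    Passing ``loss=loss_soft_l1`` should then behave like ``loss='soft_l1'``.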

    .. versionadded:: 0.17.0

    References
    ----------
    .. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
              and Conjugate Gradient Method for Large-Scale Bound-Constrained
              Minimization Problems," SIAM Journal on Scientific Computing,
              Vol. 21, Number 1, pp 1-23, 1999.
    .. [NR] William H. Press et. al., "Numerical Recipes. The Art of Scientific
            Computing. 3rd edition", Sec. 5.7.
    .. [Byrd] R. H. Byrd, R. B. Schnabel and G. A. Shultz, "Approximate
              solution of the trust region problem by minimization over
              two-dimensional subspaces", Math. Programming, 40, pp. 247-263,
              1988.
    .. [Curtis] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
                sparse Jacobian matrices", Journal of the Institute of
                Mathematics and its Applications, 13, pp. 117-120, 1974.
    .. [JJMore] J. J. More, "The Levenberg-Marquardt Algorithm: Implementation
                and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
                Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
    .. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region
                Dogleg Approach for Unconstrained and Bound Constrained
                Nonlinear Optimization", WSEAS International Conference on
                Applied Mathematics, Corfu, Greece, 2004.
    .. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization,
                2nd edition", Chapter 4.
    .. [BA] B. Triggs et. al., "Bundle Adjustment - A Modern Synthesis",
            Proceedings of the International Workshop on Vision Algorithms:
            Theory and Practice, pp. 298-372, 1999.

    Examples
    --------
    In this example we find a minimum of the Rosenbrock function without
    bounds on independent variables.

    >>> def fun_rosenbrock(x):
    ...     return np.array([10 * (x[1] - x[0]**2), (1 - x[0])])

    Notice that we only provide the vector of the residuals. The algorithm
    constructs the cost function as a sum of squares of the residuals, which
    gives the Rosenbrock function. The exact minimum is at ``x = [1.0, 1.0]``.

    >>> from scipy.optimize import least_squares
    >>> x0_rosenbrock = np.array([2, 2])
    >>> res_1 = least_squares(fun_rosenbrock, x0_rosenbrock)
    >>> res_1.x
    array([ 1.,  1.])
    >>> res_1.cost
    9.8669242910846867e-30
    >>> res_1.optimality
    8.8928864934219529e-14

    We now constrain the variables, in such a way that the previous solution
    becomes infeasible. Specifically, we require that ``x[1] >= 1.5``, and
    ``x[0]`` is left unconstrained. To this end, we specify the `bounds`
    parameter to `least_squares` in the form
    ``bounds=([-np.inf, 1.5], np.inf)``.

    We also provide the analytic Jacobian:

    >>> def jac_rosenbrock(x):
    ...     return np.array([
    ...         [-20 * x[0], 10],
    ...         [-1, 0]])

    Putting this all together, we see that the new solution lies on the bound:

    >>> res_2 = least_squares(fun_rosenbrock, x0_rosenbrock, jac_rosenbrock,
    ...                       bounds=([-np.inf, 1.5], np.inf))
    >>> res_2.x
    array([ 1.22437075,  1.5       ])
    >>> res_2.cost
    0.025213093946805685
    >>> res_2.optimality
    1.5885401433157753e-07

    Now we solve a system of equations (i.e., the cost function should be zero
    at a minimum) for a Broyden tridiagonal vector-valued function of 100000
    variables:

    >>> def fun_broyden(x):
    ...     f = (3 - x) * x + 1
    ...     f[1:] -= x[:-1]
    ...     f[:-1] -= 2 * x[1:]
    ...     return f

    The corresponding Jacobian matrix is sparse. We tell the algorithm to
    estimate it by finite differences and provide the sparsity structure of
    the Jacobian to significantly speed up this process.

    >>> from scipy.sparse import lil_matrix
    >>> def sparsity_broyden(n):
    ...     sparsity = lil_matrix((n, n), dtype=int)
    ...     i = np.arange(n)
    ...     sparsity[i, i] = 1
    ...     i = np.arange(1, n)
    ...     sparsity[i, i - 1] = 1
    ...     i = np.arange(n - 1)
    ...     sparsity[i, i + 1] = 1
    ...     return sparsity
    ...
    >>> n = 100000
    >>> x0_broyden = -np.ones(n)
    ...
    >>> res_3 = least_squares(fun_broyden, x0_broyden,
    ...                       jac_sparsity=sparsity_broyden(n))
    >>> res_3.cost
    4.5687069299604613e-23
    >>> res_3.optimality
    1.1650454296851518e-11

    Let's also solve a curve fitting problem using a robust loss function to
    take care of outliers in the data. Define the model function as
    ``y = a + b * exp(c * t)``, where t is a predictor variable, y is an
    observation and a, b, c are parameters to estimate.

    First, define the function which generates the data with noise and
    outliers, define the model parameters, and generate data:

    >>> def gen_data(t, a, b, c, noise=0, n_outliers=0, random_state=0):
    ...     y = a + b * np.exp(t * c)
    ...
    ...     rnd = np.random.RandomState(random_state)
    ...     error = noise * rnd.randn(t.size)
    ...     outliers = rnd.randint(0, t.size, n_outliers)
    ...     error[outliers] *= 10
    ...
    ...     return y + error
    ...
    >>> a = 0.5
    >>> b = 2.0
    >>> c = -1
    >>> t_min = 0
    >>> t_max = 10
    >>> n_points = 15
    ...
    >>> t_train = np.linspace(t_min, t_max, n_points)
    >>> y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)

    Define the function for computing residuals and the initial estimate of
    the parameters.

    >>> def fun(x, t, y):
    ...     return x[0] + x[1] * np.exp(x[2] * t) - y
    ...
    >>> x0 = np.array([1.0, 1.0, 0.0])

    Compute a standard least-squares solution:

    >>> res_lsq = least_squares(fun, x0, args=(t_train, y_train))

    Now compute two solutions with two different robust loss functions. The
    parameter `f_scale` is set to 0.1, meaning that inlier residuals should
    not significantly exceed 0.1 (the noise level used).

    >>> res_soft_l1 = least_squares(fun, x0, loss='soft_l1', f_scale=0.1,
    ...                             args=(t_train, y_train))
    >>> res_log = least_squares(fun, x0, loss='cauchy', f_scale=0.1,
    ...                         args=(t_train, y_train))

    And, finally, plot all the curves. We see that by selecting an appropriate
    `loss` we can get estimates close to optimal even in the presence of
    strong outliers. But keep in mind that generally it is recommended to try
    'soft_l1' or 'huber' losses first (if at all necessary) as the other two
    options may cause difficulties in the optimization process.

    >>> t_test = np.linspace(t_min, t_max, n_points * 10)
    >>> y_true = gen_data(t_test, a, b, c)
    >>> y_lsq = gen_data(t_test, *res_lsq.x)
    >>> y_soft_l1 = gen_data(t_test, *res_soft_l1.x)
    >>> y_log = gen_data(t_test, *res_log.x)
    ...
    >>> import matplotlib.pyplot as plt
    >>> plt.plot(t_train, y_train, 'o')
    >>> plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
    >>> plt.plot(t_test, y_lsq, label='linear loss')
    >>> plt.plot(t_test, y_soft_l1, label='soft_l1 loss')
    >>> plt.plot(t_test, y_log, label='cauchy loss')
    >>> plt.xlabel("t")
    >>> plt.ylabel("y")
    >>> plt.legend()
    >>> plt.show()

    In the next example, we show how complex-valued residual functions of
    complex variables can be optimized with ``least_squares()``. Consider the
    following function:

    >>> def f(z):
    ...     return z - (0.5 + 0.5j)

    We wrap it into a function of real variables that returns real residuals
    by simply handling the real and imaginary parts as independent variables:

    >>> def f_wrap(x):
    ...     fx = f(x[0] + 1j*x[1])
    ...     return np.array([fx.real, fx.imag])

    Thus, instead of the original m-D complex function of n complex
    variables we optimize a 2m-D real function of 2n real variables:

    >>> from scipy.optimize import least_squares
    >>> res_wrapped = least_squares(f_wrap, (0.1, 0.1), bounds=([0, 0], [1, 1]))
    >>> z = res_wrapped.x[0] + res_wrapped.x[1]*1j
    >>> z
    (0.49999999999925893+0.49999999999925893j)
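
    As a sanity check, the recovered complex root solves ``f`` to high
    accuracy:

    >>> abs(f(z)) < 1e-10
    True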

    """
    if method not in ['trf', 'dogbox', 'lm']:
        raise ValueError("`method` must be 'trf', 'dogbox' or 'lm'.")

    if jac not in ['2-point', '3-point', 'cs'] and not callable(jac):
        raise ValueError("`jac` must be '2-point', '3-point', 'cs' or "
                         "callable.")

    if tr_solver not in [None, 'exact', 'lsmr']:
        raise ValueError("`tr_solver` must be None, 'exact' or 'lsmr'.")

    if loss not in IMPLEMENTED_LOSSES and not callable(loss):
        raise ValueError("`loss` must be one of {0} or a callable."
                         .format(IMPLEMENTED_LOSSES.keys()))

    if method == 'lm' and loss != 'linear':
        raise ValueError("method='lm' supports only 'linear' loss function.")

    if verbose not in [0, 1, 2]:
        raise ValueError("`verbose` must be in [0, 1, 2].")

    if len(bounds) != 2:
        raise ValueError("`bounds` must contain 2 elements.")

    if max_nfev is not None and max_nfev <= 0:
        raise ValueError("`max_nfev` must be None or positive integer.")

    if np.iscomplexobj(x0):
        raise ValueError("`x0` must be real.")

    x0 = np.atleast_1d(x0).astype(float)

    if x0.ndim > 1:
        raise ValueError("`x0` must have at most 1 dimension.")

    lb, ub = prepare_bounds(bounds, x0.shape[0])

    if method == 'lm' and not np.all((lb == -np.inf) & (ub == np.inf)):
        raise ValueError("Method 'lm' doesn't support bounds.")

    if lb.shape != x0.shape or ub.shape != x0.shape:
        raise ValueError("Inconsistent shapes between bounds and `x0`.")

    if np.any(lb >= ub):
        raise ValueError("Each lower bound must be strictly less than each "
                         "upper bound.")

    if not in_bounds(x0, lb, ub):
        raise ValueError("`x0` is infeasible.")

    x_scale = check_x_scale(x_scale, x0)

    ftol, xtol, gtol = check_tolerance(ftol, xtol, gtol, method)

    def fun_wrapped(x):
        return np.atleast_1d(fun(x, *args, **kwargs))

    if method == 'trf':
        x0 = make_strictly_feasible(x0, lb, ub)

    f0 = fun_wrapped(x0)

    if f0.ndim != 1:
        raise ValueError("`fun` must return at most 1-d array_like. "
                         "f0.shape: {0}".format(f0.shape))

    if not np.all(np.isfinite(f0)):
        raise ValueError("Residuals are not finite in the initial point.")

    n = x0.size
    m = f0.size

    if method == 'lm' and m < n:
        raise ValueError("Method 'lm' doesn't work when the number of "
                         "residuals is less than the number of variables.")

    loss_function = construct_loss_function(m, loss, f_scale)
    if callable(loss):
        rho = loss_function(f0)
        if rho.shape != (3, m):
            raise ValueError("The return value of `loss` callable has wrong "
                             "shape.")
        initial_cost = 0.5 * np.sum(rho[0])
    elif loss_function is not None:
        initial_cost = loss_function(f0, cost_only=True)
    else:
        initial_cost = 0.5 * np.dot(f0, f0)

    if callable(jac):
        J0 = jac(x0, *args, **kwargs)

        if issparse(J0):
            J0 = csr_matrix(J0)

            def jac_wrapped(x, _=None):
                return csr_matrix(jac(x, *args, **kwargs))

        elif isinstance(J0, LinearOperator):
            def jac_wrapped(x, _=None):
                return jac(x, *args, **kwargs)

        else:
            J0 = np.atleast_2d(J0)

            def jac_wrapped(x, _=None):
                return np.atleast_2d(jac(x, *args, **kwargs))

    else:  # Estimate Jacobian by finite differences.
        if method == 'lm':
            if jac_sparsity is not None:
                raise ValueError("method='lm' does not support "
                                 "`jac_sparsity`.")

            if jac != '2-point':
                warn("jac='{0}' works equivalently to '2-point' "
                     "for method='lm'.".format(jac))
            J0 = jac_wrapped = None
        else:
            if jac_sparsity is not None and tr_solver == 'exact':
                raise ValueError("tr_solver='exact' is incompatible "
                                 "with `jac_sparsity`.")

            jac_sparsity = check_jac_sparsity(jac_sparsity, m, n)

            def jac_wrapped(x, f):
                J = approx_derivative(fun, x, rel_step=diff_step, method=jac,
                                      f0=f, bounds=bounds, args=args,
                                      kwargs=kwargs, sparsity=jac_sparsity)
                if J.ndim != 2:  # J is guaranteed not sparse.
                    J = np.atleast_2d(J)

                return J

            J0 = jac_wrapped(x0, f0)

    if J0 is not None:
        if J0.shape != (m, n):
            raise ValueError(
                "The return value of `jac` has wrong shape: expected {0}, "
                "actual {1}.".format((m, n), J0.shape))

        if not isinstance(J0, np.ndarray):
            if method == 'lm':
                raise ValueError("method='lm' works only with dense "
                                 "Jacobian matrices.")

            if tr_solver == 'exact':
                raise ValueError(
                    "tr_solver='exact' works only with dense "
                    "Jacobian matrices.")

        jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
        if isinstance(J0, LinearOperator) and jac_scale:
            raise ValueError("x_scale='jac' can't be used when `jac` "
                             "returns LinearOperator.")

        if tr_solver is None:
            if isinstance(J0, np.ndarray):
                tr_solver = 'exact'
            else:
                tr_solver = 'lsmr'

    if method == 'lm':
        result = call_minpack(fun_wrapped, x0, jac_wrapped, ftol, xtol, gtol,
                              max_nfev, x_scale, diff_step)

    elif method == 'trf':
        result = trf(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol, xtol,
                     gtol, max_nfev, x_scale, loss_function, tr_solver,
                     tr_options.copy(), verbose)

    elif method == 'dogbox':
        if tr_solver == 'lsmr' and 'regularize' in tr_options:
            warn("The keyword 'regularize' in `tr_options` is not relevant "
                 "for 'dogbox' method.")
            tr_options = tr_options.copy()
            del tr_options['regularize']

        result = dogbox(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol,
                        xtol, gtol, max_nfev, x_scale, loss_function,
                        tr_solver, tr_options, verbose)

    result.message = TERMINATION_MESSAGES[result.status]
    result.success = result.status > 0

    if verbose >= 1:
        print(result.message)
        print("Function evaluations {0}, initial cost {1:.4e}, final cost "
              "{2:.4e}, first-order optimality {3:.2e}."
              .format(result.nfev, initial_cost, result.cost,
                      result.optimality))

    return result
315
venv/Lib/site-packages/scipy/optimize/_lsq/lsq_linear.py
Normal file
@ -0,0 +1,315 @@
"""Linear least squares with bound constraints on independent variables."""
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
from scipy.sparse import issparse, csr_matrix
|
||||
from scipy.sparse.linalg import LinearOperator, lsmr
|
||||
from scipy.optimize import OptimizeResult
|
||||
|
||||
from .common import in_bounds, compute_grad
|
||||
from .trf_linear import trf_linear
|
||||
from .bvls import bvls
|
||||
|
||||
|
||||
def prepare_bounds(bounds, n):
|
||||
lb, ub = [np.asarray(b, dtype=float) for b in bounds]
|
||||
|
||||
if lb.ndim == 0:
|
||||
lb = np.resize(lb, n)
|
||||
|
||||
if ub.ndim == 0:
|
||||
ub = np.resize(ub, n)
|
||||
|
||||
return lb, ub
|
||||
|
||||
|
||||
TERMINATION_MESSAGES = {
|
||||
-1: "The algorithm was not able to make progress on the last iteration.",
|
||||
0: "The maximum number of iterations is exceeded.",
|
||||
1: "The first-order optimality measure is less than `tol`.",
|
||||
2: "The relative change of the cost function is less than `tol`.",
|
||||
3: "The unconstrained solution is optimal."
|
||||
}
|
||||
|
||||
|
||||
def lsq_linear(A, b, bounds=(-np.inf, np.inf), method='trf', tol=1e-10,
|
||||
lsq_solver=None, lsmr_tol=None, max_iter=None, verbose=0):
|
||||
r"""Solve a linear least-squares problem with bounds on the variables.
|
||||
|
||||
Given a m-by-n design matrix A and a target vector b with m elements,
|
||||
`lsq_linear` solves the following optimization problem::
|
||||
|
||||
minimize 0.5 * ||A x - b||**2
|
||||
subject to lb <= x <= ub
|
||||
|
||||
This optimization problem is convex, hence a found minimum (if iterations
|
||||
have converged) is guaranteed to be global.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
A : array_like, sparse matrix of LinearOperator, shape (m, n)
|
||||
Design matrix. Can be `scipy.sparse.linalg.LinearOperator`.
|
||||
b : array_like, shape (m,)
|
||||
Target vector.
|
||||
bounds : 2-tuple of array_like, optional
|
||||
Lower and upper bounds on independent variables. Defaults to no bounds.
|
||||
Each array must have shape (n,) or be a scalar, in the latter
|
||||
case a bound will be the same for all variables. Use ``np.inf`` with
|
||||
an appropriate sign to disable bounds on all or some variables.
|
||||
method : 'trf' or 'bvls', optional
|
||||
Method to perform minimization.
|
||||
|
||||
* 'trf' : Trust Region Reflective algorithm adapted for a linear
|
||||
least-squares problem. This is an interior-point-like method
|
||||
and the required number of iterations is weakly correlated with
|
||||
the number of variables.
|
||||
* 'bvls' : Bounded-variable least-squares algorithm. This is
|
||||
an active set method, which requires the number of iterations
|
||||
comparable to the number of variables. Can't be used when `A` is
|
||||
sparse or LinearOperator.
|
||||
|
||||
Default is 'trf'.
|
||||
tol : float, optional
|
||||
Tolerance parameter. The algorithm terminates if a relative change
|
||||
of the cost function is less than `tol` on the last iteration.
|
||||
Additionally, the first-order optimality measure is considered:
|
||||
|
||||
* ``method='trf'`` terminates if the uniform norm of the gradient,
|
||||
scaled to account for the presence of the bounds, is less than
|
||||
`tol`.
|
||||
* ``method='bvls'`` terminates if Karush-Kuhn-Tucker conditions
|
||||
are satisfied within `tol` tolerance.
|
||||
|
||||
lsq_solver : {None, 'exact', 'lsmr'}, optional
|
||||
Method of solving unbounded least-squares problems throughout
|
||||
iterations:
|
||||
|
||||
* 'exact' : Use dense QR or SVD decomposition approach. Can't be
|
||||
used when `A` is sparse or LinearOperator.
|
||||
* 'lsmr' : Use `scipy.sparse.linalg.lsmr` iterative procedure
|
||||
which requires only matrix-vector product evaluations. Can't
|
||||
be used with ``method='bvls'``.
|
||||
|
||||
If None (default), the solver is chosen based on type of `A`.
|
||||
lsmr_tol : None, float or 'auto', optional
|
||||
Tolerance parameters 'atol' and 'btol' for `scipy.sparse.linalg.lsmr`
|
||||
If None (default), it is set to ``1e-2 * tol``. If 'auto', the
|
||||
tolerance will be adjusted based on the optimality of the current
|
||||
iterate, which can speed up the optimization process, but is not always
|
||||
reliable.
|
||||
max_iter : None or int, optional
|
||||
Maximum number of iterations before termination. If None (default), it
|
||||
is set to 100 for ``method='trf'`` or to the number of variables for
|
||||
``method='bvls'`` (not counting iterations for 'bvls' initialization).
|
||||
verbose : {0, 1, 2}, optional
|
||||
Level of algorithm's verbosity:
|
||||
|
||||
* 0 : work silently (default).
|
||||
* 1 : display a termination report.
|
||||
* 2 : display progress during iterations.
|
||||
|
||||
Returns
|
||||
-------
|
||||
OptimizeResult with the following fields defined:
|
||||
x : ndarray, shape (n,)
|
||||
Solution found.
|
||||
cost : float
|
||||
Value of the cost function at the solution.
|
||||
fun : ndarray, shape (m,)
|
||||
Vector of residuals at the solution.
|
||||
optimality : float
|
||||
First-order optimality measure. The exact meaning depends on `method`,
|
||||
refer to the description of `tol` parameter.
|
||||
active_mask : ndarray of int, shape (n,)
|
||||
Each component shows whether a corresponding constraint is active
|
||||
(that is, whether a variable is at the bound):
|
||||
|
||||
* 0 : a constraint is not active.
|
||||
* -1 : a lower bound is active.
|
||||
* 1 : an upper bound is active.
|
||||
|
||||
Might be somewhat arbitrary for the `trf` method as it generates a
|
||||
sequence of strictly feasible iterates and active_mask is determined
|
||||
within a tolerance threshold.
|
||||
nit : int
|
||||
Number of iterations. Zero if the unconstrained solution is optimal.
|
||||
status : int
|
||||
Reason for algorithm termination:
|
||||
|
||||
* -1 : the algorithm was not able to make progress on the last
|
||||
iteration.
|
||||
* 0 : the maximum number of iterations is exceeded.
|
||||
* 1 : the first-order optimality measure is less than `tol`.
|
||||
* 2 : the relative change of the cost function is less than `tol`.
|
||||
* 3 : the unconstrained solution is optimal.
|
||||
|
||||
message : str
|
||||
Verbal description of the termination reason.
|
||||
success : bool
|
||||
True if one of the convergence criteria is satisfied (`status` > 0).
|
||||
|
||||
See Also
|
||||
--------
|
||||
nnls : Linear least squares with non-negativity constraint.
|
||||
least_squares : Nonlinear least squares with bounds on the variables.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The algorithm first computes the unconstrained least-squares solution by
|
||||
`numpy.linalg.lstsq` or `scipy.sparse.linalg.lsmr` depending on
|
||||
`lsq_solver`. This solution is returned as optimal if it lies within the
|
||||
bounds.
|
||||
|
||||
Method 'trf' runs the adaptation of the algorithm described in [STIR]_ for
|
||||
a linear least-squares problem. The iterations are essentially the same as
|
||||
in the nonlinear least-squares algorithm, but as the quadratic function
|
||||
model is always accurate, we don't need to track or modify the radius of
|
||||
a trust region. The line search (backtracking) is used as a safety net
|
||||
when a selected step does not decrease the cost function. Read more
|
||||
detailed description of the algorithm in `scipy.optimize.least_squares`.
|
||||
|
||||
Method 'bvls' runs a Python implementation of the algorithm described in
|
||||
[BVLS]_. The algorithm maintains active and free sets of variables, on
|
||||
each iteration chooses a new variable to move from the active set to the
|
||||
free set and then solves the unconstrained least-squares problem on free
|
||||
variables. This algorithm is guaranteed to give an accurate solution
|
||||
eventually, but may require up to n iterations for a problem with n
|
||||
variables. Additionally, an ad-hoc initialization procedure is
|
||||
implemented, that determines which variables to set free or active
|
||||
initially. It takes some number of iterations before actual BVLS starts,
|
||||
but can significantly reduce the number of further iterations.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
|
||||
and Conjugate Gradient Method for Large-Scale Bound-Constrained
|
||||
Minimization Problems," SIAM Journal on Scientific Computing,
|
||||
Vol. 21, Number 1, pp 1-23, 1999.
|
||||
.. [BVLS] P. B. Start and R. L. Parker, "Bounded-Variable Least-Squares:
|
||||
an Algorithm and Applications", Computational Statistics, 10,
|
||||
129-141, 1995.
|
||||
|
||||
Examples
|
||||
--------
|
||||
In this example, a problem with a large sparse matrix and bounds on the
|
||||
variables is solved.
|
||||
|
||||
>>> from scipy.sparse import rand
|
||||
>>> from scipy.optimize import lsq_linear
|
||||
...
|
||||
>>> np.random.seed(0)
|
||||
...
|
||||
>>> m = 20000
|
||||
>>> n = 10000
|
||||
...
|
||||
>>> A = rand(m, n, density=1e-4)
|
||||
>>> b = np.random.randn(m)
|
||||
...
|
||||
>>> lb = np.random.randn(n)
|
||||
>>> ub = lb + 1
|
||||
...
|
||||
>>> res = lsq_linear(A, b, bounds=(lb, ub), lsmr_tol='auto', verbose=1)
|
||||
# may vary
|
||||
The relative change of the cost function is less than `tol`.
|
||||
Number of iterations 16, initial cost 1.5039e+04, final cost 1.1112e+04,
|
||||
first-order optimality 4.66e-08.
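
    A small dense problem illustrates ``method='bvls'`` as well (a minimal
    sketch; the variable names are ad hoc and the printed output may vary
    slightly between numpy versions):

    >>> A_small = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    >>> b_small = np.array([1.0, 2.0, 2.0])
    >>> res_bvls = lsq_linear(A_small, b_small, bounds=(0, 0.4),
    ...                       method='bvls')
    >>> res_bvls.x  # may vary
    array([ 0.        ,  0.39285714])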

    """
    if method not in ['trf', 'bvls']:
        raise ValueError("`method` must be 'trf' or 'bvls'")

    if lsq_solver not in [None, 'exact', 'lsmr']:
        raise ValueError("`lsq_solver` must be None, 'exact' or 'lsmr'.")

    if verbose not in [0, 1, 2]:
        raise ValueError("`verbose` must be in [0, 1, 2].")

    if issparse(A):
        A = csr_matrix(A)
    elif not isinstance(A, LinearOperator):
        A = np.atleast_2d(A)

    if method == 'bvls':
        if lsq_solver == 'lsmr':
            raise ValueError("method='bvls' can't be used with "
                             "lsq_solver='lsmr'")

        if not isinstance(A, np.ndarray):
            raise ValueError("method='bvls' can't be used with `A` being "
                             "sparse or LinearOperator.")

    if lsq_solver is None:
        if isinstance(A, np.ndarray):
            lsq_solver = 'exact'
        else:
            lsq_solver = 'lsmr'
    elif lsq_solver == 'exact' and not isinstance(A, np.ndarray):
        raise ValueError("`exact` solver can't be used when `A` is "
                         "sparse or LinearOperator.")

    if len(A.shape) != 2:  # No ndim for LinearOperator.
        raise ValueError("`A` must have at most 2 dimensions.")

    if len(bounds) != 2:
        raise ValueError("`bounds` must contain 2 elements.")

    if max_iter is not None and max_iter <= 0:
        raise ValueError("`max_iter` must be None or positive integer.")

    m, n = A.shape

    b = np.atleast_1d(b)
    if b.ndim != 1:
        raise ValueError("`b` must have at most 1 dimension.")

    if b.size != m:
        raise ValueError("Inconsistent shapes between `A` and `b`.")

    lb, ub = prepare_bounds(bounds, n)

    if lb.shape != (n,) or ub.shape != (n,):
        raise ValueError("Bounds have wrong shape.")

    if np.any(lb >= ub):
        raise ValueError("Each lower bound must be strictly less than each "
                         "upper bound.")

    if lsq_solver == 'exact':
        x_lsq = np.linalg.lstsq(A, b, rcond=-1)[0]
    elif lsq_solver == 'lsmr':
        x_lsq = lsmr(A, b, atol=tol, btol=tol)[0]
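
    # For this convex problem the unconstrained solution is optimal whenever
    # it is feasible (see Notes in the docstring), so it is checked first.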
    if in_bounds(x_lsq, lb, ub):
        r = A.dot(x_lsq) - b
        cost = 0.5 * np.dot(r, r)
        termination_status = 3
        termination_message = TERMINATION_MESSAGES[termination_status]
        g = compute_grad(A, r)
        g_norm = norm(g, ord=np.inf)

        if verbose > 0:
            print(termination_message)
            print("Final cost {0:.4e}, first-order optimality {1:.2e}"
                  .format(cost, g_norm))

        return OptimizeResult(
            x=x_lsq, fun=r, cost=cost, optimality=g_norm,
            active_mask=np.zeros(n), nit=0, status=termination_status,
            message=termination_message, success=True)

    if method == 'trf':
        res = trf_linear(A, b, x_lsq, lb, ub, tol, lsq_solver, lsmr_tol,
                         max_iter, verbose)
    elif method == 'bvls':
        res = bvls(A, b, x_lsq, lb, ub, tol, max_iter, verbose)

    res.message = TERMINATION_MESSAGES[res.status]
    res.success = res.status > 0

    if verbose > 0:
        print(res.message)
        print("Number of iterations {0}, initial cost {1:.4e}, "
              "final cost {2:.4e}, first-order optimality {3:.2e}."
              .format(res.nit, res.initial_cost, res.cost, res.optimality))

    del res.initial_cost

    return res
12
venv/Lib/site-packages/scipy/optimize/_lsq/setup.py
Normal file
@ -0,0 +1,12 @@
def configuration(parent_package='', top_path=None):
    from numpy.distutils.misc_util import Configuration
    config = Configuration('_lsq', parent_package, top_path)
    config.add_extension('givens_elimination',
                         sources=['givens_elimination.c'])
    return config


if __name__ == '__main__':
    from numpy.distutils.core import setup
    setup(**configuration(top_path='').todict())
560
venv/Lib/site-packages/scipy/optimize/_lsq/trf.py
Normal file
@ -0,0 +1,560 @@
"""Trust Region Reflective algorithm for least-squares optimization.
|
||||
|
||||
The algorithm is based on ideas from paper [STIR]_. The main idea is to
|
||||
account for the presence of the bounds by appropriate scaling of the variables (or,
|
||||
equivalently, changing a trust-region shape). Let's introduce a vector v:
|
||||
|
||||
| ub[i] - x[i], if g[i] < 0 and ub[i] < np.inf
|
||||
v[i] = | x[i] - lb[i], if g[i] > 0 and lb[i] > -np.inf
|
||||
| 1, otherwise
|
||||
|
||||
where g is the gradient of a cost function and lb, ub are the bounds. Its
|
||||
components are distances to the bounds at which the anti-gradient points (if
|
||||
this distance is finite). Define a scaling matrix D = diag(v**0.5).
|
||||
First-order optimality conditions can be stated as
|
||||
|
||||
D^2 g(x) = 0.
|
||||
|
||||
Meaning that components of the gradient should be zero for strictly interior
|
||||
variables, and components must point inside the feasible region for variables
|
||||
on the bound.
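
For illustration (the numbers are chosen here, not taken from [STIR]_): with
lb = [0, -inf], ub = [inf, inf], x = [0.1, 5] and g = [1, -2], the rule above
gives v = [0.1, 1] and D = diag([0.1**0.5, 1]), so steps are shrunk along the
first variable, whose anti-gradient points at its nearby lower bound.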

Now consider this system of equations as a new optimization problem. If the
point x is strictly interior (not on the bound), then the left-hand side is
differentiable and the Newton step for it satisfies

    (D^2 H + diag(g) Jv) p = -D^2 g

where H is the Hessian matrix (or its J^T J approximation in least squares),
Jv is the Jacobian matrix of v with components -1, 1 or 0, such that all
elements of matrix C = diag(g) Jv are non-negative. Introduce the change
of the variables x = D x_h (_h would be "hat" in LaTeX). In the new variables,
we have a Newton step satisfying

    B_h p_h = -g_h,

where B_h = D H D + C, g_h = D g. In least squares B_h = J_h^T J_h, where
J_h = J D. Note that J_h and g_h are proper Jacobian and gradient with respect
to "hat" variables. To guarantee global convergence we formulate a
trust-region problem based on the Newton step in the new variables:

    0.5 * p_h^T B_h p_h + g_h^T p_h -> min, ||p_h|| <= Delta

In the original space B = H + D^{-1} C D^{-1}, and the equivalent trust-region
problem is

    0.5 * p^T B p + g^T p -> min, ||D^{-1} p|| <= Delta

Here, the meaning of the matrix D becomes more clear: it alters the shape
of a trust-region, such that large steps towards the bounds are not allowed.
In the implementation, the trust-region problem is solved in "hat" space,
but handling of the bounds is done in the original space (see below and read
the code).

The introduction of the matrix D doesn't allow bounds to be ignored: the
algorithm must keep iterates strictly feasible (to satisfy the aforementioned
differentiability), and the parameter theta controls the step back from the
boundary (see the code for details).

The algorithm does another important trick. If the trust-region solution
doesn't fit into the bounds, then a reflected (from a firstly encountered
bound) search direction is considered. For motivation and analysis refer to
the [STIR]_ paper (and other papers of the authors). In practice, it doesn't
need a lot of justification: the algorithm simply chooses the best step among
three: a constrained trust-region step, a reflected step and a constrained
Cauchy step (a minimizer along -g_h in "hat" space, or -D^2 g in the original
space).

Another feature is that the trust-region radius control strategy is modified
to account for the appearance of the diagonal C matrix (called diag_h in the
code).

Note that all described peculiarities are completely gone as we consider
problems without bounds (the algorithm becomes a standard trust-region type
algorithm very similar to ones implemented in MINPACK).

The implementation supports two methods of solving the trust-region problem.
The first, called 'exact', applies SVD on the Jacobian and then solves the
problem very accurately using the algorithm described in [JJMore]_. It is not
applicable to large problems. The second, called 'lsmr', uses the 2-D subspace
approach (sometimes called "indefinite dogleg"), where the problem is solved
in a subspace spanned by the gradient and the approximate Gauss-Newton step
found by ``scipy.sparse.linalg.lsmr``. A 2-D trust-region problem is
reformulated as a 4th order algebraic equation and solved very accurately by
``numpy.roots``. The subspace approach allows solving very large problems
(up to a couple of million residuals on a regular PC), provided the Jacobian
matrix is sufficiently sparse.

References
----------
.. [STIR] Branch, M.A., T.F. Coleman, and Y. Li, "A Subspace, Interior,
          and Conjugate Gradient Method for Large-Scale Bound-Constrained
          Minimization Problems," SIAM Journal on Scientific Computing,
          Vol. 21, Number 1, pp 1-23, 1999.
.. [JJMore] More, J. J., "The Levenberg-Marquardt Algorithm: Implementation
            and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
            Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
"""
import numpy as np
from numpy.linalg import norm
from scipy.linalg import svd, qr
from scipy.sparse.linalg import lsmr
from scipy.optimize import OptimizeResult

from .common import (
    step_size_to_bound, find_active_constraints, in_bounds,
    make_strictly_feasible, intersect_trust_region, solve_lsq_trust_region,
    solve_trust_region_2d, minimize_quadratic_1d, build_quadratic_1d,
    evaluate_quadratic, right_multiplied_operator, regularized_lsq_operator,
    CL_scaling_vector, compute_grad, compute_jac_scale, check_termination,
    update_tr_radius, scale_for_robust_loss_function, print_header_nonlinear,
    print_iteration_nonlinear)


def trf(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
        loss_function, tr_solver, tr_options, verbose):
    # For efficiency, it makes sense to run the simplified version of the
    # algorithm when no bounds are imposed. We decided to write the two
    # separate functions. It violates the DRY principle, but the individual
    # functions are kept the most readable.
    if np.all(lb == -np.inf) and np.all(ub == np.inf):
        return trf_no_bounds(
            fun, jac, x0, f0, J0, ftol, xtol, gtol, max_nfev, x_scale,
            loss_function, tr_solver, tr_options, verbose)
    else:
        return trf_bounds(
            fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
            loss_function, tr_solver, tr_options, verbose)


def select_step(x, J_h, diag_h, g_h, p, p_h, d, Delta, lb, ub, theta):
    """Select the best step according to Trust Region Reflective algorithm."""
    if in_bounds(x + p, lb, ub):
        p_value = evaluate_quadratic(J_h, g_h, p_h, diag=diag_h)
        return p, p_h, -p_value

    p_stride, hits = step_size_to_bound(x, p, lb, ub)

    # Compute the reflected direction.
    r_h = np.copy(p_h)
    r_h[hits.astype(bool)] *= -1
    r = d * r_h

    # Restrict trust-region step, such that it hits the bound.
    p *= p_stride
    p_h *= p_stride
    x_on_bound = x + p

    # Reflected direction will cross first either feasible region or trust
    # region boundary.
    _, to_tr = intersect_trust_region(p_h, r_h, Delta)
    to_bound, _ = step_size_to_bound(x_on_bound, r, lb, ub)

    # Find lower and upper bounds on a step size along the reflected
    # direction, considering the strict feasibility requirement. There is no
    # single correct way to do that, the chosen approach seems to work best
    # on test problems.
    r_stride = min(to_bound, to_tr)
    if r_stride > 0:
        r_stride_l = (1 - theta) * p_stride / r_stride
        if r_stride == to_bound:
            r_stride_u = theta * to_bound
        else:
            r_stride_u = to_tr
    else:
        r_stride_l = 0
        r_stride_u = -1

    # Check if reflection step is available.
    if r_stride_l <= r_stride_u:
        a, b, c = build_quadratic_1d(J_h, g_h, r_h, s0=p_h, diag=diag_h)
        r_stride, r_value = minimize_quadratic_1d(
            a, b, r_stride_l, r_stride_u, c=c)
        r_h *= r_stride
        r_h += p_h
        r = r_h * d
    else:
        r_value = np.inf

    # Now correct p_h to make it strictly interior.
    p *= theta
    p_h *= theta
    p_value = evaluate_quadratic(J_h, g_h, p_h, diag=diag_h)

    ag_h = -g_h
    ag = d * ag_h

    to_tr = Delta / norm(ag_h)
    to_bound, _ = step_size_to_bound(x, ag, lb, ub)
    if to_bound < to_tr:
        ag_stride = theta * to_bound
    else:
        ag_stride = to_tr

    a, b = build_quadratic_1d(J_h, g_h, ag_h, diag=diag_h)
    ag_stride, ag_value = minimize_quadratic_1d(a, b, 0, ag_stride)
    ag_h *= ag_stride
    ag *= ag_stride

    if p_value < r_value and p_value < ag_value:
        return p, p_h, -p_value
    elif r_value < p_value and r_value < ag_value:
        return r, r_h, -r_value
    else:
        return ag, ag_h, -ag_value


def trf_bounds(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev,
               x_scale, loss_function, tr_solver, tr_options, verbose):
    x = x0.copy()

    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1
    m, n = J.shape

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    v, dv = CL_scaling_vector(x, g, lb, ub)
    v[dv != 0] *= scale_inv[dv != 0]
    Delta = norm(x0 * scale_inv / v**0.5)
    if Delta == 0:
        Delta = 1.0

    g_norm = norm(g * v, ord=np.inf)

    f_augmented = np.zeros((m + n))
    if tr_solver == 'exact':
        J_augmented = np.empty((m + n, n))
    elif tr_solver == 'lsmr':
        reg_term = 0.0
        regularize = tr_options.pop('regularize', True)

    if max_nfev is None:
        max_nfev = x0.size * 100

    alpha = 0.0  # "Levenberg-Marquardt" parameter

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        v, dv = CL_scaling_vector(x, g, lb, ub)

        g_norm = norm(g * v, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        # Now compute variables in "hat" space. Here, we also account for
        # scaling introduced by `x_scale` parameter. This part is a bit
        # tricky, you have to write down the formulas and see how the
        # trust-region problem is formulated when the two types of scaling
        # are applied. The idea is that first we apply `x_scale` and then
        # apply Coleman-Li approach in the new variables.

        # v is recomputed in the variables after applying `x_scale`, note that
        # components which were identically 1 are not affected.
        v[dv != 0] *= scale_inv[dv != 0]

        # Here, we apply two types of scaling.
        d = v**0.5 * scale

        # C = diag(g * scale) Jv
        diag_h = g * dv * scale

        # After all this has been done, we continue normally.

        # "hat" gradient.
        g_h = d * g

        f_augmented[:m] = f
        if tr_solver == 'exact':
            J_augmented[:m] = J * d
            J_h = J_augmented[:m]  # Memory view.
            J_augmented[m:] = np.diag(diag_h**0.5)
            U, s, V = svd(J_augmented, full_matrices=False)
            V = V.T
            uf = U.T.dot(f_augmented)
        elif tr_solver == 'lsmr':
            J_h = right_multiplied_operator(J, d)

            if regularize:
                a, b = build_quadratic_1d(J_h, g_h, -g_h, diag=diag_h)
                to_tr = Delta / norm(g_h)
                ag_value = minimize_quadratic_1d(a, b, 0, to_tr)[1]
                reg_term = -ag_value / Delta**2

            lsmr_op = regularized_lsq_operator(J_h, (diag_h + reg_term)**0.5)
            gn_h = lsmr(lsmr_op, f_augmented, **tr_options)[0]
            S = np.vstack((g_h, gn_h)).T
            S, _ = qr(S, mode='economic')
            JS = J_h.dot(S)  # LinearOperator does dot too.
            B_S = np.dot(JS.T, JS) + np.dot(S.T * diag_h, S)
            g_S = S.T.dot(g_h)

        # theta controls the step-back ratio from the bounds.
        theta = max(0.995, 1 - g_norm)

        actual_reduction = -1
        while actual_reduction <= 0 and nfev < max_nfev:
            if tr_solver == 'exact':
                p_h, alpha, n_iter = solve_lsq_trust_region(
                    n, m, uf, s, V, Delta, initial_alpha=alpha)
            elif tr_solver == 'lsmr':
                p_S, _ = solve_trust_region_2d(B_S, g_S, Delta)
                p_h = S.dot(p_S)

            p = d * p_h  # Trust-region solution in the original space.
            step, step_h, predicted_reduction = select_step(
                x, J_h, diag_h, g_h, p, p_h, d, Delta, lb, ub, theta)

            x_new = make_strictly_feasible(x + step, lb, ub, rstep=0)
            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step_h)

            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new
            Delta_new, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, step_h_norm > 0.95 * Delta)

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)
            if termination_status is not None:
                break
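
            # alpha roughly scales as 1/Delta, so rescale it to stay a good
            # warm start for the next trust-region subproblem solve.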
            alpha *= Delta / Delta_new
            Delta = Delta_new

        if actual_reduction > 0:
            x = x_new

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    active_mask = find_active_constraints(x, lb, ub, rtol=xtol)
    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g, optimality=g_norm,
        active_mask=active_mask, nfev=nfev, njev=njev,
        status=termination_status)


def trf_no_bounds(fun, jac, x0, f0, J0, ftol, xtol, gtol, max_nfev,
                  x_scale, loss_function, tr_solver, tr_options, verbose):
    x = x0.copy()

    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1
    m, n = J.shape

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    Delta = norm(x0 * scale_inv)
    if Delta == 0:
        Delta = 1.0

    if tr_solver == 'lsmr':
        reg_term = 0
        damp = tr_options.pop('damp', 0.0)
        regularize = tr_options.pop('regularize', True)
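        # `damp` is a user-supplied lsmr damping factor; it is combined in
        # quadrature with the adaptive regularization term computed below.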

    if max_nfev is None:
        max_nfev = x0.size * 100

    alpha = 0.0  # "Levenberg-Marquardt" parameter

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        d = scale
        g_h = d * g

        if tr_solver == 'exact':
            J_h = J * d
            U, s, V = svd(J_h, full_matrices=False)
            V = V.T
            uf = U.T.dot(f)
        elif tr_solver == 'lsmr':
            J_h = right_multiplied_operator(J, d)

            if regularize:
                a, b = build_quadratic_1d(J_h, g_h, -g_h)
                to_tr = Delta / norm(g_h)
                ag_value = minimize_quadratic_1d(a, b, 0, to_tr)[1]
                reg_term = -ag_value / Delta**2

            damp_full = (damp**2 + reg_term)**0.5
            gn_h = lsmr(J_h, f, damp=damp_full, **tr_options)[0]
            S = np.vstack((g_h, gn_h)).T
            S, _ = qr(S, mode='economic')
            JS = J_h.dot(S)
            B_S = np.dot(JS.T, JS)
            g_S = S.T.dot(g_h)

        actual_reduction = -1
        while actual_reduction <= 0 and nfev < max_nfev:
            if tr_solver == 'exact':
                step_h, alpha, n_iter = solve_lsq_trust_region(
                    n, m, uf, s, V, Delta, initial_alpha=alpha)
            elif tr_solver == 'lsmr':
                p_S, _ = solve_trust_region_2d(B_S, g_S, Delta)
                step_h = S.dot(p_S)

            predicted_reduction = -evaluate_quadratic(J_h, g_h, step_h)
            step = d * step_h
            x_new = x + step
            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step_h)

            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta_new, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, step_h_norm > 0.95 * Delta)

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)
            if termination_status is not None:
                break
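
            # Same rescaling of the LM parameter as in trf_bounds: keep
            # alpha consistent with the updated trust-region radius.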
            alpha *= Delta / Delta_new
            Delta = Delta_new

        if actual_reduction > 0:
            x = x_new

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    active_mask = np.zeros_like(x)
    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g, optimality=g_norm,
        active_mask=active_mask, nfev=nfev, njev=njev,
        status=termination_status)
249
venv/Lib/site-packages/scipy/optimize/_lsq/trf_linear.py
Normal file
@ -0,0 +1,249 @@
"""The adaptation of Trust Region Reflective algorithm for a linear
|
||||
least-squares problem."""
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
from scipy.linalg import qr, solve_triangular
|
||||
from scipy.sparse.linalg import lsmr
|
||||
from scipy.optimize import OptimizeResult
|
||||
|
||||
from .givens_elimination import givens_elimination
|
||||
from .common import (
|
||||
EPS, step_size_to_bound, find_active_constraints, in_bounds,
|
||||
make_strictly_feasible, build_quadratic_1d, evaluate_quadratic,
|
||||
minimize_quadratic_1d, CL_scaling_vector, reflective_transformation,
|
||||
print_header_linear, print_iteration_linear, compute_grad,
|
||||
regularized_lsq_operator, right_multiplied_operator)
|
||||
|
||||
|
||||
def regularized_lsq_with_qr(m, n, R, QTb, perm, diag, copy_R=True):
|
||||
"""Solve regularized least squares using information from QR-decomposition.
|
||||
|
||||
The initial problem is to solve the following system in a least-squares
|
||||
sense:
|
||||
::
|
||||
|
||||
A x = b
|
||||
D x = 0
|
||||
|
||||
where D is diagonal matrix. The method is based on QR decomposition
|
||||
of the form A P = Q R, where P is a column permutation matrix, Q is an
|
||||
orthogonal matrix and R is an upper triangular matrix.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
m, n : int
|
||||
Initial shape of A.
|
||||
R : ndarray, shape (n, n)
|
||||
Upper triangular matrix from QR decomposition of A.
|
||||
QTb : ndarray, shape (n,)
|
||||
First n components of Q^T b.
|
||||
perm : ndarray, shape (n,)
|
||||
Array defining column permutation of A, such that ith column of
|
||||
P is perm[i]-th column of identity matrix.
|
||||
diag : ndarray, shape (n,)
|
||||
Array containing diagonal elements of D.
|
||||
|
||||
Returns
|
||||
-------
|
||||
x : ndarray, shape (n,)
|
||||
Found least-squares solution.
|
||||
"""
|
||||
if copy_R:
|
||||
R = R.copy()
|
||||
v = QTb.copy()
|
||||
|
||||
givens_elimination(R, v, diag[perm])
|
||||
|
||||
abs_diag_R = np.abs(np.diag(R))
|
||||
threshold = EPS * max(m, n) * np.max(abs_diag_R)
|
||||
nns, = np.nonzero(abs_diag_R > threshold)
|
||||
|
||||
R = R[np.ix_(nns, nns)]
|
||||
v = v[nns]
|
||||
|
||||
x = np.zeros(n)
|
||||
x[perm[nns]] = solve_triangular(R, v)
|
||||
|
||||
return x
|
||||
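A self-contained sanity check of the idea (not part of the file): the stacked system above can also be solved directly with a dense lstsq on [A; D], which the QR-plus-Givens route must reproduce. The import below assumes scipy's private _lsq module layout, which may change between versions:

import numpy as np
from scipy.linalg import qr
from scipy.optimize._lsq.trf_linear import regularized_lsq_with_qr

rng = np.random.default_rng(0)
m, n = 10, 4
A = rng.standard_normal((m, n))
b = rng.standard_normal(m)
diag = rng.uniform(0.1, 1.0, size=n)   # diagonal of D

Q, R, perm = qr(A, mode='economic', pivoting=True)
x_qr = regularized_lsq_with_qr(m, n, R, Q.T.dot(b), perm, diag)

# Reference: solve the augmented system [A; D] x = [b; 0] directly.
A_aug = np.vstack((A, np.diag(diag)))
b_aug = np.hstack((b, np.zeros(n)))
x_ref = np.linalg.lstsq(A_aug, b_aug, rcond=None)[0]
print(np.allclose(x_qr, x_ref))        # expected: True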


def backtracking(A, g, x, p, theta, p_dot_g, lb, ub):
    """Find an appropriate step size using backtracking line search."""
    alpha = 1
    while True:
        x_new, _ = reflective_transformation(x + alpha * p, lb, ub)
        step = x_new - x
        cost_change = -evaluate_quadratic(A, g, step)
        # Armijo-style sufficient-decrease test (p_dot_g < 0 for a
        # descent direction).
        if cost_change > -0.1 * alpha * p_dot_g:
            break
        alpha *= 0.5

    active = find_active_constraints(x_new, lb, ub)
    if np.any(active != 0):
        # Shorten the step and keep the iterate strictly interior.
        x_new, _ = reflective_transformation(x + theta * alpha * p, lb, ub)
        x_new = make_strictly_feasible(x_new, lb, ub, rstep=0)
        step = x_new - x
        cost_change = -evaluate_quadratic(A, g, step)

    return x_new, step, cost_change
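The acceptance test above is a standard sufficient-decrease rule; a minimal unconstrained illustration of the same loop on a quadratic cost (illustrative helper, not scipy API):

import numpy as np

def simple_backtracking(A, b, x, p, c1=0.1):
    # Halve alpha until the cost 0.5*||A z - b||^2 drops by at least
    # c1 * alpha * |p . g|, mirroring the test in `backtracking` above.
    def cost(z):
        r = A.dot(z) - b
        return 0.5 * np.dot(r, r)

    g = A.T.dot(A.dot(x) - b)      # gradient at x
    p_dot_g = np.dot(p, g)         # negative for a descent direction
    alpha = 1.0
    while cost(x + alpha * p) - cost(x) > c1 * alpha * p_dot_g:
        alpha *= 0.5
    return x + alpha * p

A = np.array([[3.0, 0.0], [0.0, 1.0]])
b = np.array([1.0, 1.0])
x = np.array([5.0, 5.0])
p = -A.T.dot(A.dot(x) - b)         # steepest-descent direction
print(simple_backtracking(A, b, x, p))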


def select_step(x, A_h, g_h, c_h, p, p_h, d, lb, ub, theta):
    """Select the best step according to Trust Region Reflective algorithm."""
    if in_bounds(x + p, lb, ub):
        return p

    p_stride, hits = step_size_to_bound(x, p, lb, ub)
    r_h = np.copy(p_h)
    r_h[hits.astype(bool)] *= -1
    r = d * r_h

    # Restrict the step so that it exactly hits the bound.
    p *= p_stride
    p_h *= p_stride
    x_on_bound = x + p

    # Find the step size along the reflected direction.
    r_stride_u, _ = step_size_to_bound(x_on_bound, r, lb, ub)

    # Stay interior.
    r_stride_l = (1 - theta) * r_stride_u
    r_stride_u *= theta

    if r_stride_u > 0:
        a, b, c = build_quadratic_1d(A_h, g_h, r_h, s0=p_h, diag=c_h)
        r_stride, r_value = minimize_quadratic_1d(
            a, b, r_stride_l, r_stride_u, c=c)
        r_h = p_h + r_h * r_stride
        r = d * r_h
    else:
        r_value = np.inf

    # Now correct p_h to make it strictly interior.
    p_h *= theta
    p *= theta
    p_value = evaluate_quadratic(A_h, g_h, p_h, diag=c_h)

    # Anti-gradient (scaled steepest-descent) candidate.
    ag_h = -g_h
    ag = d * ag_h
    ag_stride_u, _ = step_size_to_bound(x, ag, lb, ub)
    ag_stride_u *= theta
    a, b = build_quadratic_1d(A_h, g_h, ag_h, diag=c_h)
    ag_stride, ag_value = minimize_quadratic_1d(a, b, 0, ag_stride_u)
    ag *= ag_stride

    # Return whichever candidate predicts the lowest cost.
    if p_value < r_value and p_value < ag_value:
        return p
    elif r_value < p_value and r_value < ag_value:
        return r
    else:
        return ag
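The reflected direction above flips exactly the components of the step that first hit a bound. A tiny standalone illustration of that geometry (illustrative helper, not scipy's step_size_to_bound; assumes every component of p is nonzero):

import numpy as np

def reflect_at_bounds(x, p, lb, ub):
    # Largest stride t keeping x + t*p inside [lb, ub], computed
    # componentwise; components that hit a bound first get flipped.
    strides = np.maximum((lb - x) / p, (ub - x) / p)
    t = strides.min()
    r = p.copy()
    r[strides == t] *= -1          # reflect the blocked components
    return x + t * p, r

x = np.array([0.0, 0.0])
p = np.array([1.0, 0.25])
lb = np.array([-1.0, -1.0])
ub = np.array([0.5, 1.0])
x_on_bound, r = reflect_at_bounds(x, p, lb, ub)
print(x_on_bound, r)               # [0.5 0.125] [-1. 0.25]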


def trf_linear(A, b, x_lsq, lb, ub, tol, lsq_solver, lsmr_tol, max_iter,
               verbose):
    m, n = A.shape
    x, _ = reflective_transformation(x_lsq, lb, ub)
    x = make_strictly_feasible(x, lb, ub, rstep=0.1)

    if lsq_solver == 'exact':
        QT, R, perm = qr(A, mode='economic', pivoting=True)
        QT = QT.T

        if m < n:
            R = np.vstack((R, np.zeros((n - m, n))))

        QTr = np.zeros(n)
        k = min(m, n)
    elif lsq_solver == 'lsmr':
        r_aug = np.zeros(m + n)
        auto_lsmr_tol = False
        if lsmr_tol is None:
            lsmr_tol = 1e-2 * tol
        elif lsmr_tol == 'auto':
            auto_lsmr_tol = True

    r = A.dot(x) - b
    g = compute_grad(A, r)
    cost = 0.5 * np.dot(r, r)
    initial_cost = cost

    termination_status = None
    step_norm = None
    cost_change = None

    if max_iter is None:
        max_iter = 100

    if verbose == 2:
        print_header_linear()

    for iteration in range(max_iter):
        v, dv = CL_scaling_vector(x, g, lb, ub)
        g_scaled = g * v
        g_norm = norm(g_scaled, ord=np.inf)
        if g_norm < tol:
            termination_status = 1

        if verbose == 2:
            print_iteration_linear(iteration, cost, cost_change,
                                   step_norm, g_norm)

        if termination_status is not None:
            break

        diag_h = g * dv
        diag_root_h = diag_h ** 0.5
        d = v ** 0.5
        g_h = d * g

        A_h = right_multiplied_operator(A, d)
        if lsq_solver == 'exact':
            QTr[:k] = QT.dot(r)
            p_h = -regularized_lsq_with_qr(m, n, R * d[perm], QTr, perm,
                                           diag_root_h, copy_R=False)
        elif lsq_solver == 'lsmr':
            lsmr_op = regularized_lsq_operator(A_h, diag_root_h)
            r_aug[:m] = r
            if auto_lsmr_tol:
                eta = 1e-2 * min(0.5, g_norm)
                lsmr_tol = max(EPS, min(0.1, eta * g_norm))
            p_h = -lsmr(lsmr_op, r_aug, atol=lsmr_tol, btol=lsmr_tol)[0]

        p = d * p_h

        p_dot_g = np.dot(p, g)
        if p_dot_g > 0:
            termination_status = -1

        theta = 1 - min(0.005, g_norm)
        step = select_step(x, A_h, g_h, diag_h, p, p_h, d, lb, ub, theta)
        cost_change = -evaluate_quadratic(A, g, step)

        # Rarely executed: `p` is a descent direction, so an acceptable cost
        # decrease must exist and can be found by simple "backtracking";
        # otherwise the algorithm's logic would break.
        if cost_change < 0:
            x, step, cost_change = backtracking(
                A, g, x, p, theta, p_dot_g, lb, ub)
        else:
            x = make_strictly_feasible(x + step, lb, ub, rstep=0)

        step_norm = norm(step)
        r = A.dot(x) - b
        g = compute_grad(A, r)

        if cost_change < tol * cost:
            termination_status = 2

        cost = 0.5 * np.dot(r, r)

    if termination_status is None:
        termination_status = 0

    active_mask = find_active_constraints(x, lb, ub, rtol=tol)

    return OptimizeResult(
        x=x, fun=r, cost=cost, optimality=g_norm, active_mask=active_mask,
        nit=iteration + 1, status=termination_status,
        initial_cost=initial_cost)
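For context, trf_linear is the worker behind the public scipy.optimize.lsq_linear with method='trf'. A minimal sketch of that entry point with illustrative data:

import numpy as np
from scipy.optimize import lsq_linear

rng = np.random.default_rng(0)
A = rng.standard_normal((20, 5))
b = rng.standard_normal(20)
res = lsq_linear(A, b, bounds=(-0.5, 0.5), method='trf', tol=1e-10)
print(res.x)
print(res.status)                  # mirrors termination_status above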