1585 lines
48 KiB
Python
1585 lines
48 KiB
Python
|
r"""
|
||
|
|
||
|
Nonlinear solvers
|
||
|
-----------------
|
||
|
|
||
|
.. currentmodule:: scipy.optimize
|
||
|
|
||
|
This is a collection of general-purpose nonlinear multidimensional
|
||
|
solvers. These solvers find *x* for which *F(x) = 0*. Both *x*
|
||
|
and *F* can be multidimensional.
|
||
|
|
||
|
Routines
|
||
|
~~~~~~~~
|
||
|
|
||
|
Large-scale nonlinear solvers:
|
||
|
|
||
|
.. autosummary::
|
||
|
|
||
|
newton_krylov
|
||
|
anderson
|
||
|
|
||
|
General nonlinear solvers:
|
||
|
|
||
|
.. autosummary::
|
||
|
|
||
|
broyden1
|
||
|
broyden2
|
||
|
|
||
|
Simple iterations:
|
||
|
|
||
|
.. autosummary::
|
||
|
|
||
|
excitingmixing
|
||
|
linearmixing
|
||
|
diagbroyden
|
||
|
|
||
|
|
||
|
Examples
|
||
|
~~~~~~~~
|
||
|
|
||
|
**Small problem**
|
||
|
|
||
|
>>> def F(x):
|
||
|
... return np.cos(x) + x[::-1] - [1, 2, 3, 4]
|
||
|
>>> import scipy.optimize
|
||
|
>>> x = scipy.optimize.broyden1(F, [1,1,1,1], f_tol=1e-14)
|
||
|
>>> x
|
||
|
array([ 4.04674914, 3.91158389, 2.71791677, 1.61756251])
|
||
|
>>> np.cos(x) + x[::-1]
|
||
|
array([ 1., 2., 3., 4.])
|
||
|
|
||
|
|
||
|
**Large problem**
|
||
|
|
||
|
Suppose that we needed to solve the following integrodifferential
|
||
|
equation on the square :math:`[0,1]\times[0,1]`:
|
||
|
|
||
|
.. math::
|
||
|
|
||
|
\nabla^2 P = 10 \left(\int_0^1\int_0^1\cosh(P)\,dx\,dy\right)^2
|
||
|
|
||
|
with :math:`P(x,1) = 1` and :math:`P=0` elsewhere on the boundary of
|
||
|
the square.
|
||
|
|
||
|
The solution can be found using the `newton_krylov` solver:
|
||
|
|
||
|
.. plot::
|
||
|
|
||
|
import numpy as np
|
||
|
from scipy.optimize import newton_krylov
|
||
|
from numpy import cosh, zeros_like, mgrid, zeros
|
||
|
|
||
|
# parameters
|
||
|
nx, ny = 75, 75
|
||
|
hx, hy = 1./(nx-1), 1./(ny-1)
|
||
|
|
||
|
P_left, P_right = 0, 0
|
||
|
P_top, P_bottom = 1, 0
|
||
|
|
||
|
def residual(P):
|
||
|
d2x = zeros_like(P)
|
||
|
d2y = zeros_like(P)
|
||
|
|
||
|
d2x[1:-1] = (P[2:] - 2*P[1:-1] + P[:-2]) / hx/hx
|
||
|
d2x[0] = (P[1] - 2*P[0] + P_left)/hx/hx
|
||
|
d2x[-1] = (P_right - 2*P[-1] + P[-2])/hx/hx
|
||
|
|
||
|
d2y[:,1:-1] = (P[:,2:] - 2*P[:,1:-1] + P[:,:-2])/hy/hy
|
||
|
d2y[:,0] = (P[:,1] - 2*P[:,0] + P_bottom)/hy/hy
|
||
|
d2y[:,-1] = (P_top - 2*P[:,-1] + P[:,-2])/hy/hy
|
||
|
|
||
|
return d2x + d2y - 10*cosh(P).mean()**2
|
||
|
|
||
|
# solve
|
||
|
guess = zeros((nx, ny), float)
|
||
|
sol = newton_krylov(residual, guess, method='lgmres', verbose=1)
|
||
|
print('Residual: %g' % abs(residual(sol)).max())
|
||
|
|
||
|
# visualize
|
||
|
import matplotlib.pyplot as plt
|
||
|
x, y = mgrid[0:1:(nx*1j), 0:1:(ny*1j)]
|
||
|
plt.pcolormesh(x, y, sol, shading='gouraud')
|
||
|
plt.colorbar()
|
||
|
plt.show()
|
||
|
|
||
|
"""
|
||
|
# Copyright (C) 2009, Pauli Virtanen <pav@iki.fi>
|
||
|
# Distributed under the same license as SciPy.
|
||
|
|
||
|
import sys
|
||
|
import numpy as np
|
||
|
from scipy.linalg import norm, solve, inv, qr, svd, LinAlgError
|
||
|
from numpy import asarray, dot, vdot
|
||
|
import scipy.sparse.linalg
|
||
|
import scipy.sparse
|
||
|
from scipy.linalg import get_blas_funcs
|
||
|
import inspect
|
||
|
from scipy._lib._util import getfullargspec_no_self as _getfullargspec
|
||
|
from .linesearch import scalar_search_wolfe1, scalar_search_armijo
|
||
|
|
||
|
|
||
|
__all__ = [
|
||
|
'broyden1', 'broyden2', 'anderson', 'linearmixing',
|
||
|
'diagbroyden', 'excitingmixing', 'newton_krylov']
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
# Utility functions
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
|
||
|
class NoConvergence(Exception):
|
||
|
pass
|
||
|
|
||
|
|
||
|
def maxnorm(x):
|
||
|
return np.absolute(x).max()
|
||
|
|
||
|
|
||
|
def _as_inexact(x):
|
||
|
"""Return `x` as an array, of either floats or complex floats"""
|
||
|
x = asarray(x)
|
||
|
if not np.issubdtype(x.dtype, np.inexact):
|
||
|
return asarray(x, dtype=np.float_)
|
||
|
return x
|
||
|
|
||
|
|
||
|
def _array_like(x, x0):
|
||
|
"""Return ndarray `x` as same array subclass and shape as `x0`"""
|
||
|
x = np.reshape(x, np.shape(x0))
|
||
|
wrap = getattr(x0, '__array_wrap__', x.__array_wrap__)
|
||
|
return wrap(x)
|
||
|
|
||
|
|
||
|
def _safe_norm(v):
|
||
|
if not np.isfinite(v).all():
|
||
|
return np.array(np.inf)
|
||
|
return norm(v)
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
# Generic nonlinear solver machinery
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
|
||
|
_doc_parts = dict(
|
||
|
params_basic="""
|
||
|
F : function(x) -> f
|
||
|
Function whose root to find; should take and return an array-like
|
||
|
object.
|
||
|
xin : array_like
|
||
|
Initial guess for the solution
|
||
|
""".strip(),
|
||
|
params_extra="""
|
||
|
iter : int, optional
|
||
|
Number of iterations to make. If omitted (default), make as many
|
||
|
as required to meet tolerances.
|
||
|
verbose : bool, optional
|
||
|
Print status to stdout on every iteration.
|
||
|
maxiter : int, optional
|
||
|
Maximum number of iterations to make. If more are needed to
|
||
|
meet convergence, `NoConvergence` is raised.
|
||
|
f_tol : float, optional
|
||
|
Absolute tolerance (in max-norm) for the residual.
|
||
|
If omitted, default is 6e-6.
|
||
|
f_rtol : float, optional
|
||
|
Relative tolerance for the residual. If omitted, not used.
|
||
|
x_tol : float, optional
|
||
|
Absolute minimum step size, as determined from the Jacobian
|
||
|
approximation. If the step size is smaller than this, optimization
|
||
|
is terminated as successful. If omitted, not used.
|
||
|
x_rtol : float, optional
|
||
|
Relative minimum step size. If omitted, not used.
|
||
|
tol_norm : function(vector) -> scalar, optional
|
||
|
Norm to use in convergence check. Default is the maximum norm.
|
||
|
line_search : {None, 'armijo' (default), 'wolfe'}, optional
|
||
|
Which type of a line search to use to determine the step size in the
|
||
|
direction given by the Jacobian approximation. Defaults to 'armijo'.
|
||
|
callback : function, optional
|
||
|
Optional callback function. It is called on every iteration as
|
||
|
``callback(x, f)`` where `x` is the current solution and `f`
|
||
|
the corresponding residual.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
sol : ndarray
|
||
|
An array (of similar array type as `x0`) containing the final solution.
|
||
|
|
||
|
Raises
|
||
|
------
|
||
|
NoConvergence
|
||
|
When a solution was not found.
|
||
|
|
||
|
""".strip()
|
||
|
)
|
||
|
|
||
|
|
||
|
def _set_doc(obj):
|
||
|
if obj.__doc__:
|
||
|
obj.__doc__ = obj.__doc__ % _doc_parts
|
||
|
|
||
|
|
||
|
def nonlin_solve(F, x0, jacobian='krylov', iter=None, verbose=False,
|
||
|
maxiter=None, f_tol=None, f_rtol=None, x_tol=None, x_rtol=None,
|
||
|
tol_norm=None, line_search='armijo', callback=None,
|
||
|
full_output=False, raise_exception=True):
|
||
|
"""
|
||
|
Find a root of a function, in a way suitable for large-scale problems.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
%(params_basic)s
|
||
|
jacobian : Jacobian
|
||
|
A Jacobian approximation: `Jacobian` object or something that
|
||
|
`asjacobian` can transform to one. Alternatively, a string specifying
|
||
|
which of the builtin Jacobian approximations to use:
|
||
|
|
||
|
krylov, broyden1, broyden2, anderson
|
||
|
diagbroyden, linearmixing, excitingmixing
|
||
|
|
||
|
%(params_extra)s
|
||
|
full_output : bool
|
||
|
If true, returns a dictionary `info` containing convergence
|
||
|
information.
|
||
|
raise_exception : bool
|
||
|
If True, a `NoConvergence` exception is raise if no solution is found.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
asjacobian, Jacobian
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
This algorithm implements the inexact Newton method, with
|
||
|
backtracking or full line searches. Several Jacobian
|
||
|
approximations are available, including Krylov and Quasi-Newton
|
||
|
methods.
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
.. [KIM] C. T. Kelley, \"Iterative Methods for Linear and Nonlinear
|
||
|
Equations\". Society for Industrial and Applied Mathematics. (1995)
|
||
|
https://archive.siam.org/books/kelley/fr16/
|
||
|
|
||
|
"""
|
||
|
# Can't use default parameters because it's being explicitly passed as None
|
||
|
# from the calling function, so we need to set it here.
|
||
|
tol_norm = maxnorm if tol_norm is None else tol_norm
|
||
|
condition = TerminationCondition(f_tol=f_tol, f_rtol=f_rtol,
|
||
|
x_tol=x_tol, x_rtol=x_rtol,
|
||
|
iter=iter, norm=tol_norm)
|
||
|
|
||
|
x0 = _as_inexact(x0)
|
||
|
func = lambda z: _as_inexact(F(_array_like(z, x0))).flatten()
|
||
|
x = x0.flatten()
|
||
|
|
||
|
dx = np.inf
|
||
|
Fx = func(x)
|
||
|
Fx_norm = norm(Fx)
|
||
|
|
||
|
jacobian = asjacobian(jacobian)
|
||
|
jacobian.setup(x.copy(), Fx, func)
|
||
|
|
||
|
if maxiter is None:
|
||
|
if iter is not None:
|
||
|
maxiter = iter + 1
|
||
|
else:
|
||
|
maxiter = 100*(x.size+1)
|
||
|
|
||
|
if line_search is True:
|
||
|
line_search = 'armijo'
|
||
|
elif line_search is False:
|
||
|
line_search = None
|
||
|
|
||
|
if line_search not in (None, 'armijo', 'wolfe'):
|
||
|
raise ValueError("Invalid line search")
|
||
|
|
||
|
# Solver tolerance selection
|
||
|
gamma = 0.9
|
||
|
eta_max = 0.9999
|
||
|
eta_treshold = 0.1
|
||
|
eta = 1e-3
|
||
|
|
||
|
for n in range(maxiter):
|
||
|
status = condition.check(Fx, x, dx)
|
||
|
if status:
|
||
|
break
|
||
|
|
||
|
# The tolerance, as computed for scipy.sparse.linalg.* routines
|
||
|
tol = min(eta, eta*Fx_norm)
|
||
|
dx = -jacobian.solve(Fx, tol=tol)
|
||
|
|
||
|
if norm(dx) == 0:
|
||
|
raise ValueError("Jacobian inversion yielded zero vector. "
|
||
|
"This indicates a bug in the Jacobian "
|
||
|
"approximation.")
|
||
|
|
||
|
# Line search, or Newton step
|
||
|
if line_search:
|
||
|
s, x, Fx, Fx_norm_new = _nonlin_line_search(func, x, Fx, dx,
|
||
|
line_search)
|
||
|
else:
|
||
|
s = 1.0
|
||
|
x = x + dx
|
||
|
Fx = func(x)
|
||
|
Fx_norm_new = norm(Fx)
|
||
|
|
||
|
jacobian.update(x.copy(), Fx)
|
||
|
|
||
|
if callback:
|
||
|
callback(x, Fx)
|
||
|
|
||
|
# Adjust forcing parameters for inexact methods
|
||
|
eta_A = gamma * Fx_norm_new**2 / Fx_norm**2
|
||
|
if gamma * eta**2 < eta_treshold:
|
||
|
eta = min(eta_max, eta_A)
|
||
|
else:
|
||
|
eta = min(eta_max, max(eta_A, gamma*eta**2))
|
||
|
|
||
|
Fx_norm = Fx_norm_new
|
||
|
|
||
|
# Print status
|
||
|
if verbose:
|
||
|
sys.stdout.write("%d: |F(x)| = %g; step %g\n" % (
|
||
|
n, tol_norm(Fx), s))
|
||
|
sys.stdout.flush()
|
||
|
else:
|
||
|
if raise_exception:
|
||
|
raise NoConvergence(_array_like(x, x0))
|
||
|
else:
|
||
|
status = 2
|
||
|
|
||
|
if full_output:
|
||
|
info = {'nit': condition.iteration,
|
||
|
'fun': Fx,
|
||
|
'status': status,
|
||
|
'success': status == 1,
|
||
|
'message': {1: 'A solution was found at the specified '
|
||
|
'tolerance.',
|
||
|
2: 'The maximum number of iterations allowed '
|
||
|
'has been reached.'
|
||
|
}[status]
|
||
|
}
|
||
|
return _array_like(x, x0), info
|
||
|
else:
|
||
|
return _array_like(x, x0)
|
||
|
|
||
|
|
||
|
_set_doc(nonlin_solve)
|
||
|
|
||
|
|
||
|
def _nonlin_line_search(func, x, Fx, dx, search_type='armijo', rdiff=1e-8,
|
||
|
smin=1e-2):
|
||
|
tmp_s = [0]
|
||
|
tmp_Fx = [Fx]
|
||
|
tmp_phi = [norm(Fx)**2]
|
||
|
s_norm = norm(x) / norm(dx)
|
||
|
|
||
|
def phi(s, store=True):
|
||
|
if s == tmp_s[0]:
|
||
|
return tmp_phi[0]
|
||
|
xt = x + s*dx
|
||
|
v = func(xt)
|
||
|
p = _safe_norm(v)**2
|
||
|
if store:
|
||
|
tmp_s[0] = s
|
||
|
tmp_phi[0] = p
|
||
|
tmp_Fx[0] = v
|
||
|
return p
|
||
|
|
||
|
def derphi(s):
|
||
|
ds = (abs(s) + s_norm + 1) * rdiff
|
||
|
return (phi(s+ds, store=False) - phi(s)) / ds
|
||
|
|
||
|
if search_type == 'wolfe':
|
||
|
s, phi1, phi0 = scalar_search_wolfe1(phi, derphi, tmp_phi[0],
|
||
|
xtol=1e-2, amin=smin)
|
||
|
elif search_type == 'armijo':
|
||
|
s, phi1 = scalar_search_armijo(phi, tmp_phi[0], -tmp_phi[0],
|
||
|
amin=smin)
|
||
|
|
||
|
if s is None:
|
||
|
# XXX: No suitable step length found. Take the full Newton step,
|
||
|
# and hope for the best.
|
||
|
s = 1.0
|
||
|
|
||
|
x = x + s*dx
|
||
|
if s == tmp_s[0]:
|
||
|
Fx = tmp_Fx[0]
|
||
|
else:
|
||
|
Fx = func(x)
|
||
|
Fx_norm = norm(Fx)
|
||
|
|
||
|
return s, x, Fx, Fx_norm
|
||
|
|
||
|
|
||
|
class TerminationCondition(object):
|
||
|
"""
|
||
|
Termination condition for an iteration. It is terminated if
|
||
|
|
||
|
- |F| < f_rtol*|F_0|, AND
|
||
|
- |F| < f_tol
|
||
|
|
||
|
AND
|
||
|
|
||
|
- |dx| < x_rtol*|x|, AND
|
||
|
- |dx| < x_tol
|
||
|
|
||
|
"""
|
||
|
def __init__(self, f_tol=None, f_rtol=None, x_tol=None, x_rtol=None,
|
||
|
iter=None, norm=maxnorm):
|
||
|
|
||
|
if f_tol is None:
|
||
|
f_tol = np.finfo(np.float_).eps ** (1./3)
|
||
|
if f_rtol is None:
|
||
|
f_rtol = np.inf
|
||
|
if x_tol is None:
|
||
|
x_tol = np.inf
|
||
|
if x_rtol is None:
|
||
|
x_rtol = np.inf
|
||
|
|
||
|
self.x_tol = x_tol
|
||
|
self.x_rtol = x_rtol
|
||
|
self.f_tol = f_tol
|
||
|
self.f_rtol = f_rtol
|
||
|
|
||
|
self.norm = norm
|
||
|
|
||
|
self.iter = iter
|
||
|
|
||
|
self.f0_norm = None
|
||
|
self.iteration = 0
|
||
|
|
||
|
def check(self, f, x, dx):
|
||
|
self.iteration += 1
|
||
|
f_norm = self.norm(f)
|
||
|
x_norm = self.norm(x)
|
||
|
dx_norm = self.norm(dx)
|
||
|
|
||
|
if self.f0_norm is None:
|
||
|
self.f0_norm = f_norm
|
||
|
|
||
|
if f_norm == 0:
|
||
|
return 1
|
||
|
|
||
|
if self.iter is not None:
|
||
|
# backwards compatibility with SciPy 0.6.0
|
||
|
return 2 * (self.iteration > self.iter)
|
||
|
|
||
|
# NB: condition must succeed for rtol=inf even if norm == 0
|
||
|
return int((f_norm <= self.f_tol
|
||
|
and f_norm/self.f_rtol <= self.f0_norm)
|
||
|
and (dx_norm <= self.x_tol
|
||
|
and dx_norm/self.x_rtol <= x_norm))
|
||
|
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
# Generic Jacobian approximation
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
class Jacobian(object):
|
||
|
"""
|
||
|
Common interface for Jacobians or Jacobian approximations.
|
||
|
|
||
|
The optional methods come useful when implementing trust region
|
||
|
etc., algorithms that often require evaluating transposes of the
|
||
|
Jacobian.
|
||
|
|
||
|
Methods
|
||
|
-------
|
||
|
solve
|
||
|
Returns J^-1 * v
|
||
|
update
|
||
|
Updates Jacobian to point `x` (where the function has residual `Fx`)
|
||
|
|
||
|
matvec : optional
|
||
|
Returns J * v
|
||
|
rmatvec : optional
|
||
|
Returns A^H * v
|
||
|
rsolve : optional
|
||
|
Returns A^-H * v
|
||
|
matmat : optional
|
||
|
Returns A * V, where V is a dense matrix with dimensions (N,K).
|
||
|
todense : optional
|
||
|
Form the dense Jacobian matrix. Necessary for dense trust region
|
||
|
algorithms, and useful for testing.
|
||
|
|
||
|
Attributes
|
||
|
----------
|
||
|
shape
|
||
|
Matrix dimensions (M, N)
|
||
|
dtype
|
||
|
Data type of the matrix.
|
||
|
func : callable, optional
|
||
|
Function the Jacobian corresponds to
|
||
|
|
||
|
"""
|
||
|
|
||
|
def __init__(self, **kw):
|
||
|
names = ["solve", "update", "matvec", "rmatvec", "rsolve",
|
||
|
"matmat", "todense", "shape", "dtype"]
|
||
|
for name, value in kw.items():
|
||
|
if name not in names:
|
||
|
raise ValueError("Unknown keyword argument %s" % name)
|
||
|
if value is not None:
|
||
|
setattr(self, name, kw[name])
|
||
|
|
||
|
if hasattr(self, 'todense'):
|
||
|
self.__array__ = lambda: self.todense()
|
||
|
|
||
|
def aspreconditioner(self):
|
||
|
return InverseJacobian(self)
|
||
|
|
||
|
def solve(self, v, tol=0):
|
||
|
raise NotImplementedError
|
||
|
|
||
|
def update(self, x, F):
|
||
|
pass
|
||
|
|
||
|
def setup(self, x, F, func):
|
||
|
self.func = func
|
||
|
self.shape = (F.size, x.size)
|
||
|
self.dtype = F.dtype
|
||
|
if self.__class__.setup is Jacobian.setup:
|
||
|
# Call on the first point unless overridden
|
||
|
self.update(x, F)
|
||
|
|
||
|
|
||
|
class InverseJacobian(object):
|
||
|
def __init__(self, jacobian):
|
||
|
self.jacobian = jacobian
|
||
|
self.matvec = jacobian.solve
|
||
|
self.update = jacobian.update
|
||
|
if hasattr(jacobian, 'setup'):
|
||
|
self.setup = jacobian.setup
|
||
|
if hasattr(jacobian, 'rsolve'):
|
||
|
self.rmatvec = jacobian.rsolve
|
||
|
|
||
|
@property
|
||
|
def shape(self):
|
||
|
return self.jacobian.shape
|
||
|
|
||
|
@property
|
||
|
def dtype(self):
|
||
|
return self.jacobian.dtype
|
||
|
|
||
|
|
||
|
def asjacobian(J):
|
||
|
"""
|
||
|
Convert given object to one suitable for use as a Jacobian.
|
||
|
"""
|
||
|
spsolve = scipy.sparse.linalg.spsolve
|
||
|
if isinstance(J, Jacobian):
|
||
|
return J
|
||
|
elif inspect.isclass(J) and issubclass(J, Jacobian):
|
||
|
return J()
|
||
|
elif isinstance(J, np.ndarray):
|
||
|
if J.ndim > 2:
|
||
|
raise ValueError('array must have rank <= 2')
|
||
|
J = np.atleast_2d(np.asarray(J))
|
||
|
if J.shape[0] != J.shape[1]:
|
||
|
raise ValueError('array must be square')
|
||
|
|
||
|
return Jacobian(matvec=lambda v: dot(J, v),
|
||
|
rmatvec=lambda v: dot(J.conj().T, v),
|
||
|
solve=lambda v: solve(J, v),
|
||
|
rsolve=lambda v: solve(J.conj().T, v),
|
||
|
dtype=J.dtype, shape=J.shape)
|
||
|
elif scipy.sparse.isspmatrix(J):
|
||
|
if J.shape[0] != J.shape[1]:
|
||
|
raise ValueError('matrix must be square')
|
||
|
return Jacobian(matvec=lambda v: J*v,
|
||
|
rmatvec=lambda v: J.conj().T * v,
|
||
|
solve=lambda v: spsolve(J, v),
|
||
|
rsolve=lambda v: spsolve(J.conj().T, v),
|
||
|
dtype=J.dtype, shape=J.shape)
|
||
|
elif hasattr(J, 'shape') and hasattr(J, 'dtype') and hasattr(J, 'solve'):
|
||
|
return Jacobian(matvec=getattr(J, 'matvec'),
|
||
|
rmatvec=getattr(J, 'rmatvec'),
|
||
|
solve=J.solve,
|
||
|
rsolve=getattr(J, 'rsolve'),
|
||
|
update=getattr(J, 'update'),
|
||
|
setup=getattr(J, 'setup'),
|
||
|
dtype=J.dtype,
|
||
|
shape=J.shape)
|
||
|
elif callable(J):
|
||
|
# Assume it's a function J(x) that returns the Jacobian
|
||
|
class Jac(Jacobian):
|
||
|
def update(self, x, F):
|
||
|
self.x = x
|
||
|
|
||
|
def solve(self, v, tol=0):
|
||
|
m = J(self.x)
|
||
|
if isinstance(m, np.ndarray):
|
||
|
return solve(m, v)
|
||
|
elif scipy.sparse.isspmatrix(m):
|
||
|
return spsolve(m, v)
|
||
|
else:
|
||
|
raise ValueError("Unknown matrix type")
|
||
|
|
||
|
def matvec(self, v):
|
||
|
m = J(self.x)
|
||
|
if isinstance(m, np.ndarray):
|
||
|
return dot(m, v)
|
||
|
elif scipy.sparse.isspmatrix(m):
|
||
|
return m*v
|
||
|
else:
|
||
|
raise ValueError("Unknown matrix type")
|
||
|
|
||
|
def rsolve(self, v, tol=0):
|
||
|
m = J(self.x)
|
||
|
if isinstance(m, np.ndarray):
|
||
|
return solve(m.conj().T, v)
|
||
|
elif scipy.sparse.isspmatrix(m):
|
||
|
return spsolve(m.conj().T, v)
|
||
|
else:
|
||
|
raise ValueError("Unknown matrix type")
|
||
|
|
||
|
def rmatvec(self, v):
|
||
|
m = J(self.x)
|
||
|
if isinstance(m, np.ndarray):
|
||
|
return dot(m.conj().T, v)
|
||
|
elif scipy.sparse.isspmatrix(m):
|
||
|
return m.conj().T * v
|
||
|
else:
|
||
|
raise ValueError("Unknown matrix type")
|
||
|
return Jac()
|
||
|
elif isinstance(J, str):
|
||
|
return dict(broyden1=BroydenFirst,
|
||
|
broyden2=BroydenSecond,
|
||
|
anderson=Anderson,
|
||
|
diagbroyden=DiagBroyden,
|
||
|
linearmixing=LinearMixing,
|
||
|
excitingmixing=ExcitingMixing,
|
||
|
krylov=KrylovJacobian)[J]()
|
||
|
else:
|
||
|
raise TypeError('Cannot convert object to a Jacobian')
|
||
|
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
# Broyden
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
class GenericBroyden(Jacobian):
|
||
|
def setup(self, x0, f0, func):
|
||
|
Jacobian.setup(self, x0, f0, func)
|
||
|
self.last_f = f0
|
||
|
self.last_x = x0
|
||
|
|
||
|
if hasattr(self, 'alpha') and self.alpha is None:
|
||
|
# Autoscale the initial Jacobian parameter
|
||
|
# unless we have already guessed the solution.
|
||
|
normf0 = norm(f0)
|
||
|
if normf0:
|
||
|
self.alpha = 0.5*max(norm(x0), 1) / normf0
|
||
|
else:
|
||
|
self.alpha = 1.0
|
||
|
|
||
|
def _update(self, x, f, dx, df, dx_norm, df_norm):
|
||
|
raise NotImplementedError
|
||
|
|
||
|
def update(self, x, f):
|
||
|
df = f - self.last_f
|
||
|
dx = x - self.last_x
|
||
|
self._update(x, f, dx, df, norm(dx), norm(df))
|
||
|
self.last_f = f
|
||
|
self.last_x = x
|
||
|
|
||
|
|
||
|
class LowRankMatrix(object):
|
||
|
r"""
|
||
|
A matrix represented as
|
||
|
|
||
|
.. math:: \alpha I + \sum_{n=0}^{n=M} c_n d_n^\dagger
|
||
|
|
||
|
However, if the rank of the matrix reaches the dimension of the vectors,
|
||
|
full matrix representation will be used thereon.
|
||
|
|
||
|
"""
|
||
|
|
||
|
def __init__(self, alpha, n, dtype):
|
||
|
self.alpha = alpha
|
||
|
self.cs = []
|
||
|
self.ds = []
|
||
|
self.n = n
|
||
|
self.dtype = dtype
|
||
|
self.collapsed = None
|
||
|
|
||
|
@staticmethod
|
||
|
def _matvec(v, alpha, cs, ds):
|
||
|
axpy, scal, dotc = get_blas_funcs(['axpy', 'scal', 'dotc'],
|
||
|
cs[:1] + [v])
|
||
|
w = alpha * v
|
||
|
for c, d in zip(cs, ds):
|
||
|
a = dotc(d, v)
|
||
|
w = axpy(c, w, w.size, a)
|
||
|
return w
|
||
|
|
||
|
@staticmethod
|
||
|
def _solve(v, alpha, cs, ds):
|
||
|
"""Evaluate w = M^-1 v"""
|
||
|
if len(cs) == 0:
|
||
|
return v/alpha
|
||
|
|
||
|
# (B + C D^H)^-1 = B^-1 - B^-1 C (I + D^H B^-1 C)^-1 D^H B^-1
|
||
|
|
||
|
axpy, dotc = get_blas_funcs(['axpy', 'dotc'], cs[:1] + [v])
|
||
|
|
||
|
c0 = cs[0]
|
||
|
A = alpha * np.identity(len(cs), dtype=c0.dtype)
|
||
|
for i, d in enumerate(ds):
|
||
|
for j, c in enumerate(cs):
|
||
|
A[i,j] += dotc(d, c)
|
||
|
|
||
|
q = np.zeros(len(cs), dtype=c0.dtype)
|
||
|
for j, d in enumerate(ds):
|
||
|
q[j] = dotc(d, v)
|
||
|
q /= alpha
|
||
|
q = solve(A, q)
|
||
|
|
||
|
w = v/alpha
|
||
|
for c, qc in zip(cs, q):
|
||
|
w = axpy(c, w, w.size, -qc)
|
||
|
|
||
|
return w
|
||
|
|
||
|
def matvec(self, v):
|
||
|
"""Evaluate w = M v"""
|
||
|
if self.collapsed is not None:
|
||
|
return np.dot(self.collapsed, v)
|
||
|
return LowRankMatrix._matvec(v, self.alpha, self.cs, self.ds)
|
||
|
|
||
|
def rmatvec(self, v):
|
||
|
"""Evaluate w = M^H v"""
|
||
|
if self.collapsed is not None:
|
||
|
return np.dot(self.collapsed.T.conj(), v)
|
||
|
return LowRankMatrix._matvec(v, np.conj(self.alpha), self.ds, self.cs)
|
||
|
|
||
|
def solve(self, v, tol=0):
|
||
|
"""Evaluate w = M^-1 v"""
|
||
|
if self.collapsed is not None:
|
||
|
return solve(self.collapsed, v)
|
||
|
return LowRankMatrix._solve(v, self.alpha, self.cs, self.ds)
|
||
|
|
||
|
def rsolve(self, v, tol=0):
|
||
|
"""Evaluate w = M^-H v"""
|
||
|
if self.collapsed is not None:
|
||
|
return solve(self.collapsed.T.conj(), v)
|
||
|
return LowRankMatrix._solve(v, np.conj(self.alpha), self.ds, self.cs)
|
||
|
|
||
|
def append(self, c, d):
|
||
|
if self.collapsed is not None:
|
||
|
self.collapsed += c[:,None] * d[None,:].conj()
|
||
|
return
|
||
|
|
||
|
self.cs.append(c)
|
||
|
self.ds.append(d)
|
||
|
|
||
|
if len(self.cs) > c.size:
|
||
|
self.collapse()
|
||
|
|
||
|
def __array__(self):
|
||
|
if self.collapsed is not None:
|
||
|
return self.collapsed
|
||
|
|
||
|
Gm = self.alpha*np.identity(self.n, dtype=self.dtype)
|
||
|
for c, d in zip(self.cs, self.ds):
|
||
|
Gm += c[:,None]*d[None,:].conj()
|
||
|
return Gm
|
||
|
|
||
|
def collapse(self):
|
||
|
"""Collapse the low-rank matrix to a full-rank one."""
|
||
|
self.collapsed = np.array(self)
|
||
|
self.cs = None
|
||
|
self.ds = None
|
||
|
self.alpha = None
|
||
|
|
||
|
def restart_reduce(self, rank):
|
||
|
"""
|
||
|
Reduce the rank of the matrix by dropping all vectors.
|
||
|
"""
|
||
|
if self.collapsed is not None:
|
||
|
return
|
||
|
assert rank > 0
|
||
|
if len(self.cs) > rank:
|
||
|
del self.cs[:]
|
||
|
del self.ds[:]
|
||
|
|
||
|
def simple_reduce(self, rank):
|
||
|
"""
|
||
|
Reduce the rank of the matrix by dropping oldest vectors.
|
||
|
"""
|
||
|
if self.collapsed is not None:
|
||
|
return
|
||
|
assert rank > 0
|
||
|
while len(self.cs) > rank:
|
||
|
del self.cs[0]
|
||
|
del self.ds[0]
|
||
|
|
||
|
def svd_reduce(self, max_rank, to_retain=None):
|
||
|
"""
|
||
|
Reduce the rank of the matrix by retaining some SVD components.
|
||
|
|
||
|
This corresponds to the \"Broyden Rank Reduction Inverse\"
|
||
|
algorithm described in [1]_.
|
||
|
|
||
|
Note that the SVD decomposition can be done by solving only a
|
||
|
problem whose size is the effective rank of this matrix, which
|
||
|
is viable even for large problems.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
max_rank : int
|
||
|
Maximum rank of this matrix after reduction.
|
||
|
to_retain : int, optional
|
||
|
Number of SVD components to retain when reduction is done
|
||
|
(ie. rank > max_rank). Default is ``max_rank - 2``.
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
.. [1] B.A. van der Rotten, PhD thesis,
|
||
|
\"A limited memory Broyden method to solve high-dimensional
|
||
|
systems of nonlinear equations\". Mathematisch Instituut,
|
||
|
Universiteit Leiden, The Netherlands (2003).
|
||
|
|
||
|
https://web.archive.org/web/20161022015821/http://www.math.leidenuniv.nl/scripties/Rotten.pdf
|
||
|
|
||
|
"""
|
||
|
if self.collapsed is not None:
|
||
|
return
|
||
|
|
||
|
p = max_rank
|
||
|
if to_retain is not None:
|
||
|
q = to_retain
|
||
|
else:
|
||
|
q = p - 2
|
||
|
|
||
|
if self.cs:
|
||
|
p = min(p, len(self.cs[0]))
|
||
|
q = max(0, min(q, p-1))
|
||
|
|
||
|
m = len(self.cs)
|
||
|
if m < p:
|
||
|
# nothing to do
|
||
|
return
|
||
|
|
||
|
C = np.array(self.cs).T
|
||
|
D = np.array(self.ds).T
|
||
|
|
||
|
D, R = qr(D, mode='economic')
|
||
|
C = dot(C, R.T.conj())
|
||
|
|
||
|
U, S, WH = svd(C, full_matrices=False, compute_uv=True)
|
||
|
|
||
|
C = dot(C, inv(WH))
|
||
|
D = dot(D, WH.T.conj())
|
||
|
|
||
|
for k in range(q):
|
||
|
self.cs[k] = C[:,k].copy()
|
||
|
self.ds[k] = D[:,k].copy()
|
||
|
|
||
|
del self.cs[q:]
|
||
|
del self.ds[q:]
|
||
|
|
||
|
|
||
|
_doc_parts['broyden_params'] = """
|
||
|
alpha : float, optional
|
||
|
Initial guess for the Jacobian is ``(-1/alpha)``.
|
||
|
reduction_method : str or tuple, optional
|
||
|
Method used in ensuring that the rank of the Broyden matrix
|
||
|
stays low. Can either be a string giving the name of the method,
|
||
|
or a tuple of the form ``(method, param1, param2, ...)``
|
||
|
that gives the name of the method and values for additional parameters.
|
||
|
|
||
|
Methods available:
|
||
|
|
||
|
- ``restart``: drop all matrix columns. Has no extra parameters.
|
||
|
- ``simple``: drop oldest matrix column. Has no extra parameters.
|
||
|
- ``svd``: keep only the most significant SVD components.
|
||
|
Takes an extra parameter, ``to_retain``, which determines the
|
||
|
number of SVD components to retain when rank reduction is done.
|
||
|
Default is ``max_rank - 2``.
|
||
|
|
||
|
max_rank : int, optional
|
||
|
Maximum rank for the Broyden matrix.
|
||
|
Default is infinity (i.e., no rank reduction).
|
||
|
""".strip()
|
||
|
|
||
|
|
||
|
class BroydenFirst(GenericBroyden):
|
||
|
r"""
|
||
|
Find a root of a function, using Broyden's first Jacobian approximation.
|
||
|
|
||
|
This method is also known as \"Broyden's good method\".
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
%(params_basic)s
|
||
|
%(broyden_params)s
|
||
|
%(params_extra)s
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
root : Interface to root finding algorithms for multivariate
|
||
|
functions. See ``method=='broyden1'`` in particular.
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
This algorithm implements the inverse Jacobian Quasi-Newton update
|
||
|
|
||
|
.. math:: H_+ = H + (dx - H df) dx^\dagger H / ( dx^\dagger H df)
|
||
|
|
||
|
which corresponds to Broyden's first Jacobian update
|
||
|
|
||
|
.. math:: J_+ = J + (df - J dx) dx^\dagger / dx^\dagger dx
|
||
|
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
.. [1] B.A. van der Rotten, PhD thesis,
|
||
|
\"A limited memory Broyden method to solve high-dimensional
|
||
|
systems of nonlinear equations\". Mathematisch Instituut,
|
||
|
Universiteit Leiden, The Netherlands (2003).
|
||
|
|
||
|
https://web.archive.org/web/20161022015821/http://www.math.leidenuniv.nl/scripties/Rotten.pdf
|
||
|
|
||
|
"""
|
||
|
|
||
|
def __init__(self, alpha=None, reduction_method='restart', max_rank=None):
|
||
|
GenericBroyden.__init__(self)
|
||
|
self.alpha = alpha
|
||
|
self.Gm = None
|
||
|
|
||
|
if max_rank is None:
|
||
|
max_rank = np.inf
|
||
|
self.max_rank = max_rank
|
||
|
|
||
|
if isinstance(reduction_method, str):
|
||
|
reduce_params = ()
|
||
|
else:
|
||
|
reduce_params = reduction_method[1:]
|
||
|
reduction_method = reduction_method[0]
|
||
|
reduce_params = (max_rank - 1,) + reduce_params
|
||
|
|
||
|
if reduction_method == 'svd':
|
||
|
self._reduce = lambda: self.Gm.svd_reduce(*reduce_params)
|
||
|
elif reduction_method == 'simple':
|
||
|
self._reduce = lambda: self.Gm.simple_reduce(*reduce_params)
|
||
|
elif reduction_method == 'restart':
|
||
|
self._reduce = lambda: self.Gm.restart_reduce(*reduce_params)
|
||
|
else:
|
||
|
raise ValueError("Unknown rank reduction method '%s'" %
|
||
|
reduction_method)
|
||
|
|
||
|
def setup(self, x, F, func):
|
||
|
GenericBroyden.setup(self, x, F, func)
|
||
|
self.Gm = LowRankMatrix(-self.alpha, self.shape[0], self.dtype)
|
||
|
|
||
|
def todense(self):
|
||
|
return inv(self.Gm)
|
||
|
|
||
|
def solve(self, f, tol=0):
|
||
|
r = self.Gm.matvec(f)
|
||
|
if not np.isfinite(r).all():
|
||
|
# singular; reset the Jacobian approximation
|
||
|
self.setup(self.last_x, self.last_f, self.func)
|
||
|
return self.Gm.matvec(f)
|
||
|
|
||
|
def matvec(self, f):
|
||
|
return self.Gm.solve(f)
|
||
|
|
||
|
def rsolve(self, f, tol=0):
|
||
|
return self.Gm.rmatvec(f)
|
||
|
|
||
|
def rmatvec(self, f):
|
||
|
return self.Gm.rsolve(f)
|
||
|
|
||
|
def _update(self, x, f, dx, df, dx_norm, df_norm):
|
||
|
self._reduce() # reduce first to preserve secant condition
|
||
|
|
||
|
v = self.Gm.rmatvec(dx)
|
||
|
c = dx - self.Gm.matvec(df)
|
||
|
d = v / vdot(df, v)
|
||
|
|
||
|
self.Gm.append(c, d)
|
||
|
|
||
|
|
||
|
class BroydenSecond(BroydenFirst):
|
||
|
"""
|
||
|
Find a root of a function, using Broyden\'s second Jacobian approximation.
|
||
|
|
||
|
This method is also known as \"Broyden's bad method\".
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
%(params_basic)s
|
||
|
%(broyden_params)s
|
||
|
%(params_extra)s
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
root : Interface to root finding algorithms for multivariate
|
||
|
functions. See ``method=='broyden2'`` in particular.
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
This algorithm implements the inverse Jacobian Quasi-Newton update
|
||
|
|
||
|
.. math:: H_+ = H + (dx - H df) df^\\dagger / ( df^\\dagger df)
|
||
|
|
||
|
corresponding to Broyden's second method.
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
.. [1] B.A. van der Rotten, PhD thesis,
|
||
|
\"A limited memory Broyden method to solve high-dimensional
|
||
|
systems of nonlinear equations\". Mathematisch Instituut,
|
||
|
Universiteit Leiden, The Netherlands (2003).
|
||
|
|
||
|
https://web.archive.org/web/20161022015821/http://www.math.leidenuniv.nl/scripties/Rotten.pdf
|
||
|
|
||
|
"""
|
||
|
|
||
|
def _update(self, x, f, dx, df, dx_norm, df_norm):
|
||
|
self._reduce() # reduce first to preserve secant condition
|
||
|
|
||
|
v = df
|
||
|
c = dx - self.Gm.matvec(df)
|
||
|
d = v / df_norm**2
|
||
|
self.Gm.append(c, d)
|
||
|
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
# Broyden-like (restricted memory)
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
class Anderson(GenericBroyden):
|
||
|
"""
|
||
|
Find a root of a function, using (extended) Anderson mixing.
|
||
|
|
||
|
The Jacobian is formed by for a 'best' solution in the space
|
||
|
spanned by last `M` vectors. As a result, only a MxM matrix
|
||
|
inversions and MxN multiplications are required. [Ey]_
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
%(params_basic)s
|
||
|
alpha : float, optional
|
||
|
Initial guess for the Jacobian is (-1/alpha).
|
||
|
M : float, optional
|
||
|
Number of previous vectors to retain. Defaults to 5.
|
||
|
w0 : float, optional
|
||
|
Regularization parameter for numerical stability.
|
||
|
Compared to unity, good values of the order of 0.01.
|
||
|
%(params_extra)s
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
root : Interface to root finding algorithms for multivariate
|
||
|
functions. See ``method=='anderson'`` in particular.
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
.. [Ey] V. Eyert, J. Comp. Phys., 124, 271 (1996).
|
||
|
|
||
|
"""
|
||
|
|
||
|
# Note:
|
||
|
#
|
||
|
# Anderson method maintains a rank M approximation of the inverse Jacobian,
|
||
|
#
|
||
|
# J^-1 v ~ -v*alpha + (dX + alpha dF) A^-1 dF^H v
|
||
|
# A = W + dF^H dF
|
||
|
# W = w0^2 diag(dF^H dF)
|
||
|
#
|
||
|
# so that for w0 = 0 the secant condition applies for last M iterates, i.e.,
|
||
|
#
|
||
|
# J^-1 df_j = dx_j
|
||
|
#
|
||
|
# for all j = 0 ... M-1.
|
||
|
#
|
||
|
# Moreover, (from Sherman-Morrison-Woodbury formula)
|
||
|
#
|
||
|
# J v ~ [ b I - b^2 C (I + b dF^H A^-1 C)^-1 dF^H ] v
|
||
|
# C = (dX + alpha dF) A^-1
|
||
|
# b = -1/alpha
|
||
|
#
|
||
|
# and after simplification
|
||
|
#
|
||
|
# J v ~ -v/alpha + (dX/alpha + dF) (dF^H dX - alpha W)^-1 dF^H v
|
||
|
#
|
||
|
|
||
|
def __init__(self, alpha=None, w0=0.01, M=5):
|
||
|
GenericBroyden.__init__(self)
|
||
|
self.alpha = alpha
|
||
|
self.M = M
|
||
|
self.dx = []
|
||
|
self.df = []
|
||
|
self.gamma = None
|
||
|
self.w0 = w0
|
||
|
|
||
|
def solve(self, f, tol=0):
|
||
|
dx = -self.alpha*f
|
||
|
|
||
|
n = len(self.dx)
|
||
|
if n == 0:
|
||
|
return dx
|
||
|
|
||
|
df_f = np.empty(n, dtype=f.dtype)
|
||
|
for k in range(n):
|
||
|
df_f[k] = vdot(self.df[k], f)
|
||
|
|
||
|
try:
|
||
|
gamma = solve(self.a, df_f)
|
||
|
except LinAlgError:
|
||
|
# singular; reset the Jacobian approximation
|
||
|
del self.dx[:]
|
||
|
del self.df[:]
|
||
|
return dx
|
||
|
|
||
|
for m in range(n):
|
||
|
dx += gamma[m]*(self.dx[m] + self.alpha*self.df[m])
|
||
|
return dx
|
||
|
|
||
|
def matvec(self, f):
|
||
|
dx = -f/self.alpha
|
||
|
|
||
|
n = len(self.dx)
|
||
|
if n == 0:
|
||
|
return dx
|
||
|
|
||
|
df_f = np.empty(n, dtype=f.dtype)
|
||
|
for k in range(n):
|
||
|
df_f[k] = vdot(self.df[k], f)
|
||
|
|
||
|
b = np.empty((n, n), dtype=f.dtype)
|
||
|
for i in range(n):
|
||
|
for j in range(n):
|
||
|
b[i,j] = vdot(self.df[i], self.dx[j])
|
||
|
if i == j and self.w0 != 0:
|
||
|
b[i,j] -= vdot(self.df[i], self.df[i])*self.w0**2*self.alpha
|
||
|
gamma = solve(b, df_f)
|
||
|
|
||
|
for m in range(n):
|
||
|
dx += gamma[m]*(self.df[m] + self.dx[m]/self.alpha)
|
||
|
return dx
|
||
|
|
||
|
def _update(self, x, f, dx, df, dx_norm, df_norm):
|
||
|
if self.M == 0:
|
||
|
return
|
||
|
|
||
|
self.dx.append(dx)
|
||
|
self.df.append(df)
|
||
|
|
||
|
while len(self.dx) > self.M:
|
||
|
self.dx.pop(0)
|
||
|
self.df.pop(0)
|
||
|
|
||
|
n = len(self.dx)
|
||
|
a = np.zeros((n, n), dtype=f.dtype)
|
||
|
|
||
|
for i in range(n):
|
||
|
for j in range(i, n):
|
||
|
if i == j:
|
||
|
wd = self.w0**2
|
||
|
else:
|
||
|
wd = 0
|
||
|
a[i,j] = (1+wd)*vdot(self.df[i], self.df[j])
|
||
|
|
||
|
a += np.triu(a, 1).T.conj()
|
||
|
self.a = a
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
# Simple iterations
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
|
||
|
class DiagBroyden(GenericBroyden):
|
||
|
"""
|
||
|
Find a root of a function, using diagonal Broyden Jacobian approximation.
|
||
|
|
||
|
The Jacobian approximation is derived from previous iterations, by
|
||
|
retaining only the diagonal of Broyden matrices.
|
||
|
|
||
|
.. warning::
|
||
|
|
||
|
This algorithm may be useful for specific problems, but whether
|
||
|
it will work may depend strongly on the problem.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
%(params_basic)s
|
||
|
alpha : float, optional
|
||
|
Initial guess for the Jacobian is (-1/alpha).
|
||
|
%(params_extra)s
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
root : Interface to root finding algorithms for multivariate
|
||
|
functions. See ``method=='diagbroyden'`` in particular.
|
||
|
"""
|
||
|
|
||
|
def __init__(self, alpha=None):
|
||
|
GenericBroyden.__init__(self)
|
||
|
self.alpha = alpha
|
||
|
|
||
|
def setup(self, x, F, func):
|
||
|
GenericBroyden.setup(self, x, F, func)
|
||
|
self.d = np.full((self.shape[0],), 1 / self.alpha, dtype=self.dtype)
|
||
|
|
||
|
def solve(self, f, tol=0):
|
||
|
return -f / self.d
|
||
|
|
||
|
def matvec(self, f):
|
||
|
return -f * self.d
|
||
|
|
||
|
def rsolve(self, f, tol=0):
|
||
|
return -f / self.d.conj()
|
||
|
|
||
|
def rmatvec(self, f):
|
||
|
return -f * self.d.conj()
|
||
|
|
||
|
def todense(self):
|
||
|
return np.diag(-self.d)
|
||
|
|
||
|
def _update(self, x, f, dx, df, dx_norm, df_norm):
|
||
|
self.d -= (df + self.d*dx)*dx/dx_norm**2
|
||
|
|
||
|
|
||
|
class LinearMixing(GenericBroyden):
|
||
|
"""
|
||
|
Find a root of a function, using a scalar Jacobian approximation.
|
||
|
|
||
|
.. warning::
|
||
|
|
||
|
This algorithm may be useful for specific problems, but whether
|
||
|
it will work may depend strongly on the problem.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
%(params_basic)s
|
||
|
alpha : float, optional
|
||
|
The Jacobian approximation is (-1/alpha).
|
||
|
%(params_extra)s
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
root : Interface to root finding algorithms for multivariate
|
||
|
functions. See ``method=='linearmixing'`` in particular.
|
||
|
|
||
|
"""
|
||
|
|
||
|
def __init__(self, alpha=None):
|
||
|
GenericBroyden.__init__(self)
|
||
|
self.alpha = alpha
|
||
|
|
||
|
def solve(self, f, tol=0):
|
||
|
return -f*self.alpha
|
||
|
|
||
|
def matvec(self, f):
|
||
|
return -f/self.alpha
|
||
|
|
||
|
def rsolve(self, f, tol=0):
|
||
|
return -f*np.conj(self.alpha)
|
||
|
|
||
|
def rmatvec(self, f):
|
||
|
return -f/np.conj(self.alpha)
|
||
|
|
||
|
def todense(self):
|
||
|
return np.diag(np.full(self.shape[0], -1/self.alpha))
|
||
|
|
||
|
def _update(self, x, f, dx, df, dx_norm, df_norm):
|
||
|
pass
|
||
|
|
||
|
|
||
|
class ExcitingMixing(GenericBroyden):
|
||
|
"""
|
||
|
Find a root of a function, using a tuned diagonal Jacobian approximation.
|
||
|
|
||
|
The Jacobian matrix is diagonal and is tuned on each iteration.
|
||
|
|
||
|
.. warning::
|
||
|
|
||
|
This algorithm may be useful for specific problems, but whether
|
||
|
it will work may depend strongly on the problem.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
root : Interface to root finding algorithms for multivariate
|
||
|
functions. See ``method=='excitingmixing'`` in particular.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
%(params_basic)s
|
||
|
alpha : float, optional
|
||
|
Initial Jacobian approximation is (-1/alpha).
|
||
|
alphamax : float, optional
|
||
|
The entries of the diagonal Jacobian are kept in the range
|
||
|
``[alpha, alphamax]``.
|
||
|
%(params_extra)s
|
||
|
"""
|
||
|
|
||
|
def __init__(self, alpha=None, alphamax=1.0):
|
||
|
GenericBroyden.__init__(self)
|
||
|
self.alpha = alpha
|
||
|
self.alphamax = alphamax
|
||
|
self.beta = None
|
||
|
|
||
|
def setup(self, x, F, func):
|
||
|
GenericBroyden.setup(self, x, F, func)
|
||
|
self.beta = np.full((self.shape[0],), self.alpha, dtype=self.dtype)
|
||
|
|
||
|
def solve(self, f, tol=0):
|
||
|
return -f*self.beta
|
||
|
|
||
|
def matvec(self, f):
|
||
|
return -f/self.beta
|
||
|
|
||
|
def rsolve(self, f, tol=0):
|
||
|
return -f*self.beta.conj()
|
||
|
|
||
|
def rmatvec(self, f):
|
||
|
return -f/self.beta.conj()
|
||
|
|
||
|
def todense(self):
|
||
|
return np.diag(-1/self.beta)
|
||
|
|
||
|
def _update(self, x, f, dx, df, dx_norm, df_norm):
|
||
|
incr = f*self.last_f > 0
|
||
|
self.beta[incr] += self.alpha
|
||
|
self.beta[~incr] = self.alpha
|
||
|
np.clip(self.beta, 0, self.alphamax, out=self.beta)
|
||
|
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
# Iterative/Krylov approximated Jacobians
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
class KrylovJacobian(Jacobian):
|
||
|
r"""
|
||
|
Find a root of a function, using Krylov approximation for inverse Jacobian.
|
||
|
|
||
|
This method is suitable for solving large-scale problems.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
%(params_basic)s
|
||
|
rdiff : float, optional
|
||
|
Relative step size to use in numerical differentiation.
|
||
|
method : {'lgmres', 'gmres', 'bicgstab', 'cgs', 'minres'} or function
|
||
|
Krylov method to use to approximate the Jacobian.
|
||
|
Can be a string, or a function implementing the same interface as
|
||
|
the iterative solvers in `scipy.sparse.linalg`.
|
||
|
|
||
|
The default is `scipy.sparse.linalg.lgmres`.
|
||
|
inner_maxiter : int, optional
|
||
|
Parameter to pass to the "inner" Krylov solver: maximum number of
|
||
|
iterations. Iteration will stop after maxiter steps even if the
|
||
|
specified tolerance has not been achieved.
|
||
|
inner_M : LinearOperator or InverseJacobian
|
||
|
Preconditioner for the inner Krylov iteration.
|
||
|
Note that you can use also inverse Jacobians as (adaptive)
|
||
|
preconditioners. For example,
|
||
|
|
||
|
>>> from scipy.optimize.nonlin import BroydenFirst, KrylovJacobian
|
||
|
>>> from scipy.optimize.nonlin import InverseJacobian
|
||
|
>>> jac = BroydenFirst()
|
||
|
>>> kjac = KrylovJacobian(inner_M=InverseJacobian(jac))
|
||
|
|
||
|
If the preconditioner has a method named 'update', it will be called
|
||
|
as ``update(x, f)`` after each nonlinear step, with ``x`` giving
|
||
|
the current point, and ``f`` the current function value.
|
||
|
outer_k : int, optional
|
||
|
Size of the subspace kept across LGMRES nonlinear iterations.
|
||
|
See `scipy.sparse.linalg.lgmres` for details.
|
||
|
inner_kwargs : kwargs
|
||
|
Keyword parameters for the "inner" Krylov solver
|
||
|
(defined with `method`). Parameter names must start with
|
||
|
the `inner_` prefix which will be stripped before passing on
|
||
|
the inner method. See, e.g., `scipy.sparse.linalg.gmres` for details.
|
||
|
%(params_extra)s
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
root : Interface to root finding algorithms for multivariate
|
||
|
functions. See ``method=='krylov'`` in particular.
|
||
|
scipy.sparse.linalg.gmres
|
||
|
scipy.sparse.linalg.lgmres
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
This function implements a Newton-Krylov solver. The basic idea is
|
||
|
to compute the inverse of the Jacobian with an iterative Krylov
|
||
|
method. These methods require only evaluating the Jacobian-vector
|
||
|
products, which are conveniently approximated by a finite difference:
|
||
|
|
||
|
.. math:: J v \approx (f(x + \omega*v/|v|) - f(x)) / \omega
|
||
|
|
||
|
Due to the use of iterative matrix inverses, these methods can
|
||
|
deal with large nonlinear problems.
|
||
|
|
||
|
SciPy's `scipy.sparse.linalg` module offers a selection of Krylov
|
||
|
solvers to choose from. The default here is `lgmres`, which is a
|
||
|
variant of restarted GMRES iteration that reuses some of the
|
||
|
information obtained in the previous Newton steps to invert
|
||
|
Jacobians in subsequent steps.
|
||
|
|
||
|
For a review on Newton-Krylov methods, see for example [1]_,
|
||
|
and for the LGMRES sparse inverse method, see [2]_.
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
.. [1] D.A. Knoll and D.E. Keyes, J. Comp. Phys. 193, 357 (2004).
|
||
|
:doi:`10.1016/j.jcp.2003.08.010`
|
||
|
.. [2] A.H. Baker and E.R. Jessup and T. Manteuffel,
|
||
|
SIAM J. Matrix Anal. Appl. 26, 962 (2005).
|
||
|
:doi:`10.1137/S0895479803422014`
|
||
|
|
||
|
"""
|
||
|
|
||
|
def __init__(self, rdiff=None, method='lgmres', inner_maxiter=20,
|
||
|
inner_M=None, outer_k=10, **kw):
|
||
|
self.preconditioner = inner_M
|
||
|
self.rdiff = rdiff
|
||
|
self.method = dict(
|
||
|
bicgstab=scipy.sparse.linalg.bicgstab,
|
||
|
gmres=scipy.sparse.linalg.gmres,
|
||
|
lgmres=scipy.sparse.linalg.lgmres,
|
||
|
cgs=scipy.sparse.linalg.cgs,
|
||
|
minres=scipy.sparse.linalg.minres,
|
||
|
).get(method, method)
|
||
|
|
||
|
self.method_kw = dict(maxiter=inner_maxiter, M=self.preconditioner)
|
||
|
|
||
|
if self.method is scipy.sparse.linalg.gmres:
|
||
|
# Replace GMRES's outer iteration with Newton steps
|
||
|
self.method_kw['restrt'] = inner_maxiter
|
||
|
self.method_kw['maxiter'] = 1
|
||
|
self.method_kw.setdefault('atol', 0)
|
||
|
elif self.method is scipy.sparse.linalg.gcrotmk:
|
||
|
self.method_kw.setdefault('atol', 0)
|
||
|
elif self.method is scipy.sparse.linalg.lgmres:
|
||
|
self.method_kw['outer_k'] = outer_k
|
||
|
# Replace LGMRES's outer iteration with Newton steps
|
||
|
self.method_kw['maxiter'] = 1
|
||
|
# Carry LGMRES's `outer_v` vectors across nonlinear iterations
|
||
|
self.method_kw.setdefault('outer_v', [])
|
||
|
self.method_kw.setdefault('prepend_outer_v', True)
|
||
|
# But don't carry the corresponding Jacobian*v products, in case
|
||
|
# the Jacobian changes a lot in the nonlinear step
|
||
|
#
|
||
|
# XXX: some trust-region inspired ideas might be more efficient...
|
||
|
# See e.g., Brown & Saad. But needs to be implemented separately
|
||
|
# since it's not an inexact Newton method.
|
||
|
self.method_kw.setdefault('store_outer_Av', False)
|
||
|
self.method_kw.setdefault('atol', 0)
|
||
|
|
||
|
for key, value in kw.items():
|
||
|
if not key.startswith('inner_'):
|
||
|
raise ValueError("Unknown parameter %s" % key)
|
||
|
self.method_kw[key[6:]] = value
|
||
|
|
||
|
def _update_diff_step(self):
|
||
|
mx = abs(self.x0).max()
|
||
|
mf = abs(self.f0).max()
|
||
|
self.omega = self.rdiff * max(1, mx) / max(1, mf)
|
||
|
|
||
|
def matvec(self, v):
|
||
|
nv = norm(v)
|
||
|
if nv == 0:
|
||
|
return 0*v
|
||
|
sc = self.omega / nv
|
||
|
r = (self.func(self.x0 + sc*v) - self.f0) / sc
|
||
|
if not np.all(np.isfinite(r)) and np.all(np.isfinite(v)):
|
||
|
raise ValueError('Function returned non-finite results')
|
||
|
return r
|
||
|
|
||
|
def solve(self, rhs, tol=0):
|
||
|
if 'tol' in self.method_kw:
|
||
|
sol, info = self.method(self.op, rhs, **self.method_kw)
|
||
|
else:
|
||
|
sol, info = self.method(self.op, rhs, tol=tol, **self.method_kw)
|
||
|
return sol
|
||
|
|
||
|
def update(self, x, f):
|
||
|
self.x0 = x
|
||
|
self.f0 = f
|
||
|
self._update_diff_step()
|
||
|
|
||
|
# Update also the preconditioner, if possible
|
||
|
if self.preconditioner is not None:
|
||
|
if hasattr(self.preconditioner, 'update'):
|
||
|
self.preconditioner.update(x, f)
|
||
|
|
||
|
def setup(self, x, f, func):
|
||
|
Jacobian.setup(self, x, f, func)
|
||
|
self.x0 = x
|
||
|
self.f0 = f
|
||
|
self.op = scipy.sparse.linalg.aslinearoperator(self)
|
||
|
|
||
|
if self.rdiff is None:
|
||
|
self.rdiff = np.finfo(x.dtype).eps ** (1./2)
|
||
|
|
||
|
self._update_diff_step()
|
||
|
|
||
|
# Setup also the preconditioner, if possible
|
||
|
if self.preconditioner is not None:
|
||
|
if hasattr(self.preconditioner, 'setup'):
|
||
|
self.preconditioner.setup(x, f, func)
|
||
|
|
||
|
|
||
|
#------------------------------------------------------------------------------
|
||
|
# Wrapper functions
|
||
|
#------------------------------------------------------------------------------
|
||
|
|
||
|
def _nonlin_wrapper(name, jac):
|
||
|
"""
|
||
|
Construct a solver wrapper with given name and Jacobian approx.
|
||
|
|
||
|
It inspects the keyword arguments of ``jac.__init__``, and allows to
|
||
|
use the same arguments in the wrapper function, in addition to the
|
||
|
keyword arguments of `nonlin_solve`
|
||
|
|
||
|
"""
|
||
|
signature = _getfullargspec(jac.__init__)
|
||
|
args, varargs, varkw, defaults, kwonlyargs, kwdefaults, _ = signature
|
||
|
kwargs = list(zip(args[-len(defaults):], defaults))
|
||
|
kw_str = ", ".join(["%s=%r" % (k, v) for k, v in kwargs])
|
||
|
if kw_str:
|
||
|
kw_str = ", " + kw_str
|
||
|
kwkw_str = ", ".join(["%s=%s" % (k, k) for k, v in kwargs])
|
||
|
if kwkw_str:
|
||
|
kwkw_str = kwkw_str + ", "
|
||
|
if kwonlyargs:
|
||
|
raise ValueError('Unexpected signature %s' % signature)
|
||
|
|
||
|
# Construct the wrapper function so that its keyword arguments
|
||
|
# are visible in pydoc.help etc.
|
||
|
wrapper = """
|
||
|
def %(name)s(F, xin, iter=None %(kw)s, verbose=False, maxiter=None,
|
||
|
f_tol=None, f_rtol=None, x_tol=None, x_rtol=None,
|
||
|
tol_norm=None, line_search='armijo', callback=None, **kw):
|
||
|
jac = %(jac)s(%(kwkw)s **kw)
|
||
|
return nonlin_solve(F, xin, jac, iter, verbose, maxiter,
|
||
|
f_tol, f_rtol, x_tol, x_rtol, tol_norm, line_search,
|
||
|
callback)
|
||
|
"""
|
||
|
|
||
|
wrapper = wrapper % dict(name=name, kw=kw_str, jac=jac.__name__,
|
||
|
kwkw=kwkw_str)
|
||
|
ns = {}
|
||
|
ns.update(globals())
|
||
|
exec(wrapper, ns)
|
||
|
func = ns[name]
|
||
|
func.__doc__ = jac.__doc__
|
||
|
_set_doc(func)
|
||
|
return func
|
||
|
|
||
|
|
||
|
broyden1 = _nonlin_wrapper('broyden1', BroydenFirst)
|
||
|
broyden2 = _nonlin_wrapper('broyden2', BroydenSecond)
|
||
|
anderson = _nonlin_wrapper('anderson', Anderson)
|
||
|
linearmixing = _nonlin_wrapper('linearmixing', LinearMixing)
|
||
|
diagbroyden = _nonlin_wrapper('diagbroyden', DiagBroyden)
|
||
|
excitingmixing = _nonlin_wrapper('excitingmixing', ExcitingMixing)
|
||
|
newton_krylov = _nonlin_wrapper('newton_krylov', KrylovJacobian)
|