568 lines
20 KiB
Python
568 lines
20 KiB
Python
"""Sparse Equations and Least Squares.
|
|
|
|
The original Fortran code was written by C. C. Paige and M. A. Saunders as
|
|
described in
|
|
|
|
C. C. Paige and M. A. Saunders, LSQR: An algorithm for sparse linear
|
|
equations and sparse least squares, TOMS 8(1), 43--71 (1982).
|
|
|
|
C. C. Paige and M. A. Saunders, Algorithm 583; LSQR: Sparse linear
|
|
equations and least-squares problems, TOMS 8(2), 195--209 (1982).
|
|
|
|
It is licensed under the following BSD license:
|
|
|
|
Copyright (c) 2006, Systems Optimization Laboratory
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are
|
|
met:
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above
|
|
copyright notice, this list of conditions and the following
|
|
disclaimer in the documentation and/or other materials provided
|
|
with the distribution.
|
|
|
|
* Neither the name of Stanford University nor the names of its
|
|
contributors may be used to endorse or promote products derived
|
|
from this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
The Fortran code was translated to Python for use in CVXOPT by Jeffery
|
|
Kline with contributions by Mridul Aanjaneya and Bob Myhill.
|
|
|
|
Adapted for SciPy by Stefan van der Walt.
|
|
|
|
"""
|
|
|
|
__all__ = ['lsqr']
|
|
|
|
import numpy as np
|
|
from math import sqrt
|
|
from scipy.sparse.linalg.interface import aslinearoperator
|
|
|
|
eps = np.finfo(np.float64).eps
|
|
|
|
|
|
def _sym_ortho(a, b):
|
|
"""
|
|
Stable implementation of Givens rotation.
|
|
|
|
Notes
|
|
-----
|
|
The routine 'SymOrtho' was added for numerical stability. This is
|
|
recommended by S.-C. Choi in [1]_. It removes the unpleasant potential of
|
|
``1/eps`` in some important places (see, for example text following
|
|
"Compute the next plane rotation Qk" in minres.py).
|
|
|
|
References
|
|
----------
|
|
.. [1] S.-C. Choi, "Iterative Methods for Singular Linear Equations
|
|
and Least-Squares Problems", Dissertation,
|
|
http://www.stanford.edu/group/SOL/dissertations/sou-cheng-choi-thesis.pdf
|
|
|
|
"""
|
|
if b == 0:
|
|
return np.sign(a), 0, abs(a)
|
|
elif a == 0:
|
|
return 0, np.sign(b), abs(b)
|
|
elif abs(b) > abs(a):
|
|
tau = a / b
|
|
s = np.sign(b) / sqrt(1 + tau * tau)
|
|
c = s * tau
|
|
r = b / s
|
|
else:
|
|
tau = b / a
|
|
c = np.sign(a) / sqrt(1+tau*tau)
|
|
s = c * tau
|
|
r = a / c
|
|
return c, s, r
|
|
|
|
|
|
def lsqr(A, b, damp=0.0, atol=1e-8, btol=1e-8, conlim=1e8,
|
|
iter_lim=None, show=False, calc_var=False, x0=None):
|
|
"""Find the least-squares solution to a large, sparse, linear system
|
|
of equations.
|
|
|
|
The function solves ``Ax = b`` or ``min ||Ax - b||^2`` or
|
|
``min ||Ax - b||^2 + d^2 ||x||^2``.
|
|
|
|
The matrix A may be square or rectangular (over-determined or
|
|
under-determined), and may have any rank.
|
|
|
|
::
|
|
|
|
1. Unsymmetric equations -- solve A*x = b
|
|
|
|
2. Linear least squares -- solve A*x = b
|
|
in the least-squares sense
|
|
|
|
3. Damped least squares -- solve ( A )*x = ( b )
|
|
( damp*I ) ( 0 )
|
|
in the least-squares sense
|
|
|
|
Parameters
|
|
----------
|
|
A : {sparse matrix, ndarray, LinearOperator}
|
|
Representation of an m-by-n matrix.
|
|
Alternatively, ``A`` can be a linear operator which can
|
|
produce ``Ax`` and ``A^T x`` using, e.g.,
|
|
``scipy.sparse.linalg.LinearOperator``.
|
|
b : array_like, shape (m,)
|
|
Right-hand side vector ``b``.
|
|
damp : float
|
|
Damping coefficient.
|
|
atol, btol : float, optional
|
|
Stopping tolerances. If both are 1.0e-9 (say), the final
|
|
residual norm should be accurate to about 9 digits. (The
|
|
final x will usually have fewer correct digits, depending on
|
|
cond(A) and the size of damp.)
|
|
conlim : float, optional
|
|
Another stopping tolerance. lsqr terminates if an estimate of
|
|
``cond(A)`` exceeds `conlim`. For compatible systems ``Ax =
|
|
b``, `conlim` could be as large as 1.0e+12 (say). For
|
|
least-squares problems, conlim should be less than 1.0e+8.
|
|
Maximum precision can be obtained by setting ``atol = btol =
|
|
conlim = zero``, but the number of iterations may then be
|
|
excessive.
|
|
iter_lim : int, optional
|
|
Explicit limitation on number of iterations (for safety).
|
|
show : bool, optional
|
|
Display an iteration log.
|
|
calc_var : bool, optional
|
|
Whether to estimate diagonals of ``(A'A + damp^2*I)^{-1}``.
|
|
x0 : array_like, shape (n,), optional
|
|
Initial guess of x, if None zeros are used.
|
|
|
|
.. versionadded:: 1.0.0
|
|
|
|
Returns
|
|
-------
|
|
x : ndarray of float
|
|
The final solution.
|
|
istop : int
|
|
Gives the reason for termination.
|
|
1 means x is an approximate solution to Ax = b.
|
|
2 means x approximately solves the least-squares problem.
|
|
itn : int
|
|
Iteration number upon termination.
|
|
r1norm : float
|
|
``norm(r)``, where ``r = b - Ax``.
|
|
r2norm : float
|
|
``sqrt( norm(r)^2 + damp^2 * norm(x)^2 )``. Equal to `r1norm` if
|
|
``damp == 0``.
|
|
anorm : float
|
|
Estimate of Frobenius norm of ``Abar = [[A]; [damp*I]]``.
|
|
acond : float
|
|
Estimate of ``cond(Abar)``.
|
|
arnorm : float
|
|
Estimate of ``norm(A'*r - damp^2*x)``.
|
|
xnorm : float
|
|
``norm(x)``
|
|
var : ndarray of float
|
|
If ``calc_var`` is True, estimates all diagonals of
|
|
``(A'A)^{-1}`` (if ``damp == 0``) or more generally ``(A'A +
|
|
damp^2*I)^{-1}``. This is well defined if A has full column
|
|
rank or ``damp > 0``. (Not sure what var means if ``rank(A)
|
|
< n`` and ``damp = 0.``)
|
|
|
|
Notes
|
|
-----
|
|
LSQR uses an iterative method to approximate the solution. The
|
|
number of iterations required to reach a certain accuracy depends
|
|
strongly on the scaling of the problem. Poor scaling of the rows
|
|
or columns of A should therefore be avoided where possible.
|
|
|
|
For example, in problem 1 the solution is unaltered by
|
|
row-scaling. If a row of A is very small or large compared to
|
|
the other rows of A, the corresponding row of ( A b ) should be
|
|
scaled up or down.
|
|
|
|
In problems 1 and 2, the solution x is easily recovered
|
|
following column-scaling. Unless better information is known,
|
|
the nonzero columns of A should be scaled so that they all have
|
|
the same Euclidean norm (e.g., 1.0).
|
|
|
|
In problem 3, there is no freedom to re-scale if damp is
|
|
nonzero. However, the value of damp should be assigned only
|
|
after attention has been paid to the scaling of A.
|
|
|
|
The parameter damp is intended to help regularize
|
|
ill-conditioned systems, by preventing the true solution from
|
|
being very large. Another aid to regularization is provided by
|
|
the parameter acond, which may be used to terminate iterations
|
|
before the computed solution becomes very large.
|
|
|
|
If some initial estimate ``x0`` is known and if ``damp == 0``,
|
|
one could proceed as follows:
|
|
|
|
1. Compute a residual vector ``r0 = b - A*x0``.
|
|
2. Use LSQR to solve the system ``A*dx = r0``.
|
|
3. Add the correction dx to obtain a final solution ``x = x0 + dx``.
|
|
|
|
This requires that ``x0`` be available before and after the call
|
|
to LSQR. To judge the benefits, suppose LSQR takes k1 iterations
|
|
to solve A*x = b and k2 iterations to solve A*dx = r0.
|
|
If x0 is "good", norm(r0) will be smaller than norm(b).
|
|
If the same stopping tolerances atol and btol are used for each
|
|
system, k1 and k2 will be similar, but the final solution x0 + dx
|
|
should be more accurate. The only way to reduce the total work
|
|
is to use a larger stopping tolerance for the second system.
|
|
If some value btol is suitable for A*x = b, the larger value
|
|
btol*norm(b)/norm(r0) should be suitable for A*dx = r0.
|
|
|
|
Preconditioning is another way to reduce the number of iterations.
|
|
If it is possible to solve a related system ``M*x = b``
|
|
efficiently, where M approximates A in some helpful way (e.g. M -
|
|
A has low rank or its elements are small relative to those of A),
|
|
LSQR may converge more rapidly on the system ``A*M(inverse)*z =
|
|
b``, after which x can be recovered by solving M*x = z.
|
|
|
|
If A is symmetric, LSQR should not be used!
|
|
|
|
Alternatives are the symmetric conjugate-gradient method (cg)
|
|
and/or SYMMLQ. SYMMLQ is an implementation of symmetric cg that
|
|
applies to any symmetric A and will converge more rapidly than
|
|
LSQR. If A is positive definite, there are other implementations
|
|
of symmetric cg that require slightly less work per iteration than
|
|
SYMMLQ (but will take the same number of iterations).
|
|
|
|
References
|
|
----------
|
|
.. [1] C. C. Paige and M. A. Saunders (1982a).
|
|
"LSQR: An algorithm for sparse linear equations and
|
|
sparse least squares", ACM TOMS 8(1), 43-71.
|
|
.. [2] C. C. Paige and M. A. Saunders (1982b).
|
|
"Algorithm 583. LSQR: Sparse linear equations and least
|
|
squares problems", ACM TOMS 8(2), 195-209.
|
|
.. [3] M. A. Saunders (1995). "Solution of sparse rectangular
|
|
systems using LSQR and CRAIG", BIT 35, 588-604.
|
|
|
|
Examples
|
|
--------
|
|
>>> from scipy.sparse import csc_matrix
|
|
>>> from scipy.sparse.linalg import lsqr
|
|
>>> A = csc_matrix([[1., 0.], [1., 1.], [0., 1.]], dtype=float)
|
|
|
|
The first example has the trivial solution `[0, 0]`
|
|
|
|
>>> b = np.array([0., 0., 0.], dtype=float)
|
|
>>> x, istop, itn, normr = lsqr(A, b)[:4]
|
|
The exact solution is x = 0
|
|
>>> istop
|
|
0
|
|
>>> x
|
|
array([ 0., 0.])
|
|
|
|
The stopping code `istop=0` returned indicates that a vector of zeros was
|
|
found as a solution. The returned solution `x` indeed contains `[0., 0.]`.
|
|
The next example has a non-trivial solution:
|
|
|
|
>>> b = np.array([1., 0., -1.], dtype=float)
|
|
>>> x, istop, itn, r1norm = lsqr(A, b)[:4]
|
|
>>> istop
|
|
1
|
|
>>> x
|
|
array([ 1., -1.])
|
|
>>> itn
|
|
1
|
|
>>> r1norm
|
|
4.440892098500627e-16
|
|
|
|
As indicated by `istop=1`, `lsqr` found a solution obeying the tolerance
|
|
limits. The given solution `[1., -1.]` obviously solves the equation. The
|
|
remaining return values include information about the number of iterations
|
|
(`itn=1`) and the remaining difference of left and right side of the solved
|
|
equation.
|
|
The final example demonstrates the behavior in the case where there is no
|
|
solution for the equation:
|
|
|
|
>>> b = np.array([1., 0.01, -1.], dtype=float)
|
|
>>> x, istop, itn, r1norm = lsqr(A, b)[:4]
|
|
>>> istop
|
|
2
|
|
>>> x
|
|
array([ 1.00333333, -0.99666667])
|
|
>>> A.dot(x)-b
|
|
array([ 0.00333333, -0.00333333, 0.00333333])
|
|
>>> r1norm
|
|
0.005773502691896255
|
|
|
|
`istop` indicates that the system is inconsistent and thus `x` is rather an
|
|
approximate solution to the corresponding least-squares problem. `r1norm`
|
|
contains the norm of the minimal residual that was found.
|
|
"""
|
|
A = aslinearoperator(A)
|
|
b = np.atleast_1d(b)
|
|
if b.ndim > 1:
|
|
b = b.squeeze()
|
|
|
|
m, n = A.shape
|
|
if iter_lim is None:
|
|
iter_lim = 2 * n
|
|
var = np.zeros(n)
|
|
|
|
msg = ('The exact solution is x = 0 ',
|
|
'Ax - b is small enough, given atol, btol ',
|
|
'The least-squares solution is good enough, given atol ',
|
|
'The estimate of cond(Abar) has exceeded conlim ',
|
|
'Ax - b is small enough for this machine ',
|
|
'The least-squares solution is good enough for this machine',
|
|
'Cond(Abar) seems to be too large for this machine ',
|
|
'The iteration limit has been reached ')
|
|
|
|
if show:
|
|
print(' ')
|
|
print('LSQR Least-squares solution of Ax = b')
|
|
str1 = f'The matrix A has {m} rows and {n} columns'
|
|
str2 = 'damp = %20.14e calc_var = %8g' % (damp, calc_var)
|
|
str3 = 'atol = %8.2e conlim = %8.2e' % (atol, conlim)
|
|
str4 = 'btol = %8.2e iter_lim = %8g' % (btol, iter_lim)
|
|
print(str1)
|
|
print(str2)
|
|
print(str3)
|
|
print(str4)
|
|
|
|
itn = 0
|
|
istop = 0
|
|
ctol = 0
|
|
if conlim > 0:
|
|
ctol = 1/conlim
|
|
anorm = 0
|
|
acond = 0
|
|
dampsq = damp**2
|
|
ddnorm = 0
|
|
res2 = 0
|
|
xnorm = 0
|
|
xxnorm = 0
|
|
z = 0
|
|
cs2 = -1
|
|
sn2 = 0
|
|
|
|
"""
|
|
Set up the first vectors u and v for the bidiagonalization.
|
|
These satisfy beta*u = b - A*x, alfa*v = A'*u.
|
|
"""
|
|
u = b
|
|
bnorm = np.linalg.norm(b)
|
|
if x0 is None:
|
|
x = np.zeros(n)
|
|
beta = bnorm.copy()
|
|
else:
|
|
x = np.asarray(x0)
|
|
u = u - A.matvec(x)
|
|
beta = np.linalg.norm(u)
|
|
|
|
if beta > 0:
|
|
u = (1/beta) * u
|
|
v = A.rmatvec(u)
|
|
alfa = np.linalg.norm(v)
|
|
else:
|
|
v = x.copy()
|
|
alfa = 0
|
|
|
|
if alfa > 0:
|
|
v = (1/alfa) * v
|
|
w = v.copy()
|
|
|
|
rhobar = alfa
|
|
phibar = beta
|
|
rnorm = beta
|
|
r1norm = rnorm
|
|
r2norm = rnorm
|
|
|
|
# Reverse the order here from the original matlab code because
|
|
# there was an error on return when arnorm==0
|
|
arnorm = alfa * beta
|
|
if arnorm == 0:
|
|
print(msg[0])
|
|
return x, istop, itn, r1norm, r2norm, anorm, acond, arnorm, xnorm, var
|
|
|
|
head1 = ' Itn x[0] r1norm r2norm '
|
|
head2 = ' Compatible LS Norm A Cond A'
|
|
|
|
if show:
|
|
print(' ')
|
|
print(head1, head2)
|
|
test1 = 1
|
|
test2 = alfa / beta
|
|
str1 = '%6g %12.5e' % (itn, x[0])
|
|
str2 = ' %10.3e %10.3e' % (r1norm, r2norm)
|
|
str3 = ' %8.1e %8.1e' % (test1, test2)
|
|
print(str1, str2, str3)
|
|
|
|
# Main iteration loop.
|
|
while itn < iter_lim:
|
|
itn = itn + 1
|
|
"""
|
|
% Perform the next step of the bidiagonalization to obtain the
|
|
% next beta, u, alfa, v. These satisfy the relations
|
|
% beta*u = a*v - alfa*u,
|
|
% alfa*v = A'*u - beta*v.
|
|
"""
|
|
u = A.matvec(v) - alfa * u
|
|
beta = np.linalg.norm(u)
|
|
|
|
if beta > 0:
|
|
u = (1/beta) * u
|
|
anorm = sqrt(anorm**2 + alfa**2 + beta**2 + damp**2)
|
|
v = A.rmatvec(u) - beta * v
|
|
alfa = np.linalg.norm(v)
|
|
if alfa > 0:
|
|
v = (1 / alfa) * v
|
|
|
|
# Use a plane rotation to eliminate the damping parameter.
|
|
# This alters the diagonal (rhobar) of the lower-bidiagonal matrix.
|
|
rhobar1 = sqrt(rhobar**2 + damp**2)
|
|
cs1 = rhobar / rhobar1
|
|
sn1 = damp / rhobar1
|
|
psi = sn1 * phibar
|
|
phibar = cs1 * phibar
|
|
|
|
# Use a plane rotation to eliminate the subdiagonal element (beta)
|
|
# of the lower-bidiagonal matrix, giving an upper-bidiagonal matrix.
|
|
cs, sn, rho = _sym_ortho(rhobar1, beta)
|
|
|
|
theta = sn * alfa
|
|
rhobar = -cs * alfa
|
|
phi = cs * phibar
|
|
phibar = sn * phibar
|
|
tau = sn * phi
|
|
|
|
# Update x and w.
|
|
t1 = phi / rho
|
|
t2 = -theta / rho
|
|
dk = (1 / rho) * w
|
|
|
|
x = x + t1 * w
|
|
w = v + t2 * w
|
|
ddnorm = ddnorm + np.linalg.norm(dk)**2
|
|
|
|
if calc_var:
|
|
var = var + dk**2
|
|
|
|
# Use a plane rotation on the right to eliminate the
|
|
# super-diagonal element (theta) of the upper-bidiagonal matrix.
|
|
# Then use the result to estimate norm(x).
|
|
delta = sn2 * rho
|
|
gambar = -cs2 * rho
|
|
rhs = phi - delta * z
|
|
zbar = rhs / gambar
|
|
xnorm = sqrt(xxnorm + zbar**2)
|
|
gamma = sqrt(gambar**2 + theta**2)
|
|
cs2 = gambar / gamma
|
|
sn2 = theta / gamma
|
|
z = rhs / gamma
|
|
xxnorm = xxnorm + z**2
|
|
|
|
# Test for convergence.
|
|
# First, estimate the condition of the matrix Abar,
|
|
# and the norms of rbar and Abar'rbar.
|
|
acond = anorm * sqrt(ddnorm)
|
|
res1 = phibar**2
|
|
res2 = res2 + psi**2
|
|
rnorm = sqrt(res1 + res2)
|
|
arnorm = alfa * abs(tau)
|
|
|
|
# Distinguish between
|
|
# r1norm = ||b - Ax|| and
|
|
# r2norm = rnorm in current code
|
|
# = sqrt(r1norm^2 + damp^2*||x||^2).
|
|
# Estimate r1norm from
|
|
# r1norm = sqrt(r2norm^2 - damp^2*||x||^2).
|
|
# Although there is cancellation, it might be accurate enough.
|
|
r1sq = rnorm**2 - dampsq * xxnorm
|
|
r1norm = sqrt(abs(r1sq))
|
|
if r1sq < 0:
|
|
r1norm = -r1norm
|
|
r2norm = rnorm
|
|
|
|
# Now use these norms to estimate certain other quantities,
|
|
# some of which will be small near a solution.
|
|
test1 = rnorm / bnorm
|
|
test2 = arnorm / (anorm * rnorm + eps)
|
|
test3 = 1 / (acond + eps)
|
|
t1 = test1 / (1 + anorm * xnorm / bnorm)
|
|
rtol = btol + atol * anorm * xnorm / bnorm
|
|
|
|
# The following tests guard against extremely small values of
|
|
# atol, btol or ctol. (The user may have set any or all of
|
|
# the parameters atol, btol, conlim to 0.)
|
|
# The effect is equivalent to the normal tests using
|
|
# atol = eps, btol = eps, conlim = 1/eps.
|
|
if itn >= iter_lim:
|
|
istop = 7
|
|
if 1 + test3 <= 1:
|
|
istop = 6
|
|
if 1 + test2 <= 1:
|
|
istop = 5
|
|
if 1 + t1 <= 1:
|
|
istop = 4
|
|
|
|
# Allow for tolerances set by the user.
|
|
if test3 <= ctol:
|
|
istop = 3
|
|
if test2 <= atol:
|
|
istop = 2
|
|
if test1 <= rtol:
|
|
istop = 1
|
|
|
|
# See if it is time to print something.
|
|
prnt = False
|
|
if n <= 40:
|
|
prnt = True
|
|
if itn <= 10:
|
|
prnt = True
|
|
if itn >= iter_lim-10:
|
|
prnt = True
|
|
# if itn%10 == 0: prnt = True
|
|
if test3 <= 2*ctol:
|
|
prnt = True
|
|
if test2 <= 10*atol:
|
|
prnt = True
|
|
if test1 <= 10*rtol:
|
|
prnt = True
|
|
if istop != 0:
|
|
prnt = True
|
|
|
|
if prnt:
|
|
if show:
|
|
str1 = '%6g %12.5e' % (itn, x[0])
|
|
str2 = ' %10.3e %10.3e' % (r1norm, r2norm)
|
|
str3 = ' %8.1e %8.1e' % (test1, test2)
|
|
str4 = ' %8.1e %8.1e' % (anorm, acond)
|
|
print(str1, str2, str3, str4)
|
|
|
|
if istop != 0:
|
|
break
|
|
|
|
# End of iteration loop.
|
|
# Print the stopping condition.
|
|
if show:
|
|
print(' ')
|
|
print('LSQR finished')
|
|
print(msg[istop])
|
|
print(' ')
|
|
str1 = 'istop =%8g r1norm =%8.1e' % (istop, r1norm)
|
|
str2 = 'anorm =%8.1e arnorm =%8.1e' % (anorm, arnorm)
|
|
str3 = 'itn =%8g r2norm =%8.1e' % (itn, r2norm)
|
|
str4 = 'acond =%8.1e xnorm =%8.1e' % (acond, xnorm)
|
|
print(str1 + ' ' + str2)
|
|
print(str3 + ' ' + str4)
|
|
print(' ')
|
|
|
|
return x, istop, itn, r1norm, r2norm, anorm, acond, arnorm, xnorm, var
|