Fixed database typo and removed unnecessary class identifier.
This commit is contained in:
parent
00ad49a143
commit
45fb349a7d
5098 changed files with 952558 additions and 85 deletions
415
venv/Lib/site-packages/scipy/optimize/__init__.py
Normal file
|
@@ -0,0 +1,415 @@
|
|||
"""
|
||||
=====================================================
|
||||
Optimization and root finding (:mod:`scipy.optimize`)
|
||||
=====================================================
|
||||
|
||||
.. currentmodule:: scipy.optimize
|
||||
|
||||
SciPy ``optimize`` provides functions for minimizing (or maximizing)
|
||||
objective functions, possibly subject to constraints. It includes
|
||||
solvers for nonlinear problems (with support for both local and global
|
||||
optimization algorithms), linear programming, constrained
|
||||
and nonlinear least-squares, root finding, and curve fitting.
|
||||
|
||||
Common functions and objects, shared across different solvers, are:
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
show_options - Show specific options of the optimization solvers.
|
||||
OptimizeResult - The optimization result returned by some optimizers.
|
||||
OptimizeWarning - The optimization encountered problems.
|
||||
|
||||
|
||||
Optimization
|
||||
============
|
||||
|
||||
Scalar functions optimization
|
||||
-----------------------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
minimize_scalar - Interface for minimizers of univariate functions
|
||||
|
||||
The `minimize_scalar` function supports the following methods:
|
||||
|
||||
.. toctree::
|
||||
|
||||
optimize.minimize_scalar-brent
|
||||
optimize.minimize_scalar-bounded
|
||||
optimize.minimize_scalar-golden
|
||||
|
||||
Local (multivariate) optimization
|
||||
---------------------------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
minimize - Interface for minimizers of multivariate functions.
|
||||
|
||||
The `minimize` function supports the following methods:
|
||||
|
||||
.. toctree::
|
||||
|
||||
optimize.minimize-neldermead
|
||||
optimize.minimize-powell
|
||||
optimize.minimize-cg
|
||||
optimize.minimize-bfgs
|
||||
optimize.minimize-newtoncg
|
||||
optimize.minimize-lbfgsb
|
||||
optimize.minimize-tnc
|
||||
optimize.minimize-cobyla
|
||||
optimize.minimize-slsqp
|
||||
optimize.minimize-trustconstr
|
||||
optimize.minimize-dogleg
|
||||
optimize.minimize-trustncg
|
||||
optimize.minimize-trustkrylov
|
||||
optimize.minimize-trustexact
|
||||
|
||||
Constraints are passed to the `minimize` function as a single object or
|
||||
as a list of objects from the following classes:
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
NonlinearConstraint - Class defining general nonlinear constraints.
|
||||
LinearConstraint - Class defining general linear constraints.
|
||||
|
||||
Simple bound constraints are handled separately and there is a special class
|
||||
for them:
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
Bounds - Bound constraints.
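A minimal usage sketch of passing constraints and bounds to `minimize` (the quadratic objective and the numbers below are illustrative only):

>>> import numpy as np
>>> from scipy.optimize import minimize, LinearConstraint, Bounds
>>> objective = lambda x: (x[0] - 1)**2 + (x[1] - 2.5)**2
>>> lc = LinearConstraint([[1, 1]], -np.inf, 3)   # x0 + x1 <= 3
>>> bnds = Bounds([0, 0], [np.inf, np.inf])       # x0, x1 >= 0
>>> res = minimize(objective, [2, 0], method='trust-constr',
...                constraints=[lc], bounds=bnds)
>>> np.round(res.x, 2)                            # approximately [0.75, 2.25]
array([0.75, 2.25])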
|
||||
|
||||
Quasi-Newton strategies implementing the `HessianUpdateStrategy`
|
||||
interface can be used to approximate the Hessian in the `minimize`
|
||||
function (available only for the 'trust-constr' method). Available
|
||||
quasi-Newton methods implementing this interface are:
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
BFGS - Broyden-Fletcher-Goldfarb-Shanno (BFGS) Hessian update strategy.
|
||||
SR1 - Symmetric-rank-1 Hessian update strategy.
|
||||
|
||||
Global optimization
|
||||
-------------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
basinhopping - Basinhopping stochastic optimizer.
|
||||
brute - Brute force searching optimizer.
|
||||
differential_evolution - Stochastic minimization using differential evolution.
|
||||
|
||||
shgo - Simplicial homology global optimization.
|
||||
dual_annealing - Dual annealing stochastic optimizer.
|
||||
|
||||
|
||||
Least-squares and curve fitting
|
||||
===============================
|
||||
|
||||
Nonlinear least-squares
|
||||
-----------------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
least_squares - Solve a nonlinear least-squares problem with bounds on the variables.
|
||||
|
||||
Linear least-squares
|
||||
--------------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
nnls - Linear least-squares problem with non-negativity constraint.
|
||||
lsq_linear - Linear least-squares problem with bound constraints.
|
||||
|
||||
Curve fitting
|
||||
-------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
curve_fit -- Fit curve to a set of points.
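A minimal sketch of `curve_fit` on noise-free synthetic data (the model and parameter values are illustrative):

>>> import numpy as np
>>> from scipy.optimize import curve_fit
>>> def model(x, a, b):
...     return a * np.exp(-b * x)
>>> xdata = np.linspace(0, 4, 50)
>>> ydata = model(xdata, 2.5, 1.3)
>>> popt, pcov = curve_fit(model, xdata, ydata)
>>> np.round(popt, 1)
array([2.5, 1.3])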
|
||||
|
||||
Root finding
|
||||
============
|
||||
|
||||
Scalar functions
|
||||
----------------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
root_scalar - Unified interface for nonlinear solvers of scalar functions.
|
||||
brentq - Brent's method with inverse quadratic interpolation.
|
||||
brenth - Brent method, modified by Harris with hyperbolic extrapolation.
|
||||
ridder - Ridder's method.
|
||||
bisect - Bisection method.
|
||||
newton - Newton's method (also Secant and Halley's methods).
|
||||
toms748 - Alefeld, Potra & Shi Algorithm 748.
|
||||
RootResults - The root finding result returned by some root finders.
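For example (an illustrative cubic; the bracket endpoints are arbitrary choices), the bracketing solvers can be called directly or through `root_scalar`:

>>> from scipy.optimize import brentq, root_scalar
>>> f = lambda x: x**3 - 1
>>> round(brentq(f, 0, 2), 6)
1.0
>>> sol = root_scalar(f, bracket=[0, 2], method='brentq')
>>> round(sol.root, 6)
1.0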
|
||||
|
||||
The `root_scalar` function supports the following methods:
|
||||
|
||||
.. toctree::
|
||||
|
||||
optimize.root_scalar-brentq
|
||||
optimize.root_scalar-brenth
|
||||
optimize.root_scalar-bisect
|
||||
optimize.root_scalar-ridder
|
||||
optimize.root_scalar-newton
|
||||
optimize.root_scalar-toms748
|
||||
optimize.root_scalar-secant
|
||||
optimize.root_scalar-halley
|
||||
|
||||
|
||||
|
||||
The table below lists situations and appropriate methods, along with
|
||||
*asymptotic* convergence rates per iteration (and per function evaluation)
|
||||
for successful convergence to a simple root(*).
|
||||
Bisection is the slowest of them all, adding one bit of accuracy for each
|
||||
function evaluation, but is guaranteed to converge.
|
||||
The other bracketing methods all (eventually) increase the number of accurate
|
||||
bits by about 50% for every function evaluation.
|
||||
The derivative-based methods, all built on `newton`, can converge quite quickly
|
||||
if the initial value is close to the root. They can also be applied to
|
||||
functions defined on (a subset of) the complex plane.
|
||||
|
||||
+-------------+----------+----------+-----------+-------------+-------------+----------------+
|
||||
| Domain of f | Bracket? | Derivatives? | Solvers | Convergence |
|
||||
+ + +----------+-----------+ +-------------+----------------+
|
||||
| | | `fprime` | `fprime2` | | Guaranteed? | Rate(s)(*) |
|
||||
+=============+==========+==========+===========+=============+=============+================+
|
||||
| `R` | Yes | N/A | N/A | - bisection | - Yes | - 1 "Linear" |
|
||||
| | | | | - brentq | - Yes | - >=1, <= 1.62 |
|
||||
| | | | | - brenth | - Yes | - >=1, <= 1.62 |
|
||||
| | | | | - ridder | - Yes | - 2.0 (1.41) |
|
||||
| | | | | - toms748 | - Yes | - 2.7 (1.65) |
|
||||
+-------------+----------+----------+-----------+-------------+-------------+----------------+
|
||||
| `R` or `C` | No | No | No | secant | No | 1.62 (1.62) |
|
||||
+-------------+----------+----------+-----------+-------------+-------------+----------------+
|
||||
| `R` or `C` | No | Yes | No | newton | No | 2.00 (1.41) |
|
||||
+-------------+----------+----------+-----------+-------------+-------------+----------------+
|
||||
| `R` or `C` | No | Yes | Yes | halley | No | 3.00 (1.44) |
|
||||
+-------------+----------+----------+-----------+-------------+-------------+----------------+
|
||||
|
||||
.. seealso::
|
||||
|
||||
`scipy.optimize.cython_optimize` -- Typed Cython versions of zeros functions
|
||||
|
||||
Fixed point finding:
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
fixed_point - Single-variable fixed-point solver.
|
||||
|
||||
Multidimensional
|
||||
----------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
root - Unified interface for nonlinear solvers of multivariate functions.
|
||||
|
||||
The `root` function supports the following methods:
|
||||
|
||||
.. toctree::
|
||||
|
||||
optimize.root-hybr
|
||||
optimize.root-lm
|
||||
optimize.root-broyden1
|
||||
optimize.root-broyden2
|
||||
optimize.root-anderson
|
||||
optimize.root-linearmixing
|
||||
optimize.root-diagbroyden
|
||||
optimize.root-excitingmixing
|
||||
optimize.root-krylov
|
||||
optimize.root-dfsane
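A minimal sketch of solving a two-equation nonlinear system with `root` (the system below is illustrative):

>>> import numpy as np
>>> from scipy.optimize import root
>>> def system(x):
...     return [x[0] + 0.5 * (x[0] - x[1])**3 - 1.0,
...             0.5 * (x[1] - x[0])**3 + x[1]]
>>> sol = root(system, [0, 0], method='hybr')
>>> np.round(sol.x, 4)
array([0.8411, 0.1589])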
|
||||
|
||||
Linear programming
|
||||
==================
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
linprog -- Unified interface for minimizers of linear programming problems.
|
||||
|
||||
The `linprog` function supports the following methods:
|
||||
|
||||
.. toctree::
|
||||
|
||||
optimize.linprog-simplex
|
||||
optimize.linprog-interior-point
|
||||
optimize.linprog-revised_simplex
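A minimal sketch of `linprog` (the cost vector and constraints below are illustrative):

>>> from scipy.optimize import linprog
>>> c = [-1, 4]                      # minimize -x0 + 4*x1
>>> A_ub = [[-3, 1], [1, 2]]
>>> b_ub = [6, 4]
>>> res = linprog(c, A_ub=A_ub, b_ub=b_ub,
...               bounds=[(None, None), (-3, None)])
>>> int(round(res.fun))              # optimum is -22 at x = [10, -3]
-22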
|
||||
|
||||
The simplex method supports callback functions, such as:
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
linprog_verbose_callback -- Sample callback function for linprog (simplex).
|
||||
|
||||
Assignment problems:
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
linear_sum_assignment -- Solves the linear-sum assignment problem.
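For example (a 3x3 cost matrix chosen for illustration):

>>> import numpy as np
>>> from scipy.optimize import linear_sum_assignment
>>> cost = np.array([[4, 1, 3], [2, 0, 5], [3, 2, 2]])
>>> row_ind, col_ind = linear_sum_assignment(cost)
>>> col_ind
array([1, 0, 2])
>>> int(cost[row_ind, col_ind].sum())
5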
|
||||
|
||||
Utilities
|
||||
=========
|
||||
|
||||
Finite-difference approximation
|
||||
-------------------------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
approx_fprime - Approximate the gradient of a scalar function.
|
||||
check_grad - Check the supplied derivative using finite differences.
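A minimal checking sketch (the quadratic function and evaluation points are illustrative):

>>> import numpy as np
>>> from scipy.optimize import approx_fprime, check_grad
>>> f = lambda x: x[0]**2 + x[1]**2
>>> grad = lambda x: np.array([2 * x[0], 2 * x[1]])
>>> np.allclose(approx_fprime(np.array([1.0, 1.0]), f, 1e-8), [2.0, 2.0])
True
>>> float(check_grad(f, grad, np.array([1.5, -1.5]))) < 1e-6
True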
|
||||
|
||||
|
||||
Line search
|
||||
-----------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
bracket - Bracket a minimum, given two starting points.
|
||||
line_search - Return a step that satisfies the strong Wolfe conditions.
|
||||
|
||||
Hessian approximation
|
||||
---------------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
LbfgsInvHessProduct - Linear operator for L-BFGS approximate inverse Hessian.
|
||||
HessianUpdateStrategy - Interface for implementing Hessian update strategies
|
||||
|
||||
Benchmark problems
|
||||
------------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
rosen - The Rosenbrock function.
|
||||
rosen_der - The derivative of the Rosenbrock function.
|
||||
rosen_hess - The Hessian matrix of the Rosenbrock function.
|
||||
rosen_hess_prod - Product of the Rosenbrock Hessian with a vector.
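These can be used to exercise the minimizers above, for instance (the starting point is chosen arbitrarily):

>>> import numpy as np
>>> from scipy.optimize import minimize, rosen, rosen_der
>>> x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
>>> res = minimize(rosen, x0, method='BFGS', jac=rosen_der)
>>> np.round(res.x, 2)               # the minimum is at [1, 1, 1, 1, 1]
array([1., 1., 1., 1., 1.])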
|
||||
|
||||
Legacy functions
|
||||
================
|
||||
|
||||
The functions below are not recommended for use in new scripts;
|
||||
all of these methods are accessible via a newer, more consistent
|
||||
interface provided by the functions above.
|
||||
|
||||
Optimization
|
||||
------------
|
||||
|
||||
General-purpose multivariate methods:
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
fmin - Nelder-Mead Simplex algorithm.
|
||||
fmin_powell - Powell's (modified) direction set method.
|
||||
fmin_cg - Non-linear (Polak-Ribiere) conjugate gradient algorithm.
|
||||
fmin_bfgs - Quasi-Newton method (Broyden-Fletcher-Goldfarb-Shanno).
|
||||
fmin_ncg - Line-search Newton Conjugate Gradient.
|
||||
|
||||
Constrained multivariate methods:
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
fmin_l_bfgs_b - Zhu, Byrd, and Nocedal's constrained optimizer.
|
||||
fmin_tnc - Truncated Newton code.
|
||||
fmin_cobyla - Constrained optimization by linear approximation.
|
||||
fmin_slsqp - Minimization using sequential least-squares programming.
|
||||
|
||||
Univariate (scalar) minimization methods:
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
fminbound - Bounded minimization of a scalar function.
|
||||
brent - 1-D function minimization using Brent's method.
|
||||
golden - 1-D function minimization using Golden Section method.
|
||||
|
||||
Least-squares
|
||||
-------------
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
leastsq - Minimize the sum of squares of M equations in N unknowns.
|
||||
|
||||
Root finding
|
||||
------------
|
||||
|
||||
General nonlinear solvers:
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
fsolve - Non-linear multivariable equation solver.
|
||||
broyden1 - Broyden's first method.
|
||||
broyden2 - Broyden's second method.
|
||||
|
||||
Large-scale nonlinear solvers:
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
newton_krylov
|
||||
anderson
|
||||
|
||||
Simple iteration solvers:
|
||||
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
excitingmixing
|
||||
linearmixing
|
||||
diagbroyden
|
||||
|
||||
:mod:`Additional information on the nonlinear solvers <scipy.optimize.nonlin>`
|
||||
"""
|
||||
|
||||
from .optimize import *
|
||||
from ._minimize import *
|
||||
from ._root import *
|
||||
from ._root_scalar import *
|
||||
from .minpack import *
|
||||
from .zeros import *
|
||||
from .lbfgsb import fmin_l_bfgs_b, LbfgsInvHessProduct
|
||||
from .tnc import fmin_tnc
|
||||
from .cobyla import fmin_cobyla
|
||||
from .nonlin import *
|
||||
from .slsqp import fmin_slsqp
|
||||
from ._nnls import nnls
|
||||
from ._basinhopping import basinhopping
|
||||
from ._linprog import linprog, linprog_verbose_callback
|
||||
from ._lsap import linear_sum_assignment
|
||||
from ._differentialevolution import differential_evolution
|
||||
from ._lsq import least_squares, lsq_linear
|
||||
from ._constraints import (NonlinearConstraint,
|
||||
LinearConstraint,
|
||||
Bounds)
|
||||
from ._hessian_update_strategy import HessianUpdateStrategy, BFGS, SR1
|
||||
from ._shgo import shgo
|
||||
from ._dual_annealing import dual_annealing
|
||||
|
||||
__all__ = [s for s in dir() if not s.startswith('_')]
|
||||
|
||||
from scipy._lib._testutils import PytestTester
|
||||
test = PytestTester(__name__)
|
||||
del PytestTester
|
BIN
venv/Lib/site-packages/scipy/optimize/__nnls.cp36-win32.pyd
Normal file
Binary file not shown.
746
venv/Lib/site-packages/scipy/optimize/_basinhopping.py
Normal file
|
@@ -0,0 +1,746 @@
|
|||
"""
|
||||
basinhopping: The basinhopping global optimization algorithm
|
||||
"""
|
||||
import numpy as np
|
||||
import math
|
||||
from numpy import cos, sin
|
||||
import scipy.optimize
|
||||
from scipy._lib._util import check_random_state
|
||||
|
||||
__all__ = ['basinhopping']
|
||||
|
||||
|
||||
class Storage(object):
|
||||
"""
|
||||
Class used to store the lowest energy structure
|
||||
"""
|
||||
def __init__(self, minres):
|
||||
self._add(minres)
|
||||
|
||||
def _add(self, minres):
|
||||
self.minres = minres
|
||||
self.minres.x = np.copy(minres.x)
|
||||
|
||||
def update(self, minres):
|
||||
if minres.fun < self.minres.fun:
|
||||
self._add(minres)
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def get_lowest(self):
|
||||
return self.minres
|
||||
|
||||
|
||||
class BasinHoppingRunner(object):
|
||||
"""This class implements the core of the basinhopping algorithm.
|
||||
|
||||
x0 : ndarray
|
||||
The starting coordinates.
|
||||
minimizer : callable
|
||||
The local minimizer, with signature ``result = minimizer(x)``.
|
||||
The return value is an `optimize.OptimizeResult` object.
|
||||
step_taking : callable
|
||||
This function displaces the coordinates randomly. Signature should
|
||||
be ``x_new = step_taking(x)``. Note that `x` may be modified in-place.
|
||||
accept_tests : list of callables
|
||||
Each test is passed the kwargs `f_new`, `x_new`, `f_old` and
|
||||
`x_old`. These tests will be used to judge whether or not to accept
|
||||
the step. The acceptable return values are True, False, or ``"force
|
||||
accept"``. If any of the tests return False then the step is rejected.
|
||||
If ``"force accept"``, then this will override any other tests in
|
||||
order to accept the step. This can be used, for example, to forcefully
|
||||
escape from a local minimum that ``basinhopping`` is trapped in.
|
||||
disp : bool, optional
|
||||
Display status messages.
|
||||
|
||||
"""
|
||||
def __init__(self, x0, minimizer, step_taking, accept_tests, disp=False):
|
||||
self.x = np.copy(x0)
|
||||
self.minimizer = minimizer
|
||||
self.step_taking = step_taking
|
||||
self.accept_tests = accept_tests
|
||||
self.disp = disp
|
||||
|
||||
self.nstep = 0
|
||||
|
||||
# initialize return object
|
||||
self.res = scipy.optimize.OptimizeResult()
|
||||
self.res.minimization_failures = 0
|
||||
|
||||
# do initial minimization
|
||||
minres = minimizer(self.x)
|
||||
if not minres.success:
|
||||
self.res.minimization_failures += 1
|
||||
if self.disp:
|
||||
print("warning: basinhopping: local minimization failure")
|
||||
self.x = np.copy(minres.x)
|
||||
self.energy = minres.fun
|
||||
if self.disp:
|
||||
print("basinhopping step %d: f %g" % (self.nstep, self.energy))
|
||||
|
||||
# initialize storage class
|
||||
self.storage = Storage(minres)
|
||||
|
||||
if hasattr(minres, "nfev"):
|
||||
self.res.nfev = minres.nfev
|
||||
if hasattr(minres, "njev"):
|
||||
self.res.njev = minres.njev
|
||||
if hasattr(minres, "nhev"):
|
||||
self.res.nhev = minres.nhev
|
||||
|
||||
def _monte_carlo_step(self):
|
||||
"""Do one Monte Carlo iteration
|
||||
|
||||
Randomly displace the coordinates, minimize, and decide whether
|
||||
or not to accept the new coordinates.
|
||||
"""
|
||||
# Take a random step. Make a copy of x because the step_taking
|
||||
# algorithm might change x in place
|
||||
x_after_step = np.copy(self.x)
|
||||
x_after_step = self.step_taking(x_after_step)
|
||||
|
||||
# do a local minimization
|
||||
minres = self.minimizer(x_after_step)
|
||||
x_after_quench = minres.x
|
||||
energy_after_quench = minres.fun
|
||||
if not minres.success:
|
||||
self.res.minimization_failures += 1
|
||||
if self.disp:
|
||||
print("warning: basinhopping: local minimization failure")
|
||||
|
||||
if hasattr(minres, "nfev"):
|
||||
self.res.nfev += minres.nfev
|
||||
if hasattr(minres, "njev"):
|
||||
self.res.njev += minres.njev
|
||||
if hasattr(minres, "nhev"):
|
||||
self.res.nhev += minres.nhev
|
||||
|
||||
# accept the move based on self.accept_tests. If any test is False,
|
||||
# then reject the step. If any test returns the special string
|
||||
# 'force accept', then accept the step regardless. This can be used
|
||||
# to forcefully escape from a local minimum if normal basin hopping
|
||||
# steps are not sufficient.
|
||||
accept = True
|
||||
for test in self.accept_tests:
|
||||
testres = test(f_new=energy_after_quench, x_new=x_after_quench,
|
||||
f_old=self.energy, x_old=self.x)
|
||||
if testres == 'force accept':
|
||||
accept = True
|
||||
break
|
||||
elif testres is None:
|
||||
raise ValueError("accept_tests must return True, False, or "
|
||||
"'force accept'")
|
||||
elif not testres:
|
||||
accept = False
|
||||
|
||||
# Report the result of the acceptance test to the take step class.
|
||||
# This is for adaptive step taking
|
||||
if hasattr(self.step_taking, "report"):
|
||||
self.step_taking.report(accept, f_new=energy_after_quench,
|
||||
x_new=x_after_quench, f_old=self.energy,
|
||||
x_old=self.x)
|
||||
|
||||
return accept, minres
|
||||
|
||||
def one_cycle(self):
|
||||
"""Do one cycle of the basinhopping algorithm
|
||||
"""
|
||||
self.nstep += 1
|
||||
new_global_min = False
|
||||
|
||||
accept, minres = self._monte_carlo_step()
|
||||
|
||||
if accept:
|
||||
self.energy = minres.fun
|
||||
self.x = np.copy(minres.x)
|
||||
new_global_min = self.storage.update(minres)
|
||||
|
||||
# print some information
|
||||
if self.disp:
|
||||
self.print_report(minres.fun, accept)
|
||||
if new_global_min:
|
||||
print("found new global minimum on step %d with function"
|
||||
" value %g" % (self.nstep, self.energy))
|
||||
|
||||
# save some variables as BasinHoppingRunner attributes
|
||||
self.xtrial = minres.x
|
||||
self.energy_trial = minres.fun
|
||||
self.accept = accept
|
||||
|
||||
return new_global_min
|
||||
|
||||
def print_report(self, energy_trial, accept):
|
||||
"""print a status update"""
|
||||
minres = self.storage.get_lowest()
|
||||
print("basinhopping step %d: f %g trial_f %g accepted %d "
|
||||
" lowest_f %g" % (self.nstep, self.energy, energy_trial,
|
||||
accept, minres.fun))
|
||||
|
||||
|
||||
class AdaptiveStepsize(object):
|
||||
"""
|
||||
Class to implement adaptive stepsize.
|
||||
|
||||
This class wraps the step taking class and modifies the stepsize to
|
||||
ensure the true acceptance rate is as close as possible to the target.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
takestep : callable
|
||||
The step taking routine. Must contain modifiable attribute
|
||||
takestep.stepsize
|
||||
accept_rate : float, optional
|
||||
The target step acceptance rate
|
||||
interval : int, optional
|
||||
Interval for how often to update the stepsize
|
||||
factor : float, optional
|
||||
The step size is multiplied or divided by this factor upon each
|
||||
update.
|
||||
verbose : bool, optional
|
||||
Print information about each update
|
||||
|
||||
"""
|
||||
def __init__(self, takestep, accept_rate=0.5, interval=50, factor=0.9,
|
||||
verbose=True):
|
||||
self.takestep = takestep
|
||||
self.target_accept_rate = accept_rate
|
||||
self.interval = interval
|
||||
self.factor = factor
|
||||
self.verbose = verbose
|
||||
|
||||
self.nstep = 0
|
||||
self.nstep_tot = 0
|
||||
self.naccept = 0
|
||||
|
||||
def __call__(self, x):
|
||||
return self.take_step(x)
|
||||
|
||||
def _adjust_step_size(self):
|
||||
old_stepsize = self.takestep.stepsize
|
||||
accept_rate = float(self.naccept) / self.nstep
|
||||
if accept_rate > self.target_accept_rate:
|
||||
# We're accepting too many steps. This generally means we're
|
||||
# trapped in a basin. Take bigger steps.
|
||||
self.takestep.stepsize /= self.factor
|
||||
else:
|
||||
# We're not accepting enough steps. Take smaller steps.
|
||||
self.takestep.stepsize *= self.factor
|
||||
if self.verbose:
|
||||
print("adaptive stepsize: acceptance rate %f target %f new "
|
||||
"stepsize %g old stepsize %g" % (accept_rate,
|
||||
self.target_accept_rate, self.takestep.stepsize,
|
||||
old_stepsize))
|
||||
|
||||
def take_step(self, x):
|
||||
self.nstep += 1
|
||||
self.nstep_tot += 1
|
||||
if self.nstep % self.interval == 0:
|
||||
self._adjust_step_size()
|
||||
return self.takestep(x)
|
||||
|
||||
def report(self, accept, **kwargs):
|
||||
"called by basinhopping to report the result of the step"
|
||||
if accept:
|
||||
self.naccept += 1
|
||||
|
||||
|
||||
class RandomDisplacement(object):
|
||||
"""
|
||||
Add a random displacement of maximum size `stepsize` to each coordinate
|
||||
|
||||
Calling this updates `x` in-place.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
stepsize : float, optional
|
||||
Maximum stepsize in any dimension
|
||||
random_gen : {None, `np.random.RandomState`, `np.random.Generator`}
|
||||
The random number generator that generates the displacements
|
||||
"""
|
||||
def __init__(self, stepsize=0.5, random_gen=None):
|
||||
self.stepsize = stepsize
|
||||
self.random_gen = check_random_state(random_gen)
|
||||
|
||||
def __call__(self, x):
|
||||
x += self.random_gen.uniform(-self.stepsize, self.stepsize,
|
||||
np.shape(x))
|
||||
return x
|
||||
|
||||
|
||||
class MinimizerWrapper(object):
|
||||
"""
|
||||
wrap a minimizer function as a minimizer class
|
||||
"""
|
||||
def __init__(self, minimizer, func=None, **kwargs):
|
||||
self.minimizer = minimizer
|
||||
self.func = func
|
||||
self.kwargs = kwargs
|
||||
|
||||
def __call__(self, x0):
|
||||
if self.func is None:
|
||||
return self.minimizer(x0, **self.kwargs)
|
||||
else:
|
||||
return self.minimizer(self.func, x0, **self.kwargs)
|
||||
|
||||
|
||||
class Metropolis(object):
|
||||
"""
|
||||
Metropolis acceptance criterion
|
||||
|
||||
Parameters
|
||||
----------
|
||||
T : float
|
||||
The "temperature" parameter for the accept or reject criterion.
|
||||
random_gen : {None, `np.random.RandomState`, `np.random.Generator`}
|
||||
Random number generator used for acceptance test
|
||||
"""
|
||||
def __init__(self, T, random_gen=None):
|
||||
# Avoid ZeroDivisionError since "MBH can be regarded as a special case
|
||||
# of the BH framework with the Metropolis criterion, where temperature
|
||||
# T = 0." (Reject all steps that increase energy.)
|
||||
self.beta = 1.0 / T if T != 0 else float('inf')
|
||||
self.random_gen = check_random_state(random_gen)
|
||||
|
||||
def accept_reject(self, energy_new, energy_old):
|
||||
"""
|
||||
If new energy is lower than old, it will always be accepted.
|
||||
If new is higher than old, there is a chance it will be accepted,
|
||||
less likely for larger differences.
|
||||
"""
|
||||
with np.errstate(invalid='ignore'):
|
||||
# The energy values being fed to Metropolis are 1-length arrays, and if
|
||||
# they are equal, their difference is 0, which gets multiplied by beta,
|
||||
# which is inf, and array([0]) * float('inf') causes
|
||||
#
|
||||
# RuntimeWarning: invalid value encountered in multiply
|
||||
#
|
||||
# Ignore this warning so that when the algorithm is on a flat plane, it always
|
||||
# accepts the step, to try to move off the plane.
|
||||
prod = -(energy_new - energy_old) * self.beta
|
||||
w = math.exp(min(0, prod))
|
||||
|
||||
rand = self.random_gen.uniform()
|
||||
return w >= rand
|
||||
|
||||
def __call__(self, **kwargs):
|
||||
"""
|
||||
f_new and f_old are mandatory in kwargs
|
||||
"""
|
||||
return bool(self.accept_reject(kwargs["f_new"],
|
||||
kwargs["f_old"]))
|
||||
|
||||
|
||||
def basinhopping(func, x0, niter=100, T=1.0, stepsize=0.5,
|
||||
minimizer_kwargs=None, take_step=None, accept_test=None,
|
||||
callback=None, interval=50, disp=False, niter_success=None,
|
||||
seed=None):
|
||||
"""
|
||||
Find the global minimum of a function using the basin-hopping algorithm
|
||||
|
||||
Basin-hopping is a two-phase method that combines a global stepping
|
||||
algorithm with local minimization at each step. Designed to mimic
|
||||
the natural process of energy minimization of clusters of atoms, it works
|
||||
well for similar problems with "funnel-like, but rugged" energy landscapes
|
||||
[5]_.
|
||||
|
||||
As the step-taking, step acceptance, and minimization methods are all
|
||||
customizable, this function can also be used to implement other two-phase
|
||||
methods.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
func : callable ``f(x, *args)``
|
||||
Function to be optimized. ``args`` can be passed as an optional item
|
||||
in the dict ``minimizer_kwargs``
|
||||
x0 : array_like
|
||||
Initial guess.
|
||||
niter : integer, optional
|
||||
The number of basin-hopping iterations
|
||||
T : float, optional
|
||||
The "temperature" parameter for the accept or reject criterion. Higher
|
||||
"temperatures" mean that larger jumps in function value will be
|
||||
accepted. For best results ``T`` should be comparable to the
|
||||
separation (in function value) between local minima.
|
||||
stepsize : float, optional
|
||||
Maximum step size for use in the random displacement.
|
||||
minimizer_kwargs : dict, optional
|
||||
Extra keyword arguments to be passed to the local minimizer
|
||||
``scipy.optimize.minimize()``. Some important options could be:
|
||||
|
||||
method : str
|
||||
The minimization method (e.g. ``"L-BFGS-B"``)
|
||||
args : tuple
|
||||
Extra arguments passed to the objective function (``func``) and
|
||||
its derivatives (Jacobian, Hessian).
|
||||
|
||||
take_step : callable ``take_step(x)``, optional
|
||||
Replace the default step-taking routine with this routine. The default
|
||||
step-taking routine is a random displacement of the coordinates, but
|
||||
other step-taking algorithms may be better for some systems.
|
||||
``take_step`` can optionally have the attribute ``take_step.stepsize``.
|
||||
If this attribute exists, then ``basinhopping`` will adjust
|
||||
``take_step.stepsize`` in order to try to optimize the global minimum
|
||||
search.
|
||||
accept_test : callable, ``accept_test(f_new=f_new, x_new=x_new, f_old=f_old, x_old=x_old)``, optional
|
||||
Define a test which will be used to judge whether or not to accept the
|
||||
step. This will be used in addition to the Metropolis test based on
|
||||
"temperature" ``T``. The acceptable return values are True,
|
||||
False, or ``"force accept"``. If any of the tests return False
|
||||
then the step is rejected. If any test returns ``"force accept"``, then this will override any
|
||||
other tests in order to accept the step. This can be used, for example,
|
||||
to forcefully escape from a local minimum that ``basinhopping`` is
|
||||
trapped in.
|
||||
callback : callable, ``callback(x, f, accept)``, optional
|
||||
A callback function which will be called for all minima found. ``x``
|
||||
and ``f`` are the coordinates and function value of the trial minimum,
|
||||
and ``accept`` is whether or not that minimum was accepted. This can
|
||||
be used, for example, to save the lowest N minima found. Also,
|
||||
``callback`` can be used to specify a user defined stop criterion by
|
||||
optionally returning True to stop the ``basinhopping`` routine.
|
||||
interval : integer, optional
|
||||
interval for how often to update the ``stepsize``
|
||||
disp : bool, optional
|
||||
Set to True to print status messages
|
||||
niter_success : integer, optional
|
||||
Stop the run if the global minimum candidate remains the same for this
|
||||
number of iterations.
|
||||
seed : {int, `~np.random.RandomState`, `~np.random.Generator`}, optional
|
||||
If `seed` is not specified the `~np.random.RandomState` singleton is
|
||||
used.
|
||||
If `seed` is an int, a new ``RandomState`` instance is used, seeded
|
||||
with seed.
|
||||
If `seed` is already a ``RandomState`` or ``Generator`` instance, then
|
||||
that object is used.
|
||||
Specify `seed` for repeatable minimizations. The random numbers
|
||||
generated with this seed only affect the default Metropolis
|
||||
`accept_test` and the default `take_step`. If you supply your own
|
||||
`take_step` and `accept_test`, and these functions use random
|
||||
number generation, then those functions are responsible for the state
|
||||
of their random number generator.
|
||||
|
||||
Returns
|
||||
-------
|
||||
res : OptimizeResult
|
||||
The optimization result represented as an ``OptimizeResult`` object.
|
||||
Important attributes are: ``x`` the solution array, ``fun`` the value
|
||||
of the function at the solution, and ``message`` which describes the
|
||||
cause of the termination. The ``OptimizeResult`` object returned by the
|
||||
selected minimizer at the lowest minimum is also contained within this
|
||||
object and can be accessed through the ``lowest_optimization_result``
|
||||
attribute. See `OptimizeResult` for a description of other attributes.
|
||||
|
||||
See Also
|
||||
--------
|
||||
minimize :
|
||||
The local minimization function called once for each basinhopping step.
|
||||
``minimizer_kwargs`` is passed to this routine.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Basin-hopping is a stochastic algorithm which attempts to find the global
|
||||
minimum of a smooth scalar function of one or more variables [1]_ [2]_ [3]_
|
||||
[4]_. The algorithm in its current form was described by David Wales and
|
||||
Jonathan Doye [2]_ http://www-wales.ch.cam.ac.uk/.
|
||||
|
||||
The algorithm is iterative with each cycle composed of the following
|
||||
features
|
||||
|
||||
1) random perturbation of the coordinates
|
||||
|
||||
2) local minimization
|
||||
|
||||
3) accept or reject the new coordinates based on the minimized function
|
||||
value
|
||||
|
||||
The acceptance test used here is the Metropolis criterion of standard Monte
|
||||
Carlo algorithms, although there are many other possibilities [3]_.
|
||||
|
||||
This global minimization method has been shown to be extremely efficient
|
||||
for a wide variety of problems in physics and chemistry. It is
|
||||
particularly useful when the function has many minima separated by large
|
||||
barriers. See the Cambridge Cluster Database
|
||||
http://www-wales.ch.cam.ac.uk/CCD.html for databases of molecular systems
|
||||
that have been optimized primarily using basin-hopping. This database
|
||||
includes minimization problems exceeding 300 degrees of freedom.
|
||||
|
||||
See the free software program GMIN (http://www-wales.ch.cam.ac.uk/GMIN) for
|
||||
a Fortran implementation of basin-hopping. This implementation has many
|
||||
different variations of the procedure described above, including more
|
||||
advanced step-taking algorithms and alternative acceptance criteria.
|
||||
|
||||
For stochastic global optimization there is no way to determine if the true
|
||||
global minimum has actually been found. Instead, as a consistency check,
|
||||
the algorithm can be run from a number of different random starting points
|
||||
to ensure the lowest minimum found in each example has converged to the
|
||||
global minimum. For this reason, ``basinhopping`` will by default simply
|
||||
run for the number of iterations ``niter`` and return the lowest minimum
|
||||
found. It is left to the user to ensure that this is in fact the global
|
||||
minimum.
|
||||
|
||||
Choosing ``stepsize``: This is a crucial parameter in ``basinhopping`` and
|
||||
depends on the problem being solved. The step is chosen uniformly in the
|
||||
region from x0-stepsize to x0+stepsize, in each dimension. Ideally, it
|
||||
should be comparable to the typical separation (in argument values) between
|
||||
local minima of the function being optimized. ``basinhopping`` will, by
|
||||
default, adjust ``stepsize`` to find an optimal value, but this may take
|
||||
many iterations. You will get quicker results if you set a sensible
|
||||
initial value for ``stepsize``.
|
||||
|
||||
Choosing ``T``: The parameter ``T`` is the "temperature" used in the
|
||||
Metropolis criterion. Basinhopping steps are always accepted if
|
||||
``func(xnew) < func(xold)``. Otherwise, they are accepted with
|
||||
probability::
|
||||
|
||||
exp( -(func(xnew) - func(xold)) / T )
|
||||
|
||||
So, for best results, ``T`` should be comparable to the typical
|
||||
difference (in function values) between local minima. (The height of
|
||||
"walls" between local minima is irrelevant.)
|
||||
|
||||
If ``T`` is 0, the algorithm becomes Monotonic Basin-Hopping, in which all
|
||||
steps that increase energy are rejected.
|
||||
|
||||
.. versionadded:: 0.12.0
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Wales, David J. 2003, Energy Landscapes, Cambridge University Press,
|
||||
Cambridge, UK.
|
||||
.. [2] Wales, D J, and Doye J P K, Global Optimization by Basin-Hopping and
|
||||
the Lowest Energy Structures of Lennard-Jones Clusters Containing up to
|
||||
110 Atoms. Journal of Physical Chemistry A, 1997, 101, 5111.
|
||||
.. [3] Li, Z. and Scheraga, H. A., Monte Carlo-minimization approach to the
|
||||
multiple-minima problem in protein folding, Proc. Natl. Acad. Sci. USA,
|
||||
1987, 84, 6611.
|
||||
.. [4] Wales, D. J. and Scheraga, H. A., Global optimization of clusters,
|
||||
crystals, and biomolecules, Science, 1999, 285, 1368.
|
||||
.. [5] Olson, B., Hashmi, I., Molloy, K., and Shehu1, A., Basin Hopping as
|
||||
a General and Versatile Optimization Framework for the Characterization
|
||||
of Biological Macromolecules, Advances in Artificial Intelligence,
|
||||
Volume 2012 (2012), Article ID 674832, :doi:`10.1155/2012/674832`
|
||||
|
||||
Examples
|
||||
--------
|
||||
The following example is a 1-D minimization problem, with many
|
||||
local minima superimposed on a parabola.
|
||||
|
||||
>>> from scipy.optimize import basinhopping
|
||||
>>> func = lambda x: np.cos(14.5 * x - 0.3) + (x + 0.2) * x
|
||||
>>> x0=[1.]
|
||||
|
||||
Basinhopping, internally, uses a local minimization algorithm. We will use
|
||||
the parameter ``minimizer_kwargs`` to tell basinhopping which algorithm to
|
||||
use and how to set up that minimizer. This parameter will be passed to
|
||||
``scipy.optimize.minimize()``.
|
||||
|
||||
>>> minimizer_kwargs = {"method": "BFGS"}
|
||||
>>> ret = basinhopping(func, x0, minimizer_kwargs=minimizer_kwargs,
|
||||
... niter=200)
|
||||
>>> print("global minimum: x = %.4f, f(x0) = %.4f" % (ret.x, ret.fun))
|
||||
global minimum: x = -0.1951, f(x0) = -1.0009
|
||||
|
||||
Next consider a 2-D minimization problem. Also, this time, we
|
||||
will use gradient information to significantly speed up the search.
|
||||
|
||||
>>> def func2d(x):
|
||||
... f = np.cos(14.5 * x[0] - 0.3) + (x[1] + 0.2) * x[1] + (x[0] +
|
||||
... 0.2) * x[0]
|
||||
... df = np.zeros(2)
|
||||
... df[0] = -14.5 * np.sin(14.5 * x[0] - 0.3) + 2. * x[0] + 0.2
|
||||
... df[1] = 2. * x[1] + 0.2
|
||||
... return f, df
|
||||
|
||||
We'll also use a different local minimization algorithm. Also, we must tell
|
||||
the minimizer that our function returns both energy and gradient (Jacobian).
|
||||
|
||||
>>> minimizer_kwargs = {"method":"L-BFGS-B", "jac":True}
|
||||
>>> x0 = [1.0, 1.0]
|
||||
>>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
|
||||
... niter=200)
|
||||
>>> print("global minimum: x = [%.4f, %.4f], f(x0) = %.4f" % (ret.x[0],
|
||||
... ret.x[1],
|
||||
... ret.fun))
|
||||
global minimum: x = [-0.1951, -0.1000], f(x0) = -1.0109
|
||||
|
||||
|
||||
Here is an example using a custom step-taking routine. Imagine you want
|
||||
the first coordinate to take larger steps than the rest of the coordinates.
|
||||
This can be implemented like so:
|
||||
|
||||
>>> class MyTakeStep(object):
|
||||
... def __init__(self, stepsize=0.5):
|
||||
... self.stepsize = stepsize
|
||||
... def __call__(self, x):
|
||||
... s = self.stepsize
|
||||
... x[0] += np.random.uniform(-2.*s, 2.*s)
|
||||
... x[1:] += np.random.uniform(-s, s, x[1:].shape)
|
||||
... return x
|
||||
|
||||
Since ``MyTakeStep.stepsize`` exists basinhopping will adjust the magnitude
|
||||
of ``stepsize`` to optimize the search. We'll use the same 2-D function as
|
||||
before
|
||||
|
||||
>>> mytakestep = MyTakeStep()
|
||||
>>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
|
||||
... niter=200, take_step=mytakestep)
|
||||
>>> print("global minimum: x = [%.4f, %.4f], f(x0) = %.4f" % (ret.x[0],
|
||||
... ret.x[1],
|
||||
... ret.fun))
|
||||
global minimum: x = [-0.1951, -0.1000], f(x0) = -1.0109
|
||||
|
||||
|
||||
Now, let's do an example using a custom callback function which prints the
|
||||
value of every minimum found
|
||||
|
||||
>>> def print_fun(x, f, accepted):
|
||||
... print("at minimum %.4f accepted %d" % (f, int(accepted)))
|
||||
|
||||
We'll run it for only 10 basinhopping steps this time.
|
||||
|
||||
>>> np.random.seed(1)
|
||||
>>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
|
||||
... niter=10, callback=print_fun)
|
||||
at minimum 0.4159 accepted 1
|
||||
at minimum -0.9073 accepted 1
|
||||
at minimum -0.1021 accepted 1
|
||||
at minimum -0.1021 accepted 1
|
||||
at minimum 0.9102 accepted 1
|
||||
at minimum 0.9102 accepted 1
|
||||
at minimum 2.2945 accepted 0
|
||||
at minimum -0.1021 accepted 1
|
||||
at minimum -1.0109 accepted 1
|
||||
at minimum -1.0109 accepted 1
|
||||
|
||||
|
||||
The minimum at -1.0109 is actually the global minimum, found already on the
|
||||
8th iteration.
|
||||
|
||||
Now let's implement bounds on the problem using a custom ``accept_test``:
|
||||
|
||||
>>> class MyBounds(object):
|
||||
... def __init__(self, xmax=[1.1,1.1], xmin=[-1.1,-1.1] ):
|
||||
... self.xmax = np.array(xmax)
|
||||
... self.xmin = np.array(xmin)
|
||||
... def __call__(self, **kwargs):
|
||||
... x = kwargs["x_new"]
|
||||
... tmax = bool(np.all(x <= self.xmax))
|
||||
... tmin = bool(np.all(x >= self.xmin))
|
||||
... return tmax and tmin
|
||||
|
||||
>>> mybounds = MyBounds()
|
||||
>>> ret = basinhopping(func2d, x0, minimizer_kwargs=minimizer_kwargs,
|
||||
... niter=10, accept_test=mybounds)
|
||||
|
||||
"""
|
||||
x0 = np.array(x0)
|
||||
|
||||
# set up the np.random.RandomState generator
|
||||
rng = check_random_state(seed)
|
||||
|
||||
# set up minimizer
|
||||
if minimizer_kwargs is None:
|
||||
minimizer_kwargs = dict()
|
||||
wrapped_minimizer = MinimizerWrapper(scipy.optimize.minimize, func,
|
||||
**minimizer_kwargs)
|
||||
|
||||
# set up step-taking algorithm
|
||||
if take_step is not None:
|
||||
if not callable(take_step):
|
||||
raise TypeError("take_step must be callable")
|
||||
# if take_step.stepsize exists then use AdaptiveStepsize to control
|
||||
# take_step.stepsize
|
||||
if hasattr(take_step, "stepsize"):
|
||||
take_step_wrapped = AdaptiveStepsize(take_step, interval=interval,
|
||||
verbose=disp)
|
||||
else:
|
||||
take_step_wrapped = take_step
|
||||
else:
|
||||
# use default
|
||||
displace = RandomDisplacement(stepsize=stepsize, random_gen=rng)
|
||||
take_step_wrapped = AdaptiveStepsize(displace, interval=interval,
|
||||
verbose=disp)
|
||||
|
||||
# set up accept tests
|
||||
accept_tests = []
|
||||
if accept_test is not None:
|
||||
if not callable(accept_test):
|
||||
raise TypeError("accept_test must be callable")
|
||||
accept_tests = [accept_test]
|
||||
|
||||
# use default
|
||||
metropolis = Metropolis(T, random_gen=rng)
|
||||
accept_tests.append(metropolis)
|
||||
|
||||
if niter_success is None:
|
||||
niter_success = niter + 2
|
||||
|
||||
bh = BasinHoppingRunner(x0, wrapped_minimizer, take_step_wrapped,
|
||||
accept_tests, disp=disp)
|
||||
|
||||
# start main iteration loop
|
||||
count, i = 0, 0
|
||||
message = ["requested number of basinhopping iterations completed"
|
||||
" successfully"]
|
||||
for i in range(niter):
|
||||
new_global_min = bh.one_cycle()
|
||||
|
||||
if callable(callback):
|
||||
# should we pass a copy of x?
|
||||
val = callback(bh.xtrial, bh.energy_trial, bh.accept)
|
||||
if val is not None:
|
||||
if val:
|
||||
message = ["callback function requested stop early by"
|
||||
"returning True"]
|
||||
break
|
||||
|
||||
count += 1
|
||||
if new_global_min:
|
||||
count = 0
|
||||
elif count > niter_success:
|
||||
message = ["success condition satisfied"]
|
||||
break
|
||||
|
||||
# prepare return object
|
||||
res = bh.res
|
||||
res.lowest_optimization_result = bh.storage.get_lowest()
|
||||
res.x = np.copy(res.lowest_optimization_result.x)
|
||||
res.fun = res.lowest_optimization_result.fun
|
||||
res.message = message
|
||||
res.nit = i + 1
|
||||
return res
|
||||
|
||||
|
||||
def _test_func2d_nograd(x):
|
||||
f = (cos(14.5 * x[0] - 0.3) + (x[1] + 0.2) * x[1] + (x[0] + 0.2) * x[0]
|
||||
+ 1.010876184442655)
|
||||
return f
|
||||
|
||||
|
||||
def _test_func2d(x):
|
||||
f = (cos(14.5 * x[0] - 0.3) + (x[0] + 0.2) * x[0] + cos(14.5 * x[1] -
|
||||
0.3) + (x[1] + 0.2) * x[1] + x[0] * x[1] + 1.963879482144252)
|
||||
df = np.zeros(2)
|
||||
df[0] = -14.5 * sin(14.5 * x[0] - 0.3) + 2. * x[0] + 0.2 + x[1]
|
||||
df[1] = -14.5 * sin(14.5 * x[1] - 0.3) + 2. * x[1] + 0.2 + x[0]
|
||||
return f, df
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("\n\nminimize a 2-D function without gradient")
|
||||
# minimum expected at ~[-0.195, -0.1]
|
||||
kwargs = {"method": "L-BFGS-B"}
|
||||
x0 = np.array([1.0, 1.])
|
||||
scipy.optimize.minimize(_test_func2d_nograd, x0, **kwargs)
|
||||
ret = basinhopping(_test_func2d_nograd, x0, minimizer_kwargs=kwargs,
|
||||
niter=200, disp=False)
|
||||
print("minimum expected at func([-0.195, -0.1]) = 0.0")
|
||||
print(ret)
|
||||
|
||||
print("\n\ntry a harder 2-D problem")
|
||||
kwargs = {"method": "L-BFGS-B", "jac": True}
|
||||
x0 = np.array([1.0, 1.0])
|
||||
ret = basinhopping(_test_func2d, x0, minimizer_kwargs=kwargs, niter=200,
|
||||
disp=False)
|
||||
print("minimum expected at ~, func([-0.19415263, -0.19415263]) = 0")
|
||||
print(ret)
|
BIN
venv/Lib/site-packages/scipy/optimize/_bglu_dense.cp36-win32.pyd
Normal file
Binary file not shown.
BIN
venv/Lib/site-packages/scipy/optimize/_cobyla.cp36-win32.pyd
Normal file
Binary file not shown.
462
venv/Lib/site-packages/scipy/optimize/_constraints.py
Normal file
|
@@ -0,0 +1,462 @@
|
|||
"""Constraints definition for minimize."""
|
||||
import numpy as np
|
||||
from ._hessian_update_strategy import BFGS
|
||||
from ._differentiable_functions import (
|
||||
VectorFunction, LinearVectorFunction, IdentityVectorFunction)
|
||||
from .optimize import OptimizeWarning
|
||||
from warnings import warn
|
||||
from numpy.testing import suppress_warnings
|
||||
from scipy.sparse import issparse
|
||||
|
||||
class NonlinearConstraint(object):
|
||||
"""Nonlinear constraint on the variables.
|
||||
|
||||
The constraint has the general inequality form::
|
||||
|
||||
lb <= fun(x) <= ub
|
||||
|
||||
Here the vector of independent variables x is passed as ndarray of shape
|
||||
(n,) and ``fun`` returns a vector with m components.
|
||||
|
||||
It is possible to use equal bounds to represent an equality constraint or
|
||||
infinite bounds to represent a one-sided constraint.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fun : callable
|
||||
The function defining the constraint.
|
||||
The signature is ``fun(x) -> array_like, shape (m,)``.
|
||||
lb, ub : array_like
|
||||
Lower and upper bounds on the constraint. Each array must have the
|
||||
shape (m,) or be a scalar, in the latter case a bound will be the same
|
||||
for all components of the constraint. Use ``np.inf`` with an
|
||||
appropriate sign to specify a one-sided constraint.
|
||||
Set components of `lb` and `ub` equal to represent an equality
|
||||
constraint. Note that you can mix constraints of different types:
|
||||
interval, one-sided or equality, by setting different components of
|
||||
`lb` and `ub` as necessary.
|
||||
jac : {callable, '2-point', '3-point', 'cs'}, optional
|
||||
Method of computing the Jacobian matrix (an m-by-n matrix,
|
||||
where element (i, j) is the partial derivative of f[i] with
|
||||
respect to x[j]). The keywords {'2-point', '3-point',
|
||||
'cs'} select a finite difference scheme for the numerical estimation.
|
||||
A callable must have the following signature:
|
||||
``jac(x) -> {ndarray, sparse matrix}, shape (m, n)``.
|
||||
Default is '2-point'.
|
||||
hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy, None}, optional
|
||||
Method for computing the Hessian matrix. The keywords
|
||||
{'2-point', '3-point', 'cs'} select a finite difference scheme for
|
||||
numerical estimation. Alternatively, objects implementing
|
||||
`HessianUpdateStrategy` interface can be used to approximate the
|
||||
Hessian. Currently available implementations are:
|
||||
|
||||
- `BFGS` (default option)
|
||||
- `SR1`
|
||||
|
||||
A callable must return the Hessian matrix of ``dot(fun, v)`` and
|
||||
must have the following signature:
|
||||
``hess(x, v) -> {LinearOperator, sparse matrix, array_like}, shape (n, n)``.
|
||||
Here ``v`` is ndarray with shape (m,) containing Lagrange multipliers.
|
||||
keep_feasible : array_like of bool, optional
|
||||
Whether to keep the constraint components feasible throughout
|
||||
iterations. A single value sets this property for all components.
|
||||
Default is False. Has no effect for equality constraints.
|
||||
finite_diff_rel_step: None or array_like, optional
|
||||
Relative step size for the finite difference approximation. Default is
|
||||
None, which will select a reasonable value automatically depending
|
||||
on a finite difference scheme.
|
||||
finite_diff_jac_sparsity: {None, array_like, sparse matrix}, optional
|
||||
Defines the sparsity structure of the Jacobian matrix for finite
|
||||
difference estimation, its shape must be (m, n). If the Jacobian has
|
||||
only few non-zero elements in *each* row, providing the sparsity
|
||||
structure will greatly speed up the computations. A zero entry means
|
||||
that a corresponding element in the Jacobian is identically zero.
|
||||
If provided, forces the use of 'lsmr' trust-region solver.
|
||||
If None (default) then dense differencing will be used.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Finite difference schemes {'2-point', '3-point', 'cs'} may be used for
|
||||
approximating either the Jacobian or the Hessian. We, however, do not allow
|
||||
their use for approximating both simultaneously. Hence, whenever the Jacobian
|
||||
is estimated via finite-differences, we require the Hessian to be estimated
|
||||
using one of the quasi-Newton strategies.
|
||||
|
||||
The scheme 'cs' is potentially the most accurate, but requires the function
|
||||
to correctly handle complex inputs and be analytically continuable to the
|
||||
complex plane. The scheme '3-point' is more accurate than '2-point' but
|
||||
requires twice as many operations.
|
||||
|
||||
Examples
|
||||
--------
|
||||
Constrain ``x[0] < sin(x[1]) + 1.9``
|
||||
|
||||
>>> import numpy as np
>>> from scipy.optimize import NonlinearConstraint
|
||||
>>> con = lambda x: x[0] - np.sin(x[1])
|
||||
>>> nlc = NonlinearConstraint(con, -np.inf, 1.9)
|
||||
|
||||
"""
|
||||
def __init__(self, fun, lb, ub, jac='2-point', hess=BFGS(),
|
||||
keep_feasible=False, finite_diff_rel_step=None,
|
||||
finite_diff_jac_sparsity=None):
|
||||
self.fun = fun
|
||||
self.lb = lb
|
||||
self.ub = ub
|
||||
self.finite_diff_rel_step = finite_diff_rel_step
|
||||
self.finite_diff_jac_sparsity = finite_diff_jac_sparsity
|
||||
self.jac = jac
|
||||
self.hess = hess
|
||||
self.keep_feasible = keep_feasible
|
||||
|
||||
|
||||
class LinearConstraint(object):
|
||||
"""Linear constraint on the variables.
|
||||
|
||||
The constraint has the general inequality form::
|
||||
|
||||
lb <= A.dot(x) <= ub
|
||||
|
||||
Here the vector of independent variables x is passed as ndarray of shape
|
||||
(n,) and the matrix A has shape (m, n).
|
||||
|
||||
It is possible to use equal bounds to represent an equality constraint or
|
||||
infinite bounds to represent a one-sided constraint.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
A : {array_like, sparse matrix}, shape (m, n)
|
||||
Matrix defining the constraint.
|
||||
lb, ub : array_like
|
||||
Lower and upper bounds on the constraint. Each array must have the
|
||||
shape (m,) or be a scalar, in the latter case a bound will be the same
|
||||
for all components of the constraint. Use ``np.inf`` with an
|
||||
appropriate sign to specify a one-sided constraint.
|
||||
Set components of `lb` and `ub` equal to represent an equality
|
||||
constraint. Note that you can mix constraints of different types:
|
||||
interval, one-sided or equality, by setting different components of
|
||||
`lb` and `ub` as necessary.
|
||||
keep_feasible : array_like of bool, optional
|
||||
Whether to keep the constraint components feasible throughout
|
||||
iterations. A single value sets this property for all components.
|
||||
Default is False. Has no effect for equality constraints.
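
Examples
--------
A minimal construction sketch (the matrix and bounds below are illustrative):

>>> import numpy as np
>>> from scipy.optimize import LinearConstraint
>>> A = [[1, 2], [2, 1]]
>>> linear_constraint = LinearConstraint(A, [-np.inf, 1], [4, np.inf])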
|
||||
"""
|
||||
def __init__(self, A, lb, ub, keep_feasible=False):
|
||||
self.A = A
|
||||
self.lb = lb
|
||||
self.ub = ub
|
||||
self.keep_feasible = keep_feasible
|
||||
|
||||
|
||||
class Bounds(object):
|
||||
"""Bounds constraint on the variables.
|
||||
|
||||
The constraint has the general inequality form::
|
||||
|
||||
lb <= x <= ub
|
||||
|
||||
It is possible to use equal bounds to represent an equality constraint or
|
||||
infinite bounds to represent a one-sided constraint.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
lb, ub : array_like, optional
|
||||
Lower and upper bounds on independent variables. Each array must
|
||||
have the same size as x or be a scalar, in which case a bound will be
|
||||
the same for all the variables. Set components of `lb` and `ub` equal
|
||||
to fix a variable. Use ``np.inf`` with an appropriate sign to disable
|
||||
bounds on all or some variables. Note that you can mix constraints of
|
||||
different types: interval, one-sided or equality, by setting different
|
||||
components of `lb` and `ub` as necessary.
|
||||
keep_feasible : array_like of bool, optional
|
||||
Whether to keep the constraint components feasible throughout
|
||||
iterations. A single value sets this property for all components.
|
||||
Default is False. Has no effect for equality constraints.
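
Examples
--------
A minimal construction sketch (the bound values are illustrative):

>>> from scipy.optimize import Bounds
>>> bounds = Bounds([0, -0.5], [1.0, 2.0])    # 0 <= x0 <= 1, -0.5 <= x1 <= 2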
|
||||
"""
|
||||
def __init__(self, lb, ub, keep_feasible=False):
|
||||
self.lb = lb
|
||||
self.ub = ub
|
||||
self.keep_feasible = keep_feasible
|
||||
|
||||
def __repr__(self):
|
||||
if np.any(self.keep_feasible):
|
||||
return "{}({!r}, {!r}, keep_feasible={!r})".format(type(self).__name__, self.lb, self.ub, self.keep_feasible)
|
||||
else:
|
||||
return "{}({!r}, {!r})".format(type(self).__name__, self.lb, self.ub)
|
||||
|
||||
|
||||
class PreparedConstraint(object):
|
||||
"""Constraint prepared from a user defined constraint.
|
||||
|
||||
On creation it will check whether a constraint definition is valid and
|
||||
the initial point is feasible. If created successfully, it will contain
|
||||
the attributes listed below.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
constraint : {NonlinearConstraint, LinearConstraint, Bounds}
|
||||
Constraint to check and prepare.
|
||||
x0 : array_like
|
||||
Initial vector of independent variables.
|
||||
sparse_jacobian : bool or None, optional
|
||||
If bool, then the Jacobian of the constraint will be converted
|
||||
to the corresponding format if necessary. If None (default), such
|
||||
conversion is not made.
|
||||
finite_diff_bounds : 2-tuple, optional
|
||||
Lower and upper bounds on the independent variables for the finite
|
||||
difference approximation, if applicable. Defaults to no bounds.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
fun : {VectorFunction, LinearVectorFunction, IdentityVectorFunction}
|
||||
Function defining the constraint wrapped by one of the convenience
|
||||
classes.
|
||||
bounds : 2-tuple
|
||||
Contains lower and upper bounds for the constraints --- lb and ub.
|
||||
These are converted to ndarray and have a size equal to the number of
|
||||
the constraints.
|
||||
keep_feasible : ndarray
|
||||
Array indicating which components must be kept feasible with a size
|
||||
equal to the number of the constraints.
|
||||
"""
|
||||
def __init__(self, constraint, x0, sparse_jacobian=None,
|
||||
finite_diff_bounds=(-np.inf, np.inf)):
|
||||
if isinstance(constraint, NonlinearConstraint):
|
||||
fun = VectorFunction(constraint.fun, x0,
|
||||
constraint.jac, constraint.hess,
|
||||
constraint.finite_diff_rel_step,
|
||||
constraint.finite_diff_jac_sparsity,
|
||||
finite_diff_bounds, sparse_jacobian)
|
||||
elif isinstance(constraint, LinearConstraint):
|
||||
fun = LinearVectorFunction(constraint.A, x0, sparse_jacobian)
|
||||
elif isinstance(constraint, Bounds):
|
||||
fun = IdentityVectorFunction(x0, sparse_jacobian)
|
||||
else:
|
||||
raise ValueError("`constraint` of an unknown type is passed.")
|
||||
|
||||
m = fun.m
|
||||
lb = np.asarray(constraint.lb, dtype=float)
|
||||
ub = np.asarray(constraint.ub, dtype=float)
|
||||
if lb.ndim == 0:
|
||||
lb = np.resize(lb, m)
|
||||
if ub.ndim == 0:
|
||||
ub = np.resize(ub, m)
|
||||
|
||||
keep_feasible = np.asarray(constraint.keep_feasible, dtype=bool)
|
||||
if keep_feasible.ndim == 0:
|
||||
keep_feasible = np.resize(keep_feasible, m)
|
||||
if keep_feasible.shape != (m,):
|
||||
raise ValueError("`keep_feasible` has a wrong shape.")
|
||||
|
||||
mask = keep_feasible & (lb != ub)
|
||||
f0 = fun.f
|
||||
if np.any(f0[mask] < lb[mask]) or np.any(f0[mask] > ub[mask]):
|
||||
raise ValueError("`x0` is infeasible with respect to some "
|
||||
"inequality constraint with `keep_feasible` "
|
||||
"set to True.")
|
||||
|
||||
self.fun = fun
|
||||
self.bounds = (lb, ub)
|
||||
self.keep_feasible = keep_feasible
|
||||
|
||||
def violation(self, x):
|
||||
"""How much the constraint is exceeded by.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : array-like
|
||||
Vector of independent variables
|
||||
|
||||
Returns
|
||||
-------
|
||||
excess : array-like
|
||||
How much the constraint is exceeded by, for each of the
|
||||
constraints specified by `PreparedConstraint.fun`.
|
||||
"""
|
||||
with suppress_warnings() as sup:
|
||||
sup.filter(UserWarning)
|
||||
ev = self.fun.fun(np.asarray(x))
|
||||
|
||||
excess_lb = np.maximum(self.bounds[0] - ev, 0)
|
||||
excess_ub = np.maximum(ev - self.bounds[1], 0)
|
||||
|
||||
return excess_lb + excess_ub
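# Hedged sketch (assumed usage of this internal helper): preparing a simple
# Bounds constraint and measuring the elementwise excess at a point.
#
#     import numpy as np
#     from scipy.optimize import Bounds
#
#     pc = PreparedConstraint(Bounds([0.0, 0.0], [1.0, 1.0]), x0=[0.5, 0.5])
#     pc.violation([1.5, -0.25])  # -> array([0.5, 0.25])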
|
||||
|
||||
|
||||
def new_bounds_to_old(lb, ub, n):
|
||||
"""Convert the new bounds representation to the old one.
|
||||
|
||||
The new representation is a tuple (lb, ub) and the old one is a list
|
||||
containing n tuples, the i-th containing the lower and upper bound on the i-th
|
||||
variable.
|
||||
If any of the entries in lb/ub are -np.inf/np.inf they are replaced by
|
||||
None.
|
||||
"""
|
||||
lb = np.asarray(lb)
|
||||
ub = np.asarray(ub)
|
||||
if lb.ndim == 0:
|
||||
lb = np.resize(lb, n)
|
||||
if ub.ndim == 0:
|
||||
ub = np.resize(ub, n)
|
||||
|
||||
lb = [float(x) if x > -np.inf else None for x in lb]
|
||||
ub = [float(x) if x < np.inf else None for x in ub]
|
||||
|
||||
return list(zip(lb, ub))
|
||||
|
||||
|
||||
def old_bound_to_new(bounds):
|
||||
"""Convert the old bounds representation to the new one.
|
||||
|
||||
The new representation is a tuple (lb, ub) and the old one is a list
|
||||
containing n tuples, the i-th containing the lower and upper bound on the i-th
|
||||
variable.
|
||||
If any of the entries in lb/ub are None they are replaced by
|
||||
-np.inf/np.inf.
|
||||
"""
|
||||
lb, ub = zip(*bounds)
|
||||
lb = np.array([float(x) if x is not None else -np.inf for x in lb])
|
||||
ub = np.array([float(x) if x is not None else np.inf for x in ub])
|
||||
return lb, ub
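# Hedged sketch of the two conversion helpers above (module-internal):
#
#     import numpy as np
#
#     old = new_bounds_to_old([0.0, -np.inf], [1.0, np.inf], 2)
#     # old == [(0.0, 1.0), (None, None)]
#     lb, ub = old_bound_to_new(old)
#     # lb == array([0., -inf]), ub == array([1., inf])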
|
||||
|
||||
|
||||
def strict_bounds(lb, ub, keep_feasible, n_vars):
|
||||
"""Remove bounds which are not asked to be kept feasible."""
|
||||
strict_lb = np.resize(lb, n_vars).astype(float)
|
||||
strict_ub = np.resize(ub, n_vars).astype(float)
|
||||
keep_feasible = np.resize(keep_feasible, n_vars)
|
||||
strict_lb[~keep_feasible] = -np.inf
|
||||
strict_ub[~keep_feasible] = np.inf
|
||||
return strict_lb, strict_ub
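# Hedged numeric sketch: components not requested to stay feasible are
# relaxed to (-inf, inf).
#
#     strict_bounds([0, 0], [1, 1], keep_feasible=[True, False], n_vars=2)
#     # -> (array([0., -inf]), array([1., inf]))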
|
||||
|
||||
|
||||
def new_constraint_to_old(con, x0):
|
||||
"""
|
||||
Converts new-style constraint objects to old-style constraint dictionaries.
|
||||
"""
|
||||
if isinstance(con, NonlinearConstraint):
|
||||
if (con.finite_diff_jac_sparsity is not None or
|
||||
con.finite_diff_rel_step is not None or
|
||||
not isinstance(con.hess, BFGS) or # misses user specified BFGS
|
||||
con.keep_feasible):
|
||||
warn("Constraint options `finite_diff_jac_sparsity`, "
|
||||
"`finite_diff_rel_step`, `keep_feasible`, and `hess`"
|
||||
"are ignored by this method.", OptimizeWarning)
|
||||
|
||||
fun = con.fun
|
||||
if callable(con.jac):
|
||||
jac = con.jac
|
||||
else:
|
||||
jac = None
|
||||
|
||||
else: # LinearConstraint
|
||||
if con.keep_feasible:
|
||||
warn("Constraint option `keep_feasible` is ignored by this "
|
||||
"method.", OptimizeWarning)
|
||||
|
||||
A = con.A
|
||||
if issparse(A):
|
||||
A = A.todense()
|
||||
fun = lambda x: np.dot(A, x)
|
||||
jac = lambda x: A
|
||||
|
||||
# FIXME: when bugs in VectorFunction/LinearVectorFunction are worked out,
|
||||
# use pcon.fun.fun and pcon.fun.jac. Until then, get fun/jac above.
|
||||
pcon = PreparedConstraint(con, x0)
|
||||
lb, ub = pcon.bounds
|
||||
|
||||
i_eq = lb == ub
|
||||
i_bound_below = np.logical_xor(lb != -np.inf, i_eq)
|
||||
i_bound_above = np.logical_xor(ub != np.inf, i_eq)
|
||||
i_unbounded = np.logical_and(lb == -np.inf, ub == np.inf)
|
||||
|
||||
if np.any(i_unbounded):
|
||||
warn("At least one constraint is unbounded above and below. Such "
|
||||
"constraints are ignored.", OptimizeWarning)
|
||||
|
||||
ceq = []
|
||||
if np.any(i_eq):
|
||||
def f_eq(x):
|
||||
y = np.array(fun(x)).flatten()
|
||||
return y[i_eq] - lb[i_eq]
|
||||
ceq = [{"type": "eq", "fun": f_eq}]
|
||||
|
||||
if jac is not None:
|
||||
def j_eq(x):
|
||||
dy = jac(x)
|
||||
if issparse(dy):
|
||||
dy = dy.todense()
|
||||
dy = np.atleast_2d(dy)
|
||||
return dy[i_eq, :]
|
||||
ceq[0]["jac"] = j_eq
|
||||
|
||||
cineq = []
|
||||
n_bound_below = np.sum(i_bound_below)
|
||||
n_bound_above = np.sum(i_bound_above)
|
||||
if n_bound_below + n_bound_above:
|
||||
def f_ineq(x):
|
||||
y = np.zeros(n_bound_below + n_bound_above)
|
||||
y_all = np.array(fun(x)).flatten()
|
||||
y[:n_bound_below] = y_all[i_bound_below] - lb[i_bound_below]
|
||||
y[n_bound_below:] = -(y_all[i_bound_above] - ub[i_bound_above])
|
||||
return y
|
||||
cineq = [{"type": "ineq", "fun": f_ineq}]
|
||||
|
||||
if jac is not None:
|
||||
def j_ineq(x):
|
||||
dy = np.zeros((n_bound_below + n_bound_above, len(x0)))
|
||||
dy_all = jac(x)
|
||||
if issparse(dy_all):
|
||||
dy_all = dy_all.todense()
|
||||
dy_all = np.atleast_2d(dy_all)
|
||||
dy[:n_bound_below, :] = dy_all[i_bound_below]
|
||||
dy[n_bound_below:, :] = -dy_all[i_bound_above]
|
||||
return dy
|
||||
cineq[0]["jac"] = j_ineq
|
||||
|
||||
old_constraints = ceq + cineq
|
||||
|
||||
if len(old_constraints) > 1:
|
||||
warn("Equality and inequality constraints are specified in the same "
|
||||
"element of the constraint list. For efficient use with this "
|
||||
"method, equality and inequality constraints should be specified "
|
||||
"in separate elements of the constraint list. ", OptimizeWarning)
|
||||
return old_constraints
|
||||
|
||||
|
||||
def old_constraint_to_new(ic, con):
|
||||
"""
|
||||
Converts old-style constraint dictionaries to new-style constraint objects.
|
||||
"""
|
||||
# check type
|
||||
try:
|
||||
ctype = con['type'].lower()
|
||||
except KeyError:
|
||||
raise KeyError('Constraint %d has no type defined.' % ic)
|
||||
except TypeError:
|
||||
raise TypeError('Constraints must be a sequence of dictionaries.')
|
||||
except AttributeError:
|
||||
raise TypeError("Constraint's type must be a string.")
|
||||
else:
|
||||
if ctype not in ['eq', 'ineq']:
|
||||
raise ValueError("Unknown constraint type '%s'." % con['type'])
|
||||
if 'fun' not in con:
|
||||
raise ValueError('Constraint %d has no function defined.' % ic)
|
||||
|
||||
lb = 0
|
||||
if ctype == 'eq':
|
||||
ub = 0
|
||||
else:
|
||||
ub = np.inf
|
||||
|
||||
jac = '2-point'
|
||||
if 'args' in con:
|
||||
args = con['args']
|
||||
fun = lambda x: con['fun'](x, *args)
|
||||
if 'jac' in con:
|
||||
jac = lambda x: con['jac'](x, *args)
|
||||
else:
|
||||
fun = con['fun']
|
||||
if 'jac' in con:
|
||||
jac = con['jac']
|
||||
|
||||
return NonlinearConstraint(fun, lb, ub, jac)
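# Hedged sketch (assumed usage): converting an old-style dictionary
# constraint, where 'ineq' means fun(x) >= 0, to the new representation
# with lb=0 and ub=np.inf.
#
#     old = {'type': 'ineq', 'fun': lambda x: x[0] - 2 * x[1] + 2}
#     con = old_constraint_to_new(0, old)
#     # con is a NonlinearConstraint with lb=0, ub=np.inf, jac='2-point'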
|
|
@ -0,0 +1,531 @@
|
|||
import numpy as np
|
||||
import scipy.sparse as sps
|
||||
from ._numdiff import approx_derivative, group_columns
|
||||
from ._hessian_update_strategy import HessianUpdateStrategy
|
||||
from scipy.sparse.linalg import LinearOperator
|
||||
|
||||
|
||||
FD_METHODS = ('2-point', '3-point', 'cs')
|
||||
|
||||
|
||||
class ScalarFunction(object):
|
||||
"""Scalar function and its derivatives.
|
||||
|
||||
This class defines a scalar function F: R^n->R and methods for
|
||||
computing or approximating its first and second derivatives.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This class implements a memoization logic. There are methods `fun`,
|
||||
`grad`, `hess` and corresponding attributes `f`, `g` and `H`. The following
|
||||
things should be considered:
|
||||
|
||||
1. Use only public methods `fun`, `grad` and `hess`.
|
||||
2. After one of the methods is called, the corresponding attribute
|
||||
will be set. However, a subsequent call with a different argument
|
||||
of *any* of the methods may overwrite the attribute.
|
||||
"""
|
||||
def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step,
|
||||
finite_diff_bounds, epsilon=None):
|
||||
if not callable(grad) and grad not in FD_METHODS:
|
||||
raise ValueError("`grad` must be either callable or one of {}."
|
||||
.format(FD_METHODS))
|
||||
|
||||
if not (callable(hess) or hess in FD_METHODS
|
||||
or isinstance(hess, HessianUpdateStrategy)):
|
||||
raise ValueError("`hess` must be either callable,"
|
||||
"HessianUpdateStrategy or one of {}."
|
||||
.format(FD_METHODS))
|
||||
|
||||
if grad in FD_METHODS and hess in FD_METHODS:
|
||||
raise ValueError("Whenever the gradient is estimated via "
|
||||
"finite-differences, we require the Hessian "
|
||||
"to be estimated using one of the "
|
||||
"quasi-Newton strategies.")
|
||||
|
||||
self.x = np.atleast_1d(x0).astype(float)
|
||||
self.n = self.x.size
|
||||
self.nfev = 0
|
||||
self.ngev = 0
|
||||
self.nhev = 0
|
||||
self.f_updated = False
|
||||
self.g_updated = False
|
||||
self.H_updated = False
|
||||
|
||||
finite_diff_options = {}
|
||||
if grad in FD_METHODS:
|
||||
finite_diff_options["method"] = grad
|
||||
finite_diff_options["rel_step"] = finite_diff_rel_step
|
||||
finite_diff_options["abs_step"] = epsilon
|
||||
finite_diff_options["bounds"] = finite_diff_bounds
|
||||
if hess in FD_METHODS:
|
||||
finite_diff_options["method"] = hess
|
||||
finite_diff_options["rel_step"] = finite_diff_rel_step
|
||||
finite_diff_options["abs_step"] = epsilon
|
||||
finite_diff_options["as_linear_operator"] = True
|
||||
|
||||
# Function evaluation
|
||||
def fun_wrapped(x):
|
||||
self.nfev += 1
|
||||
return fun(x, *args)
|
||||
|
||||
def update_fun():
|
||||
self.f = fun_wrapped(self.x)
|
||||
|
||||
self._update_fun_impl = update_fun
|
||||
self._update_fun()
|
||||
|
||||
# Gradient evaluation
|
||||
if callable(grad):
|
||||
def grad_wrapped(x):
|
||||
self.ngev += 1
|
||||
return np.atleast_1d(grad(x, *args))
|
||||
|
||||
def update_grad():
|
||||
self.g = grad_wrapped(self.x)
|
||||
|
||||
elif grad in FD_METHODS:
|
||||
def update_grad():
|
||||
self._update_fun()
|
||||
self.ngev += 1
|
||||
self.g = approx_derivative(fun_wrapped, self.x, f0=self.f,
|
||||
**finite_diff_options)
|
||||
|
||||
self._update_grad_impl = update_grad
|
||||
self._update_grad()
|
||||
|
||||
# Hessian Evaluation
|
||||
if callable(hess):
|
||||
self.H = hess(x0, *args)
|
||||
self.H_updated = True
|
||||
self.nhev += 1
|
||||
|
||||
if sps.issparse(self.H):
|
||||
def hess_wrapped(x):
|
||||
self.nhev += 1
|
||||
return sps.csr_matrix(hess(x, *args))
|
||||
self.H = sps.csr_matrix(self.H)
|
||||
|
||||
elif isinstance(self.H, LinearOperator):
|
||||
def hess_wrapped(x):
|
||||
self.nhev += 1
|
||||
return hess(x, *args)
|
||||
|
||||
else:
|
||||
def hess_wrapped(x):
|
||||
self.nhev += 1
|
||||
return np.atleast_2d(np.asarray(hess(x, *args)))
|
||||
self.H = np.atleast_2d(np.asarray(self.H))
|
||||
|
||||
def update_hess():
|
||||
self.H = hess_wrapped(self.x)
|
||||
|
||||
elif hess in FD_METHODS:
|
||||
def update_hess():
|
||||
self._update_grad()
|
||||
self.H = approx_derivative(grad_wrapped, self.x, f0=self.g,
|
||||
**finite_diff_options)
|
||||
return self.H
|
||||
|
||||
update_hess()
|
||||
self.H_updated = True
|
||||
elif isinstance(hess, HessianUpdateStrategy):
|
||||
self.H = hess
|
||||
self.H.initialize(self.n, 'hess')
|
||||
self.H_updated = True
|
||||
self.x_prev = None
|
||||
self.g_prev = None
|
||||
|
||||
def update_hess():
|
||||
self._update_grad()
|
||||
self.H.update(self.x - self.x_prev, self.g - self.g_prev)
|
||||
|
||||
self._update_hess_impl = update_hess
|
||||
|
||||
if isinstance(hess, HessianUpdateStrategy):
|
||||
def update_x(x):
|
||||
self._update_grad()
|
||||
self.x_prev = self.x
|
||||
self.g_prev = self.g
|
||||
|
||||
self.x = np.atleast_1d(x).astype(float)
|
||||
self.f_updated = False
|
||||
self.g_updated = False
|
||||
self.H_updated = False
|
||||
self._update_hess()
|
||||
else:
|
||||
def update_x(x):
|
||||
self.x = np.atleast_1d(x).astype(float)
|
||||
self.f_updated = False
|
||||
self.g_updated = False
|
||||
self.H_updated = False
|
||||
self._update_x_impl = update_x
|
||||
|
||||
def _update_fun(self):
|
||||
if not self.f_updated:
|
||||
self._update_fun_impl()
|
||||
self.f_updated = True
|
||||
|
||||
def _update_grad(self):
|
||||
if not self.g_updated:
|
||||
self._update_grad_impl()
|
||||
self.g_updated = True
|
||||
|
||||
def _update_hess(self):
|
||||
if not self.H_updated:
|
||||
self._update_hess_impl()
|
||||
self.H_updated = True
|
||||
|
||||
def fun(self, x):
|
||||
if not np.array_equal(x, self.x):
|
||||
self._update_x_impl(x)
|
||||
self._update_fun()
|
||||
return self.f
|
||||
|
||||
def grad(self, x):
|
||||
if not np.array_equal(x, self.x):
|
||||
self._update_x_impl(x)
|
||||
self._update_grad()
|
||||
return self.g
|
||||
|
||||
def hess(self, x):
|
||||
if not np.array_equal(x, self.x):
|
||||
self._update_x_impl(x)
|
||||
self._update_hess()
|
||||
return self.H
|
||||
|
||||
def fun_and_grad(self, x):
|
||||
if not np.array_equal(x, self.x):
|
||||
self._update_x_impl(x)
|
||||
self._update_fun()
|
||||
self._update_grad()
|
||||
return self.f, self.g
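# Hedged sketch (assumed usage of this internal helper) of the memoization
# described in the Notes: repeated calls at the same point reuse cached
# values instead of re-evaluating the objective.
#
#     import numpy as np
#     from scipy.optimize import BFGS
#
#     sf = ScalarFunction(lambda x: np.sum(x**2), x0=[1.0, 2.0], args=(),
#                         grad='2-point', hess=BFGS(),
#                         finite_diff_rel_step=None,
#                         finite_diff_bounds=(-np.inf, np.inf))
#     sf.fun(np.array([1.0, 2.0]))   # cached: x equals the current point
#     sf.grad(np.array([1.0, 2.0]))  # gradient from 2-point differences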
|
||||
|
||||
|
||||
class VectorFunction(object):
|
||||
"""Vector function and its derivatives.
|
||||
|
||||
This class defines a vector function F: R^n->R^m and methods for
|
||||
computing or approximating its first and second derivatives.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This class implements a memoization logic. There are methods `fun`,
|
||||
`jac`, `hess` and corresponding attributes `f`, `J` and `H`. The following
|
||||
things should be considered:
|
||||
|
||||
1. Use only public methods `fun`, `jac` and `hess`.
|
||||
2. After one of the methods is called, the corresponding attribute
|
||||
will be set. However, a subsequent call with a different argument
|
||||
of *any* of the methods may overwrite the attribute.
|
||||
"""
|
||||
def __init__(self, fun, x0, jac, hess,
|
||||
finite_diff_rel_step, finite_diff_jac_sparsity,
|
||||
finite_diff_bounds, sparse_jacobian):
|
||||
if not callable(jac) and jac not in FD_METHODS:
|
||||
raise ValueError("`jac` must be either callable or one of {}."
|
||||
.format(FD_METHODS))
|
||||
|
||||
if not (callable(hess) or hess in FD_METHODS
|
||||
or isinstance(hess, HessianUpdateStrategy)):
|
||||
raise ValueError("`hess` must be either callable,"
|
||||
"HessianUpdateStrategy or one of {}."
|
||||
.format(FD_METHODS))
|
||||
|
||||
if jac in FD_METHODS and hess in FD_METHODS:
|
||||
raise ValueError("Whenever the Jacobian is estimated via "
|
||||
"finite-differences, we require the Hessian to "
|
||||
"be estimated using one of the quasi-Newton "
|
||||
"strategies.")
|
||||
|
||||
self.x = np.atleast_1d(x0).astype(float)
|
||||
self.n = self.x.size
|
||||
self.nfev = 0
|
||||
self.njev = 0
|
||||
self.nhev = 0
|
||||
self.f_updated = False
|
||||
self.J_updated = False
|
||||
self.H_updated = False
|
||||
|
||||
finite_diff_options = {}
|
||||
if jac in FD_METHODS:
|
||||
finite_diff_options["method"] = jac
|
||||
finite_diff_options["rel_step"] = finite_diff_rel_step
|
||||
if finite_diff_jac_sparsity is not None:
|
||||
sparsity_groups = group_columns(finite_diff_jac_sparsity)
|
||||
finite_diff_options["sparsity"] = (finite_diff_jac_sparsity,
|
||||
sparsity_groups)
|
||||
finite_diff_options["bounds"] = finite_diff_bounds
|
||||
self.x_diff = np.copy(self.x)
|
||||
if hess in FD_METHODS:
|
||||
finite_diff_options["method"] = hess
|
||||
finite_diff_options["rel_step"] = finite_diff_rel_step
|
||||
finite_diff_options["as_linear_operator"] = True
|
||||
self.x_diff = np.copy(self.x)
|
||||
if jac in FD_METHODS and hess in FD_METHODS:
|
||||
raise ValueError("Whenever the Jacobian is estimated via "
|
||||
"finite-differences, we require the Hessian to "
|
||||
"be estimated using one of the quasi-Newton "
|
||||
"strategies.")
|
||||
|
||||
# Function evaluation
|
||||
def fun_wrapped(x):
|
||||
self.nfev += 1
|
||||
return np.atleast_1d(fun(x))
|
||||
|
||||
def update_fun():
|
||||
self.f = fun_wrapped(self.x)
|
||||
|
||||
self._update_fun_impl = update_fun
|
||||
update_fun()
|
||||
|
||||
self.v = np.zeros_like(self.f)
|
||||
self.m = self.v.size
|
||||
|
||||
# Jacobian Evaluation
|
||||
if callable(jac):
|
||||
self.J = jac(self.x)
|
||||
self.J_updated = True
|
||||
self.njev += 1
|
||||
|
||||
if (sparse_jacobian or
|
||||
sparse_jacobian is None and sps.issparse(self.J)):
|
||||
def jac_wrapped(x):
|
||||
self.njev += 1
|
||||
return sps.csr_matrix(jac(x))
|
||||
self.J = sps.csr_matrix(self.J)
|
||||
self.sparse_jacobian = True
|
||||
|
||||
elif sps.issparse(self.J):
|
||||
def jac_wrapped(x):
|
||||
self.njev += 1
|
||||
return jac(x).toarray()
|
||||
self.J = self.J.toarray()
|
||||
self.sparse_jacobian = False
|
||||
|
||||
else:
|
||||
def jac_wrapped(x):
|
||||
self.njev += 1
|
||||
return np.atleast_2d(jac(x))
|
||||
self.J = np.atleast_2d(self.J)
|
||||
self.sparse_jacobian = False
|
||||
|
||||
def update_jac():
|
||||
self.J = jac_wrapped(self.x)
|
||||
|
||||
elif jac in FD_METHODS:
|
||||
self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
|
||||
**finite_diff_options)
|
||||
self.J_updated = True
|
||||
|
||||
if (sparse_jacobian or
|
||||
sparse_jacobian is None and sps.issparse(self.J)):
|
||||
def update_jac():
|
||||
self._update_fun()
|
||||
self.J = sps.csr_matrix(
|
||||
approx_derivative(fun_wrapped, self.x, f0=self.f,
|
||||
**finite_diff_options))
|
||||
self.J = sps.csr_matrix(self.J)
|
||||
self.sparse_jacobian = True
|
||||
|
||||
elif sps.issparse(self.J):
|
||||
def update_jac():
|
||||
self._update_fun()
|
||||
self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
|
||||
**finite_diff_options).toarray()
|
||||
self.J = self.J.toarray()
|
||||
self.sparse_jacobian = False
|
||||
|
||||
else:
|
||||
def update_jac():
|
||||
self._update_fun()
|
||||
self.J = np.atleast_2d(
|
||||
approx_derivative(fun_wrapped, self.x, f0=self.f,
|
||||
**finite_diff_options))
|
||||
self.J = np.atleast_2d(self.J)
|
||||
self.sparse_jacobian = False
|
||||
|
||||
self._update_jac_impl = update_jac
|
||||
|
||||
# Define Hessian
|
||||
if callable(hess):
|
||||
self.H = hess(self.x, self.v)
|
||||
self.H_updated = True
|
||||
self.nhev += 1
|
||||
|
||||
if sps.issparse(self.H):
|
||||
def hess_wrapped(x, v):
|
||||
self.nhev += 1
|
||||
return sps.csr_matrix(hess(x, v))
|
||||
self.H = sps.csr_matrix(self.H)
|
||||
|
||||
elif isinstance(self.H, LinearOperator):
|
||||
def hess_wrapped(x, v):
|
||||
self.nhev += 1
|
||||
return hess(x, v)
|
||||
|
||||
else:
|
||||
def hess_wrapped(x, v):
|
||||
self.nhev += 1
|
||||
return np.atleast_2d(np.asarray(hess(x, v)))
|
||||
self.H = np.atleast_2d(np.asarray(self.H))
|
||||
|
||||
def update_hess():
|
||||
self.H = hess_wrapped(self.x, self.v)
|
||||
elif hess in FD_METHODS:
|
||||
def jac_dot_v(x, v):
|
||||
return jac_wrapped(x).T.dot(v)
|
||||
|
||||
def update_hess():
|
||||
self._update_jac()
|
||||
self.H = approx_derivative(jac_dot_v, self.x,
|
||||
f0=self.J.T.dot(self.v),
|
||||
args=(self.v,),
|
||||
**finite_diff_options)
|
||||
update_hess()
|
||||
self.H_updated = True
|
||||
elif isinstance(hess, HessianUpdateStrategy):
|
||||
self.H = hess
|
||||
self.H.initialize(self.n, 'hess')
|
||||
self.H_updated = True
|
||||
self.x_prev = None
|
||||
self.J_prev = None
|
||||
|
||||
def update_hess():
|
||||
self._update_jac()
|
||||
# When v is updated before x was updated, then x_prev and
|
||||
# J_prev are None and we need this check.
|
||||
if self.x_prev is not None and self.J_prev is not None:
|
||||
delta_x = self.x - self.x_prev
|
||||
delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v)
|
||||
self.H.update(delta_x, delta_g)
|
||||
|
||||
self._update_hess_impl = update_hess
|
||||
|
||||
if isinstance(hess, HessianUpdateStrategy):
|
||||
def update_x(x):
|
||||
self._update_jac()
|
||||
self.x_prev = self.x
|
||||
self.J_prev = self.J
|
||||
self.x = np.atleast_1d(x).astype(float)
|
||||
self.f_updated = False
|
||||
self.J_updated = False
|
||||
self.H_updated = False
|
||||
self._update_hess()
|
||||
else:
|
||||
def update_x(x):
|
||||
self.x = np.atleast_1d(x).astype(float)
|
||||
self.f_updated = False
|
||||
self.J_updated = False
|
||||
self.H_updated = False
|
||||
|
||||
self._update_x_impl = update_x
|
||||
|
||||
def _update_v(self, v):
|
||||
if not np.array_equal(v, self.v):
|
||||
self.v = v
|
||||
self.H_updated = False
|
||||
|
||||
def _update_x(self, x):
|
||||
if not np.array_equal(x, self.x):
|
||||
self._update_x_impl(x)
|
||||
|
||||
def _update_fun(self):
|
||||
if not self.f_updated:
|
||||
self._update_fun_impl()
|
||||
self.f_updated = True
|
||||
|
||||
def _update_jac(self):
|
||||
if not self.J_updated:
|
||||
self._update_jac_impl()
|
||||
self.J_updated = True
|
||||
|
||||
def _update_hess(self):
|
||||
if not self.H_updated:
|
||||
self._update_hess_impl()
|
||||
self.H_updated = True
|
||||
|
||||
def fun(self, x):
|
||||
self._update_x(x)
|
||||
self._update_fun()
|
||||
return self.f
|
||||
|
||||
def jac(self, x):
|
||||
self._update_x(x)
|
||||
self._update_jac()
|
||||
return self.J
|
||||
|
||||
def hess(self, x, v):
|
||||
# v should be updated before x.
|
||||
self._update_v(v)
|
||||
self._update_x(x)
|
||||
self._update_hess()
|
||||
return self.H
|
||||
|
||||
|
||||
class LinearVectorFunction(object):
|
||||
"""Linear vector function and its derivatives.
|
||||
|
||||
Defines a linear function F = A x, where x is an N-D vector and
|
||||
A is an m-by-n matrix. The Jacobian is constant and equal to A. The Hessian
|
||||
is identically zero and it is returned as a csr matrix.
|
||||
"""
|
||||
def __init__(self, A, x0, sparse_jacobian):
|
||||
if sparse_jacobian or sparse_jacobian is None and sps.issparse(A):
|
||||
self.J = sps.csr_matrix(A)
|
||||
self.sparse_jacobian = True
|
||||
elif sps.issparse(A):
|
||||
self.J = A.toarray()
|
||||
self.sparse_jacobian = False
|
||||
else:
|
||||
# np.asarray makes sure A is ndarray and not matrix
|
||||
self.J = np.atleast_2d(np.asarray(A))
|
||||
self.sparse_jacobian = False
|
||||
|
||||
self.m, self.n = self.J.shape
|
||||
|
||||
self.x = np.atleast_1d(x0).astype(float)
|
||||
self.f = self.J.dot(self.x)
|
||||
self.f_updated = True
|
||||
|
||||
self.v = np.zeros(self.m, dtype=float)
|
||||
self.H = sps.csr_matrix((self.n, self.n))
|
||||
|
||||
def _update_x(self, x):
|
||||
if not np.array_equal(x, self.x):
|
||||
self.x = np.atleast_1d(x).astype(float)
|
||||
self.f_updated = False
|
||||
|
||||
def fun(self, x):
|
||||
self._update_x(x)
|
||||
if not self.f_updated:
|
||||
self.f = self.J.dot(x)
|
||||
self.f_updated = True
|
||||
return self.f
|
||||
|
||||
def jac(self, x):
|
||||
self._update_x(x)
|
||||
return self.J
|
||||
|
||||
def hess(self, x, v):
|
||||
self._update_x(x)
|
||||
self.v = v
|
||||
return self.H
|
||||
|
||||
|
||||
class IdentityVectorFunction(LinearVectorFunction):
|
||||
"""Identity vector function and its derivatives.
|
||||
|
||||
The Jacobian is the identity matrix, returned as a dense array when
|
||||
`sparse_jacobian=False` and as a csr matrix otherwise. The Hessian is
|
||||
identically zero and it is returned as a csr matrix.
|
||||
"""
|
||||
def __init__(self, x0, sparse_jacobian):
|
||||
n = len(x0)
|
||||
if sparse_jacobian or sparse_jacobian is None:
|
||||
A = sps.eye(n, format='csr')
|
||||
sparse_jacobian = True
|
||||
else:
|
||||
A = np.eye(n)
|
||||
sparse_jacobian = False
|
||||
super(IdentityVectorFunction, self).__init__(A, x0, sparse_jacobian)
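# Hedged sketch (assumed usage of these internal helpers): for F(x) = A x the
# Jacobian returned by `jac` is the constant A and `hess` is the zero matrix.
#
#     import numpy as np
#
#     A = np.array([[1.0, 2.0], [0.0, 1.0]])
#     lvf = LinearVectorFunction(A, x0=[1.0, 1.0], sparse_jacobian=False)
#     lvf.fun([1.0, 1.0])  # -> array([3., 1.])
#     lvf.jac([1.0, 1.0])  # -> A (dense, since sparse_jacobian=False)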
|
1345
venv/Lib/site-packages/scipy/optimize/_differentialevolution.py
Normal file
1345
venv/Lib/site-packages/scipy/optimize/_differentialevolution.py
Normal file
File diff suppressed because it is too large
689
venv/Lib/site-packages/scipy/optimize/_dual_annealing.py
Normal file
689
venv/Lib/site-packages/scipy/optimize/_dual_annealing.py
Normal file
|
@ -0,0 +1,689 @@
|
|||
# Dual Annealing implementation.
|
||||
# Copyright (c) 2018 Sylvain Gubian <sylvain.gubian@pmi.com>,
|
||||
# Yang Xiang <yang.xiang@pmi.com>
|
||||
# Author: Sylvain Gubian, Yang Xiang, PMP S.A.
|
||||
|
||||
"""
|
||||
A Dual Annealing global optimization algorithm
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from scipy.optimize import OptimizeResult
|
||||
from scipy.optimize import minimize
|
||||
from scipy.special import gammaln
|
||||
from scipy._lib._util import check_random_state
|
||||
|
||||
|
||||
__all__ = ['dual_annealing']
|
||||
|
||||
|
||||
class VisitingDistribution(object):
|
||||
"""
|
||||
Class used to generate new coordinates based on the distorted
|
||||
Cauchy-Lorentz distribution. Depending on the steps within the strategy
|
||||
chain, the class implements the strategy for generating new location
|
||||
changes.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
lb : array_like
|
||||
A 1-D NumPy ndarray containing lower bounds of the generated
|
||||
components. Neither NaN nor inf is allowed.
|
||||
ub : array_like
|
||||
A 1-D NumPy ndarray containing upper bounds for the generated
|
||||
components. Neither NaN nor inf is allowed.
|
||||
visiting_param : float
|
||||
Parameter for visiting distribution. Default value is 2.62.
|
||||
Higher values give the visiting distribution a heavier tail, this
|
||||
makes the algorithm jump to a more distant region.
|
||||
The value range is (0, 3]. Its value is fixed for the life of the
|
||||
object.
|
||||
rand_gen : {`~numpy.random.RandomState`, `~numpy.random.Generator`}
|
||||
A `~numpy.random.RandomState`, `~numpy.random.Generator` object
|
||||
for using the current state of the created random generator container.
|
||||
"""
|
||||
TAIL_LIMIT = 1.e8
|
||||
MIN_VISIT_BOUND = 1.e-10
|
||||
|
||||
def __init__(self, lb, ub, visiting_param, rand_gen):
|
||||
# if you wish to make _visiting_param adjustable during the life of
|
||||
# the object then _factor2, _factor3, _factor5, _d1, _factor6 will
|
||||
# have to be dynamically calculated in `visit_fn`. They're factored
|
||||
# out here so they don't need to be recalculated all the time.
|
||||
self._visiting_param = visiting_param
|
||||
self.rand_gen = rand_gen
|
||||
self.lower = lb
|
||||
self.upper = ub
|
||||
self.bound_range = ub - lb
|
||||
|
||||
# these are invariant numbers unless visiting_param changes
|
||||
self._factor2 = np.exp((4.0 - self._visiting_param) * np.log(
|
||||
self._visiting_param - 1.0))
|
||||
self._factor3 = np.exp((2.0 - self._visiting_param) * np.log(2.0)
|
||||
/ (self._visiting_param - 1.0))
|
||||
self._factor4_p = np.sqrt(np.pi) * self._factor2 / (self._factor3 * (
|
||||
3.0 - self._visiting_param))
|
||||
|
||||
self._factor5 = 1.0 / (self._visiting_param - 1.0) - 0.5
|
||||
self._d1 = 2.0 - self._factor5
|
||||
self._factor6 = np.pi * (1.0 - self._factor5) / np.sin(
|
||||
np.pi * (1.0 - self._factor5)) / np.exp(gammaln(self._d1))
|
||||
|
||||
def visiting(self, x, step, temperature):
|
||||
""" Based on the step in the strategy chain, new coordinated are
|
||||
generated by changing either all components at the same time or only
|
||||
one of them; the new values are computed with the visit_fn method
|
||||
"""
|
||||
dim = x.size
|
||||
if step < dim:
|
||||
# Changing all coordinates with a new visiting value
|
||||
visits = self.visit_fn(temperature, dim)
|
||||
upper_sample, lower_sample = self.rand_gen.uniform(size=2)
|
||||
visits[visits > self.TAIL_LIMIT] = self.TAIL_LIMIT * upper_sample
|
||||
visits[visits < -self.TAIL_LIMIT] = -self.TAIL_LIMIT * lower_sample
|
||||
x_visit = visits + x
|
||||
a = x_visit - self.lower
|
||||
b = np.fmod(a, self.bound_range) + self.bound_range
|
||||
x_visit = np.fmod(b, self.bound_range) + self.lower
|
||||
x_visit[np.fabs(
|
||||
x_visit - self.lower) < self.MIN_VISIT_BOUND] += 1.e-10
|
||||
else:
|
||||
# Changing only one coordinate at a time based on strategy
|
||||
# chain step
|
||||
x_visit = np.copy(x)
|
||||
visit = self.visit_fn(temperature, 1)
|
||||
if visit > self.TAIL_LIMIT:
|
||||
visit = self.TAIL_LIMIT * self.rand_gen.uniform()
|
||||
elif visit < -self.TAIL_LIMIT:
|
||||
visit = -self.TAIL_LIMIT * self.rand_gen.uniform()
|
||||
index = step - dim
|
||||
x_visit[index] = visit + x[index]
|
||||
a = x_visit[index] - self.lower[index]
|
||||
b = np.fmod(a, self.bound_range[index]) + self.bound_range[index]
|
||||
x_visit[index] = np.fmod(b, self.bound_range[
|
||||
index]) + self.lower[index]
|
||||
if np.fabs(x_visit[index] - self.lower[
|
||||
index]) < self.MIN_VISIT_BOUND:
|
||||
x_visit[index] += self.MIN_VISIT_BOUND
|
||||
return x_visit
|
||||
|
||||
def visit_fn(self, temperature, dim):
|
||||
""" Formula Visita from p. 405 of reference [2] """
|
||||
x, y = self.rand_gen.normal(size=(dim, 2)).T
|
||||
|
||||
factor1 = np.exp(np.log(temperature) / (self._visiting_param - 1.0))
|
||||
factor4 = self._factor4_p * factor1
|
||||
|
||||
# sigmax
|
||||
x *= np.exp(-(self._visiting_param - 1.0) * np.log(
|
||||
self._factor6 / factor4) / (3.0 - self._visiting_param))
|
||||
|
||||
den = np.exp((self._visiting_param - 1.0) * np.log(np.fabs(y)) /
|
||||
(3.0 - self._visiting_param))
|
||||
|
||||
return x / den
|
||||
|
||||
|
||||
class EnergyState(object):
|
||||
"""
|
||||
Class used to record the energy state. At any time, it knows the
|
||||
currently used coordinates and the most recent best location.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
lower : array_like
|
||||
A 1-D NumPy ndarray containing lower bounds for generating an initial
|
||||
random components in the `reset` method.
|
||||
upper : array_like
|
||||
A 1-D NumPy ndarray containing upper bounds for generating an initial
|
||||
random components in the `reset` method.
|
||||
Neither NaN nor inf is allowed.
|
||||
callback : callable, ``callback(x, f, context)``, optional
|
||||
A callback function which will be called for all minima found.
|
||||
``x`` and ``f`` are the coordinates and function value of the
|
||||
latest minimum found, and `context` has value in [0, 1, 2]
|
||||
"""
|
||||
# Maximum number of trials for generating a valid starting point
|
||||
MAX_REINIT_COUNT = 1000
|
||||
|
||||
def __init__(self, lower, upper, callback=None):
|
||||
self.ebest = None
|
||||
self.current_energy = None
|
||||
self.current_location = None
|
||||
self.xbest = None
|
||||
self.lower = lower
|
||||
self.upper = upper
|
||||
self.callback = callback
|
||||
|
||||
def reset(self, func_wrapper, rand_gen, x0=None):
|
||||
"""
|
||||
Initialize the current location in the search domain. If `x0` is not
|
||||
provided, a random location within the bounds is generated.
|
||||
"""
|
||||
if x0 is None:
|
||||
self.current_location = rand_gen.uniform(self.lower, self.upper,
|
||||
size=len(self.lower))
|
||||
else:
|
||||
self.current_location = np.copy(x0)
|
||||
init_error = True
|
||||
reinit_counter = 0
|
||||
while init_error:
|
||||
self.current_energy = func_wrapper.fun(self.current_location)
|
||||
if self.current_energy is None:
|
||||
raise ValueError('Objective function is returning None')
|
||||
if (not np.isfinite(self.current_energy) or np.isnan(
|
||||
self.current_energy)):
|
||||
if reinit_counter >= EnergyState.MAX_REINIT_COUNT:
|
||||
init_error = False
|
||||
message = (
|
||||
'Stopping algorithm because function '
|
||||
'creates NaN or (+/-) infinity values even when '
|
||||
'trying new random parameters'
|
||||
)
|
||||
raise ValueError(message)
|
||||
self.current_location = rand_gen.uniform(self.lower,
|
||||
self.upper,
|
||||
size=self.lower.size)
|
||||
reinit_counter += 1
|
||||
else:
|
||||
init_error = False
|
||||
# If first time reset, initialize ebest and xbest
|
||||
if self.ebest is None and self.xbest is None:
|
||||
self.ebest = self.current_energy
|
||||
self.xbest = np.copy(self.current_location)
|
||||
# Otherwise, we keep them in case of reannealing reset
|
||||
|
||||
def update_best(self, e, x, context):
|
||||
self.ebest = e
|
||||
self.xbest = np.copy(x)
|
||||
if self.callback is not None:
|
||||
val = self.callback(x, e, context)
|
||||
if val is not None:
|
||||
if val:
|
||||
return('Callback function requested to stop early by '
|
||||
'returning True')
|
||||
|
||||
def update_current(self, e, x):
|
||||
self.current_energy = e
|
||||
self.current_location = np.copy(x)
|
||||
|
||||
|
||||
class StrategyChain(object):
|
||||
"""
|
||||
Class that implements within a Markov chain the strategy for location
|
||||
acceptance and local search decision making.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
acceptance_param : float
|
||||
Parameter for acceptance distribution. It is used to control the
|
||||
probability of acceptance. The lower the acceptance parameter, the
|
||||
smaller the probability of acceptance. Default value is -5.0 with
|
||||
a range (-1e4, -5].
|
||||
visit_dist : VisitingDistribution
|
||||
Instance of `VisitingDistribution` class.
|
||||
func_wrapper : ObjectiveFunWrapper
|
||||
Instance of `ObjectiveFunWrapper` class.
|
||||
minimizer_wrapper: LocalSearchWrapper
|
||||
Instance of `LocalSearchWrapper` class.
|
||||
rand_gen : {`~numpy.random.RandomState`, `~numpy.random.Generator`}
|
||||
A `~numpy.random.RandomState` or `~numpy.random.Generator`
|
||||
object for using the current state of the created random generator
|
||||
container.
|
||||
energy_state: EnergyState
|
||||
Instance of `EnergyState` class.
|
||||
"""
|
||||
def __init__(self, acceptance_param, visit_dist, func_wrapper,
|
||||
minimizer_wrapper, rand_gen, energy_state):
|
||||
# Local strategy chain minimum energy and location
|
||||
self.emin = energy_state.current_energy
|
||||
self.xmin = np.array(energy_state.current_location)
|
||||
# Global optimizer state
|
||||
self.energy_state = energy_state
|
||||
# Acceptance parameter
|
||||
self.acceptance_param = acceptance_param
|
||||
# Visiting distribution instance
|
||||
self.visit_dist = visit_dist
|
||||
# Wrapper to objective function
|
||||
self.func_wrapper = func_wrapper
|
||||
# Wrapper to the local minimizer
|
||||
self.minimizer_wrapper = minimizer_wrapper
|
||||
self.not_improved_idx = 0
|
||||
self.not_improved_max_idx = 1000
|
||||
self._rand_gen = rand_gen
|
||||
self.temperature_step = 0
|
||||
self.K = 100 * len(energy_state.current_location)
|
||||
|
||||
def accept_reject(self, j, e, x_visit):
|
||||
r = self._rand_gen.uniform()
|
||||
pqv_temp = (self.acceptance_param - 1.0) * (
|
||||
e - self.energy_state.current_energy) / (
|
||||
self.temperature_step + 1.)
|
||||
if pqv_temp <= 0.:
|
||||
pqv = 0.
|
||||
else:
|
||||
pqv = np.exp(np.log(pqv_temp) / (
|
||||
1. - self.acceptance_param))
|
||||
if r <= pqv:
|
||||
# We accept the new location and update state
|
||||
self.energy_state.update_current(e, x_visit)
|
||||
self.xmin = np.copy(self.energy_state.current_location)
|
||||
|
||||
# No improvement for a long time
|
||||
if self.not_improved_idx >= self.not_improved_max_idx:
|
||||
if j == 0 or self.energy_state.current_energy < self.emin:
|
||||
self.emin = self.energy_state.current_energy
|
||||
self.xmin = np.copy(self.energy_state.current_location)
|
||||
|
||||
def run(self, step, temperature):
|
||||
self.temperature_step = temperature / float(step + 1)
|
||||
self.not_improved_idx += 1
|
||||
for j in range(self.energy_state.current_location.size * 2):
|
||||
if j == 0:
|
||||
if step == 0:
|
||||
self.energy_state_improved = True
|
||||
else:
|
||||
self.energy_state_improved = False
|
||||
x_visit = self.visit_dist.visiting(
|
||||
self.energy_state.current_location, j, temperature)
|
||||
# Calling the objective function
|
||||
e = self.func_wrapper.fun(x_visit)
|
||||
if e < self.energy_state.current_energy:
|
||||
# We have got a better energy value
|
||||
self.energy_state.update_current(e, x_visit)
|
||||
if e < self.energy_state.ebest:
|
||||
val = self.energy_state.update_best(e, x_visit, 0)
|
||||
if val is not None:
|
||||
if val:
|
||||
return val
|
||||
self.energy_state_improved = True
|
||||
self.not_improved_idx = 0
|
||||
else:
|
||||
# We have not improved but do we accept the new location?
|
||||
self.accept_reject(j, e, x_visit)
|
||||
if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
|
||||
return ('Maximum number of function calls reached '
|
||||
'during annealing')
|
||||
# End of StrategyChain loop
|
||||
|
||||
def local_search(self):
|
||||
# Decision making for performing a local search
|
||||
# based on strategy chain results
|
||||
# If energy has been improved or no improvement since too long,
|
||||
# performing a local search with the best strategy chain location
|
||||
if self.energy_state_improved:
|
||||
# Global energy has improved, let's see if LS improves further
|
||||
e, x = self.minimizer_wrapper.local_search(self.energy_state.xbest,
|
||||
self.energy_state.ebest)
|
||||
if e < self.energy_state.ebest:
|
||||
self.not_improved_idx = 0
|
||||
val = self.energy_state.update_best(e, x, 1)
|
||||
if val is not None:
|
||||
if val:
|
||||
return val
|
||||
self.energy_state.update_current(e, x)
|
||||
if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
|
||||
return ('Maximum number of function calls reached '
|
||||
'during local search')
|
||||
# Check probability of a need to perform a LS even if no improvement
|
||||
do_ls = False
|
||||
if self.K < 90 * len(self.energy_state.current_location):
|
||||
pls = np.exp(self.K * (
|
||||
self.energy_state.ebest - self.energy_state.current_energy) /
|
||||
self.temperature_step)
|
||||
if pls >= self._rand_gen.uniform():
|
||||
do_ls = True
|
||||
# Global energy not improved, let's see what LS gives
|
||||
# on the best strategy chain location
|
||||
if self.not_improved_idx >= self.not_improved_max_idx:
|
||||
do_ls = True
|
||||
if do_ls:
|
||||
e, x = self.minimizer_wrapper.local_search(self.xmin, self.emin)
|
||||
self.xmin = np.copy(x)
|
||||
self.emin = e
|
||||
self.not_improved_idx = 0
|
||||
self.not_improved_max_idx = self.energy_state.current_location.size
|
||||
if e < self.energy_state.ebest:
|
||||
val = self.energy_state.update_best(
|
||||
self.emin, self.xmin, 2)
|
||||
if val is not None:
|
||||
if val:
|
||||
return val
|
||||
self.energy_state.update_current(e, x)
|
||||
if self.func_wrapper.nfev >= self.func_wrapper.maxfun:
|
||||
return ('Maximum number of function calls reached '
|
||||
'during dual annealing')
|
||||
|
||||
|
||||
class ObjectiveFunWrapper(object):
|
||||
|
||||
def __init__(self, func, maxfun=1e7, *args):
|
||||
self.func = func
|
||||
self.args = args
|
||||
# Number of objective function evaluations
|
||||
self.nfev = 0
|
||||
# Number of gradient function evaluation if used
|
||||
self.ngev = 0
|
||||
# Number of hessian of the objective function if used
|
||||
self.nhev = 0
|
||||
self.maxfun = maxfun
|
||||
|
||||
def fun(self, x):
|
||||
self.nfev += 1
|
||||
return self.func(x, *self.args)
|
||||
|
||||
|
||||
class LocalSearchWrapper(object):
|
||||
"""
|
||||
Class used to wrap around the minimizer used for local search
|
||||
Default local minimizer is SciPy minimizer L-BFGS-B
|
||||
"""
|
||||
|
||||
LS_MAXITER_RATIO = 6
|
||||
LS_MAXITER_MIN = 100
|
||||
LS_MAXITER_MAX = 1000
|
||||
|
||||
def __init__(self, bounds, func_wrapper, **kwargs):
|
||||
self.func_wrapper = func_wrapper
|
||||
self.kwargs = kwargs
|
||||
self.minimizer = minimize
|
||||
bounds_list = list(zip(*bounds))
|
||||
self.lower = np.array(bounds_list[0])
|
||||
self.upper = np.array(bounds_list[1])
|
||||
|
||||
# If no minimizer specified, use SciPy minimize with 'L-BFGS-B' method
|
||||
if not self.kwargs:
|
||||
n = len(self.lower)
|
||||
ls_max_iter = min(max(n * self.LS_MAXITER_RATIO,
|
||||
self.LS_MAXITER_MIN),
|
||||
self.LS_MAXITER_MAX)
|
||||
self.kwargs['method'] = 'L-BFGS-B'
|
||||
self.kwargs['options'] = {
|
||||
'maxiter': ls_max_iter,
|
||||
}
|
||||
self.kwargs['bounds'] = list(zip(self.lower, self.upper))
|
||||
|
||||
def local_search(self, x, e):
|
||||
# Run local search from the given x location where energy value is e
|
||||
x_tmp = np.copy(x)
|
||||
mres = self.minimizer(self.func_wrapper.fun, x, **self.kwargs)
|
||||
if 'njev' in mres.keys():
|
||||
self.func_wrapper.ngev += mres.njev
|
||||
if 'nhev' in mres.keys():
|
||||
self.func_wrapper.nhev += mres.nhev
|
||||
# Check if is valid value
|
||||
is_finite = np.all(np.isfinite(mres.x)) and np.isfinite(mres.fun)
|
||||
in_bounds = np.all(mres.x >= self.lower) and np.all(
|
||||
mres.x <= self.upper)
|
||||
is_valid = is_finite and in_bounds
|
||||
|
||||
# Use the new point only if it is valid and returns a better result
|
||||
if is_valid and mres.fun < e:
|
||||
return mres.fun, mres.x
|
||||
else:
|
||||
return e, x_tmp
|
||||
|
||||
|
||||
def dual_annealing(func, bounds, args=(), maxiter=1000,
|
||||
local_search_options={}, initial_temp=5230.,
|
||||
restart_temp_ratio=2.e-5, visit=2.62, accept=-5.0,
|
||||
maxfun=1e7, seed=None, no_local_search=False,
|
||||
callback=None, x0=None):
|
||||
"""
|
||||
Find the global minimum of a function using Dual Annealing.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
func : callable
|
||||
The objective function to be minimized. Must be in the form
|
||||
``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
|
||||
and ``args`` is a tuple of any additional fixed parameters needed to
|
||||
completely specify the function.
|
||||
bounds : sequence, shape (n, 2)
|
||||
Bounds for variables. ``(min, max)`` pairs for each element in ``x``,
|
||||
defining bounds for the objective function parameter.
|
||||
args : tuple, optional
|
||||
Any additional fixed parameters needed to completely specify the
|
||||
objective function.
|
||||
maxiter : int, optional
|
||||
The maximum number of global search iterations. Default value is 1000.
|
||||
local_search_options : dict, optional
|
||||
Extra keyword arguments to be passed to the local minimizer
|
||||
(`minimize`). Some important options could be:
|
||||
``method`` for the minimizer method to use and ``args`` for
|
||||
objective function additional arguments.
|
||||
initial_temp : float, optional
|
||||
The initial temperature; use higher values to facilitate a wider
|
||||
search of the energy landscape, allowing dual_annealing to escape
|
||||
local minima it would otherwise be trapped in. Default value is 5230. Range is
|
||||
(0.01, 5.e4].
|
||||
restart_temp_ratio : float, optional
|
||||
During the annealing process, temperature is decreasing, when it
|
||||
reaches ``initial_temp * restart_temp_ratio``, the reannealing process
|
||||
is triggered. Default value of the ratio is 2e-5. Range is (0, 1).
|
||||
visit : float, optional
|
||||
Parameter for visiting distribution. Default value is 2.62. Higher
|
||||
values give the visiting distribution a heavier tail, this makes
|
||||
the algorithm jump to a more distant region. The value range is (0, 3].
|
||||
accept : float, optional
|
||||
Parameter for acceptance distribution. It is used to control the
|
||||
probability of acceptance. The lower the acceptance parameter, the
|
||||
smaller the probability of acceptance. Default value is -5.0 with
|
||||
a range (-1e4, -5].
|
||||
maxfun : int, optional
|
||||
Soft limit for the number of objective function calls. If the
|
||||
algorithm is in the middle of a local search, this number may be
|
||||
exceeded; the algorithm will stop just after the local search is
|
||||
done. Default value is 1e7.
|
||||
seed : {int, `~numpy.random.RandomState`, `~numpy.random.Generator`}, optional
|
||||
If `seed` is not specified the `~numpy.random.RandomState` singleton is
|
||||
used.
|
||||
If `seed` is an int, a new ``RandomState`` instance is used, seeded
|
||||
with `seed`.
|
||||
If `seed` is already a ``RandomState`` or ``Generator`` instance, then
|
||||
that instance is used.
|
||||
Specify `seed` for repeatable minimizations. The random numbers
|
||||
generated with this seed only affect the visiting distribution function
|
||||
and new coordinates generation.
|
||||
no_local_search : bool, optional
|
||||
If `no_local_search` is set to True, a traditional Generalized
|
||||
Simulated Annealing will be performed with no local search
|
||||
strategy applied.
|
||||
callback : callable, optional
|
||||
A callback function with signature ``callback(x, f, context)``,
|
||||
which will be called for all minima found.
|
||||
``x`` and ``f`` are the coordinates and function value of the
|
||||
latest minimum found, and ``context`` has value in [0, 1, 2], with the
|
||||
following meaning:
|
||||
|
||||
- 0: minimum detected in the annealing process.
|
||||
- 1: detection occurred in the local search process.
|
||||
- 2: detection done in the dual annealing process.
|
||||
|
||||
If the callback implementation returns True, the algorithm will stop.
|
||||
x0 : ndarray, shape(n,), optional
|
||||
Coordinates of a single N-D starting point.
|
||||
|
||||
Returns
|
||||
-------
|
||||
res : OptimizeResult
|
||||
The optimization result represented as a `OptimizeResult` object.
|
||||
Important attributes are: ``x`` the solution array, ``fun`` the value
|
||||
of the function at the solution, and ``message`` which describes the
|
||||
cause of the termination.
|
||||
See `OptimizeResult` for a description of other attributes.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This function implements the Dual Annealing optimization. This stochastic
|
||||
approach derived from [3]_ combines the generalization of CSA (Classical
|
||||
Simulated Annealing) and FSA (Fast Simulated Annealing) [1]_ [2]_ coupled
|
||||
to a strategy for applying a local search on accepted locations [4]_.
|
||||
An alternative implementation of this same algorithm is described in [5]_
|
||||
and benchmarks are presented in [6]_. This approach introduces an advanced
|
||||
method to refine the solution found by the generalized annealing
|
||||
process. This algorithm uses a distorted Cauchy-Lorentz visiting
|
||||
distribution, with its shape controlled by the parameter :math:`q_{v}`
|
||||
|
||||
.. math::
|
||||
|
||||
g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
|
||||
\\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
|
||||
\\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
|
||||
\\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
|
||||
\\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}
|
||||
|
||||
Where :math:`t` is the artificial time. This visiting distribution is used
|
||||
to generate a trial jump distance :math:`\\Delta x(t)` of variable
|
||||
:math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.
|
||||
|
||||
From the starting point, after calling the visiting distribution
|
||||
function, the acceptance probability is computed as follows:
|
||||
|
||||
.. math::
|
||||
|
||||
p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
|
||||
\\frac{1}{1-q_{a}}}\\}}
|
||||
|
||||
Where :math:`q_{a}` is an acceptance parameter. For :math:`q_{a}<1`, zero
|
||||
acceptance probability is assigned to the cases where
|
||||
|
||||
.. math::
|
||||
|
||||
[1-(1-q_{a}) \\beta \\Delta E] < 0
|
||||
|
||||
The artificial temperature :math:`T_{q_{v}}(t)` is decreased according to
|
||||
|
||||
.. math::
|
||||
|
||||
T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
|
||||
1 + t\\right)^{q_{v}-1}-1}
|
||||
|
||||
Where :math:`q_{v}` is the visiting parameter.
|
||||
|
||||
.. versionadded:: 1.2.0
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
|
||||
statistics. Journal of Statistical Physics, 52, 479-487 (1998).
|
||||
.. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
|
||||
Physica A, 233, 395-406 (1996).
|
||||
.. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
|
||||
Annealing Algorithm and Its Application to the Thomson Model.
|
||||
Physics Letters A, 233, 216-220 (1997).
|
||||
.. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
|
||||
Annealing. Physical Review E, 62, 4473 (2000).
|
||||
.. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
|
||||
Simulated Annealing for Efficient Global Optimization: the GenSA
|
||||
Package for R. The R Journal, Volume 5/1 (2013).
|
||||
.. [6] Mullen, K. Continuous Global Optimization in R. Journal of
|
||||
Statistical Software, 60(6), 1 - 45, (2014). DOI:10.18637/jss.v060.i06
|
||||
|
||||
Examples
|
||||
--------
|
||||
The following example is a 10-D problem, with many local minima.
|
||||
The function involved is called Rastrigin
|
||||
(https://en.wikipedia.org/wiki/Rastrigin_function)
|
||||
|
||||
>>> from scipy.optimize import dual_annealing
|
||||
>>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
|
||||
>>> lw = [-5.12] * 10
|
||||
>>> up = [5.12] * 10
|
||||
>>> ret = dual_annealing(func, bounds=list(zip(lw, up)), seed=1234)
|
||||
>>> ret.x
|
||||
array([-4.26437714e-09, -3.91699361e-09, -1.86149218e-09, -3.97165720e-09,
|
||||
-6.29151648e-09, -6.53145322e-09, -3.93616815e-09, -6.55623025e-09,
|
||||
-6.05775280e-09, -5.00668935e-09]) # may vary
|
||||
>>> ret.fun
|
||||
0.000000
|
||||
|
||||
""" # noqa: E501
|
||||
if x0 is not None and not len(x0) == len(bounds):
|
||||
raise ValueError('Bounds size does not match x0')
|
||||
|
||||
lu = list(zip(*bounds))
|
||||
lower = np.array(lu[0])
|
||||
upper = np.array(lu[1])
|
||||
# Check that restart temperature ratio is correct
|
||||
if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
|
||||
raise ValueError('Restart temperature ratio has to be in range (0, 1)')
|
||||
# Checking bounds are valid
|
||||
if (np.any(np.isinf(lower)) or np.any(np.isinf(upper)) or np.any(
|
||||
np.isnan(lower)) or np.any(np.isnan(upper))):
|
||||
raise ValueError('Some bounds values are inf values or nan values')
|
||||
# Checking that bounds are consistent
|
||||
if not np.all(lower < upper):
|
||||
raise ValueError('Bounds are not consistent min < max')
|
||||
# Checking that bounds are the same length
|
||||
if not len(lower) == len(upper):
|
||||
raise ValueError('Bounds do not have the same dimensions')
|
||||
|
||||
# Wrapper for the objective function
|
||||
func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)
|
||||
# Wrapper for the minimizer
|
||||
minimizer_wrapper = LocalSearchWrapper(
|
||||
bounds, func_wrapper, **local_search_options)
|
||||
# Initialization of RandomState for reproducible runs if seed provided
|
||||
rand_state = check_random_state(seed)
|
||||
# Initialization of the energy state
|
||||
energy_state = EnergyState(lower, upper, callback)
|
||||
energy_state.reset(func_wrapper, rand_state, x0)
|
||||
# Minimum value of annealing temperature reached to perform
|
||||
# re-annealing
|
||||
temperature_restart = initial_temp * restart_temp_ratio
|
||||
# VisitingDistribution instance
|
||||
visit_dist = VisitingDistribution(lower, upper, visit, rand_state)
|
||||
# Strategy chain instance
|
||||
strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
|
||||
minimizer_wrapper, rand_state, energy_state)
|
||||
need_to_stop = False
|
||||
iteration = 0
|
||||
message = []
|
||||
# OptimizeResult object to be returned
|
||||
optimize_res = OptimizeResult()
|
||||
optimize_res.success = True
|
||||
optimize_res.status = 0
|
||||
|
||||
t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0
|
||||
# Run the search loop
|
||||
while not need_to_stop:
|
||||
for i in range(maxiter):
|
||||
# Compute temperature for this step
|
||||
s = float(i) + 2.0
|
||||
t2 = np.exp((visit - 1) * np.log(s)) - 1.0
|
||||
temperature = initial_temp * t1 / t2
|
||||
if iteration >= maxiter:
|
||||
message.append("Maximum number of iteration reached")
|
||||
need_to_stop = True
|
||||
break
|
||||
# Need a re-annealing process?
|
||||
if temperature < temperature_restart:
|
||||
energy_state.reset(func_wrapper, rand_state)
|
||||
break
|
||||
# starting strategy chain
|
||||
val = strategy_chain.run(i, temperature)
|
||||
if val is not None:
|
||||
message.append(val)
|
||||
need_to_stop = True
|
||||
optimize_res.success = False
|
||||
break
|
||||
# Possible local search at the end of the strategy chain
|
||||
if not no_local_search:
|
||||
val = strategy_chain.local_search()
|
||||
if val is not None:
|
||||
message.append(val)
|
||||
need_to_stop = True
|
||||
optimize_res.success = False
|
||||
break
|
||||
iteration += 1
|
||||
|
||||
# Setting the OptimizeResult values
|
||||
optimize_res.x = energy_state.xbest
|
||||
optimize_res.fun = energy_state.ebest
|
||||
optimize_res.nit = iteration
|
||||
optimize_res.nfev = func_wrapper.nfev
|
||||
optimize_res.njev = func_wrapper.ngev
|
||||
optimize_res.nhev = func_wrapper.nhev
|
||||
optimize_res.message = message
|
||||
return optimize_res
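# Hedged usage sketch (not part of the library source): the ``callback``
# documented above receives every new minimum; returning True stops the run.
#
#     import numpy as np
#     from scipy.optimize import dual_annealing
#
#     def rastrigin(x):
#         return np.sum(x * x - 10 * np.cos(2 * np.pi * x)) + 10 * x.size
#
#     def cb(x, f, context):
#         print("minimum %.6f found in phase %d" % (f, context))
#         return f < 1e-8  # ask the solver to stop once good enough
#
#     res = dual_annealing(rastrigin, bounds=[(-5.12, 5.12)] * 4,
#                          callback=cb, seed=42)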
|
Binary file not shown.
|
@ -0,0 +1,429 @@
|
|||
"""Hessian update strategies for quasi-Newton optimization methods."""
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
from scipy.linalg import get_blas_funcs
|
||||
from warnings import warn
|
||||
|
||||
|
||||
__all__ = ['HessianUpdateStrategy', 'BFGS', 'SR1']
|
||||
|
||||
|
||||
class HessianUpdateStrategy(object):
|
||||
"""Interface for implementing Hessian update strategies.
|
||||
|
||||
Many optimization methods make use of Hessian (or inverse Hessian)
|
||||
approximations, such as the quasi-Newton methods BFGS, SR1, L-BFGS.
|
||||
Some of these approximations, however, do not actually need to store
|
||||
the entire matrix or can compute the internal matrix product with a
|
||||
given vector in a very efficient manner. This class serves as an
|
||||
abstract interface between the optimization algorithm and the
|
||||
quasi-Newton update strategies, giving freedom of implementation
|
||||
to store and update the internal matrix as efficiently as possible.
|
||||
Different choices of initialization and update procedure will result
|
||||
in different quasi-Newton strategies.
|
||||
|
||||
Four methods should be implemented in derived classes: ``initialize``,
|
||||
``update``, ``dot`` and ``get_matrix``.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Any instance of a class that implements this interface
|
||||
can be accepted by the method ``minimize`` and used by
|
||||
the compatible solvers to approximate the Hessian (or
|
||||
inverse Hessian) used by the optimization algorithms.
|
||||
"""
|
||||
|
||||
def initialize(self, n, approx_type):
|
||||
"""Initialize internal matrix.
|
||||
|
||||
Allocate internal memory for storing and updating
|
||||
the Hessian or its inverse.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n : int
|
||||
Problem dimension.
|
||||
approx_type : {'hess', 'inv_hess'}
|
||||
Selects either the Hessian or the inverse Hessian.
|
||||
When set to 'hess' the Hessian will be stored and updated.
|
||||
When set to 'inv_hess' its inverse will be used instead.
|
||||
"""
|
||||
raise NotImplementedError("The method ``initialize(n, approx_type)``"
|
||||
" is not implemented.")
|
||||
|
||||
def update(self, delta_x, delta_grad):
|
||||
"""Update internal matrix.
|
||||
|
||||
Update Hessian matrix or its inverse (depending on how 'approx_type'
|
||||
is defined) using information about the last evaluated points.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
delta_x : ndarray
|
||||
The difference between the two points at which the gradient
|
||||
function has been evaluated: ``delta_x = x2 - x1``.
|
||||
delta_grad : ndarray
|
||||
The difference between the gradients:
|
||||
``delta_grad = grad(x2) - grad(x1)``.
|
||||
"""
|
||||
raise NotImplementedError("The method ``update(delta_x, delta_grad)``"
|
||||
" is not implemented.")
|
||||
|
||||
def dot(self, p):
|
||||
"""Compute the product of the internal matrix with the given vector.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array_like
|
||||
1-D array representing a vector.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Hp : array
|
||||
1-D array representing the result of multiplying the approximation matrix
|
||||
by vector p.
|
||||
"""
|
||||
raise NotImplementedError("The method ``dot(p)``"
|
||||
" is not implemented.")
|
||||
|
||||
def get_matrix(self):
|
||||
"""Return current internal matrix.
|
||||
|
||||
Returns
|
||||
-------
|
||||
H : ndarray, shape (n, n)
|
||||
Dense matrix containing either the Hessian
|
||||
or its inverse (depending on how 'approx_type'
|
||||
is defined).
|
||||
"""
|
||||
raise NotImplementedError("The method ``get_matrix()``"
|
||||
" is not implemented.")
|
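# Illustrative usage sketch, not part of the library source: any object
# implementing this interface can be passed through the ``hess`` argument of
# ``minimize`` with ``method='trust-constr'``, for example::
#
#     >>> from scipy.optimize import minimize, rosen, rosen_der, BFGS
#     >>> res = minimize(rosen, [1.3, 0.7], method='trust-constr',
#     ...                jac=rosen_der, hess=BFGS())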
||||
|
||||
|
||||
class FullHessianUpdateStrategy(HessianUpdateStrategy):
|
||||
"""Hessian update strategy with full dimensional internal representation.
|
||||
"""
|
||||
_syr = get_blas_funcs('syr', dtype='d') # Symmetric rank 1 update
|
||||
_syr2 = get_blas_funcs('syr2', dtype='d') # Symmetric rank 2 update
|
||||
# Symmetric matrix-vector product
|
||||
_symv = get_blas_funcs('symv', dtype='d')
|
||||
|
||||
def __init__(self, init_scale='auto'):
|
||||
self.init_scale = init_scale
|
||||
# Until initialize is called we can't really use the class,
|
||||
# so it makes sense to set everything to None.
|
||||
self.first_iteration = None
|
||||
self.approx_type = None
|
||||
self.B = None
|
||||
self.H = None
|
||||
|
||||
def initialize(self, n, approx_type):
|
||||
"""Initialize internal matrix.
|
||||
|
||||
Allocate internal memory for storing and updating
|
||||
the Hessian or its inverse.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n : int
|
||||
Problem dimension.
|
||||
approx_type : {'hess', 'inv_hess'}
|
||||
Selects either the Hessian or the inverse Hessian.
|
||||
When set to 'hess' the Hessian will be stored and updated.
|
||||
When set to 'inv_hess' its inverse will be used instead.
|
||||
"""
|
||||
self.first_iteration = True
|
||||
self.n = n
|
||||
self.approx_type = approx_type
|
||||
if approx_type not in ('hess', 'inv_hess'):
|
||||
raise ValueError("`approx_type` must be 'hess' or 'inv_hess'.")
|
||||
# Create matrix
|
||||
if self.approx_type == 'hess':
|
||||
self.B = np.eye(n, dtype=float)
|
||||
else:
|
||||
self.H = np.eye(n, dtype=float)
|
||||
|
||||
def _auto_scale(self, delta_x, delta_grad):
|
||||
# Heuristic to scale matrix at first iteration.
|
||||
# Described in Nocedal and Wright "Numerical Optimization"
|
||||
# p.143 formula (6.20).
|
||||
s_norm2 = np.dot(delta_x, delta_x)
|
||||
y_norm2 = np.dot(delta_grad, delta_grad)
|
||||
ys = np.abs(np.dot(delta_grad, delta_x))
|
||||
if ys == 0.0 or y_norm2 == 0 or s_norm2 == 0:
|
||||
return 1
|
||||
if self.approx_type == 'hess':
|
||||
return y_norm2 / ys
|
||||
else:
|
||||
return ys / y_norm2
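# Descriptive note (not part of the library source): in the notation of
# Nocedal and Wright, formula (6.20), this returns ``y.T @ y / (y.T @ s)``
# when the Hessian itself is approximated and ``y.T @ s / (y.T @ y)`` when
# its inverse is approximated, falling back to 1 for degenerate inputs.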
|
||||
|
||||
def _update_implementation(self, delta_x, delta_grad):
|
||||
raise NotImplementedError("The method ``_update_implementation``"
|
||||
" is not implemented.")
|
||||
|
||||
def update(self, delta_x, delta_grad):
|
||||
"""Update internal matrix.
|
||||
|
||||
Update Hessian matrix or its inverse (depending on how 'approx_type'
|
||||
is defined) using information about the last evaluated points.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
delta_x : ndarray
|
||||
The difference between two points the gradient
|
||||
function has been evaluated at: ``delta_x = x2 - x1``.
|
||||
delta_grad : ndarray
|
||||
The difference between the gradients:
|
||||
``delta_grad = grad(x2) - grad(x1)``.
|
||||
"""
|
||||
if np.all(delta_x == 0.0):
|
||||
return
|
||||
if np.all(delta_grad == 0.0):
|
||||
warn('delta_grad == 0.0. Check if the approximated '
|
||||
'function is linear. If the function is linear '
|
||||
'better results can be obtained by defining the '
|
||||
'Hessian as zero instead of using quasi-Newton '
|
||||
'approximations.', UserWarning)
|
||||
return
|
||||
if self.first_iteration:
|
||||
# Get user specific scale
|
||||
if self.init_scale == "auto":
|
||||
scale = self._auto_scale(delta_x, delta_grad)
|
||||
else:
|
||||
scale = float(self.init_scale)
|
||||
# Scale initial matrix with ``scale * np.eye(n)``
|
||||
if self.approx_type == 'hess':
|
||||
self.B *= scale
|
||||
else:
|
||||
self.H *= scale
|
||||
self.first_iteration = False
|
||||
self._update_implementation(delta_x, delta_grad)
|
||||
|
||||
def dot(self, p):
|
||||
"""Compute the product of the internal matrix with the given vector.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
p : array_like
|
||||
1-D array representing a vector.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Hp : array
|
||||
1-D array representing the result of multiplying the approximation matrix
|
||||
by vector p.
|
||||
"""
|
||||
if self.approx_type == 'hess':
|
||||
return self._symv(1, self.B, p)
|
||||
else:
|
||||
return self._symv(1, self.H, p)
|
||||
|
||||
def get_matrix(self):
|
||||
"""Return the current internal matrix.
|
||||
|
||||
Returns
|
||||
-------
|
||||
M : ndarray, shape (n, n)
|
||||
Dense matrix containing either the Hessian or its inverse
|
||||
(depending on how `approx_type` was defined).
|
||||
"""
|
||||
if self.approx_type == 'hess':
|
||||
M = np.copy(self.B)
|
||||
else:
|
||||
M = np.copy(self.H)
|
||||
li = np.tril_indices_from(M, k=-1)
|
||||
M[li] = M.T[li]
|
||||
return M
|
||||
|
||||
|
||||
class BFGS(FullHessianUpdateStrategy):
|
||||
"""Broyden-Fletcher-Goldfarb-Shanno (BFGS) Hessian update strategy.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
exception_strategy : {'skip_update', 'damp_update'}, optional
|
||||
Define how to proceed when the curvature condition is violated.
|
||||
Set it to 'skip_update' to just skip the update. Or, alternatively,
|
||||
set it to 'damp_update' to interpolate between the actual BFGS
|
||||
result and the unmodified matrix. Both exception strategies
|
||||
are explained in [1]_, p.536-537.
|
||||
min_curvature : float
|
||||
This number, scaled by a normalization factor, defines the
|
||||
minimum curvature ``dot(delta_grad, delta_x)`` allowed to go
|
||||
unaffected by the exception strategy. By default it is equal to
|
||||
1e-8 when ``exception_strategy = 'skip_update'`` and equal
|
||||
to 0.2 when ``exception_strategy = 'damp_update'``.
|
||||
init_scale : {float, 'auto'}
|
||||
Matrix scale at first iteration. At the first
|
||||
iteration the Hessian matrix or its inverse will be initialized
|
||||
with ``init_scale*np.eye(n)``, where ``n`` is the problem dimension.
|
||||
Set it to 'auto' in order to use an automatic heuristic for choosing
|
||||
the initial scale. The heuristic is described in [1]_, p.143.
|
||||
By default uses 'auto'.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The update is based on the description in [1]_, p.140.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
|
||||
Second Edition (2006).
|
||||
"""
|
||||
|
||||
def __init__(self, exception_strategy='skip_update', min_curvature=None,
|
||||
init_scale='auto'):
|
||||
if exception_strategy == 'skip_update':
|
||||
if min_curvature is not None:
|
||||
self.min_curvature = min_curvature
|
||||
else:
|
||||
self.min_curvature = 1e-8
|
||||
elif exception_strategy == 'damp_update':
|
||||
if min_curvature is not None:
|
||||
self.min_curvature = min_curvature
|
||||
else:
|
||||
self.min_curvature = 0.2
|
||||
else:
|
||||
raise ValueError("`exception_strategy` must be 'skip_update' "
|
||||
"or 'damp_update'.")
|
||||
|
||||
super(BFGS, self).__init__(init_scale)
|
||||
self.exception_strategy = exception_strategy
|
||||
|
||||
def _update_inverse_hessian(self, ys, Hy, yHy, s):
|
||||
"""Update the inverse Hessian matrix.
|
||||
|
||||
BFGS update using the formula:
|
||||
|
||||
``H <- H + ((H*y).T*y + s.T*y)/(s.T*y)^2 * (s*s.T)
|
||||
- 1/(s.T*y) * ((H*y)*s.T + s*(H*y).T)``
|
||||
|
||||
where ``s = delta_x`` and ``y = delta_grad``. This formula is
|
||||
equivalent to (6.17) in [1]_ written in a more efficient way
|
||||
for implementation.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
|
||||
Second Edition (2006).
|
||||
"""
|
||||
self.H = self._syr2(-1.0 / ys, s, Hy, a=self.H)
|
||||
self.H = self._syr((ys+yHy)/ys**2, s, a=self.H)
|
||||
|
||||
def _update_hessian(self, ys, Bs, sBs, y):
|
||||
"""Update the Hessian matrix.
|
||||
|
||||
BFGS update using the formula:
|
||||
|
||||
``B <- B - (B*s)*(B*s).T/s.T*(B*s) + y*y^T/s.T*y``
|
||||
|
||||
where ``s`` is short for ``delta_x`` and ``y`` is short
|
||||
for ``delta_grad``. Formula (6.19) in [1]_.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
|
||||
Second Edition (2006).
|
||||
"""
|
||||
self.B = self._syr(1.0 / ys, y, a=self.B)
|
||||
self.B = self._syr(-1.0 / sBs, Bs, a=self.B)
|
||||
|
||||
def _update_implementation(self, delta_x, delta_grad):
|
||||
# Auxiliary variables w and z
|
||||
if self.approx_type == 'hess':
|
||||
w = delta_x
|
||||
z = delta_grad
|
||||
else:
|
||||
w = delta_grad
|
||||
z = delta_x
|
||||
# Do some common operations
|
||||
wz = np.dot(w, z)
|
||||
Mw = self.dot(w)
|
||||
wMw = Mw.dot(w)
|
||||
# Guarantee that wMw > 0 by reinitializing matrix.
|
||||
# While this is always true in exact arithmetic,
|
||||
# an indefinite matrix may appear due to roundoff errors.
|
||||
if wMw <= 0.0:
|
||||
scale = self._auto_scale(delta_x, delta_grad)
|
||||
# Reinitialize matrix
|
||||
if self.approx_type == 'hess':
|
||||
self.B = scale * np.eye(self.n, dtype=float)
|
||||
else:
|
||||
self.H = scale * np.eye(self.n, dtype=float)
|
||||
# Do common operations for new matrix
|
||||
Mw = self.dot(w)
|
||||
wMw = Mw.dot(w)
|
||||
# Check if curvature condition is violated
|
||||
if wz <= self.min_curvature * wMw:
|
||||
# If the option 'skip_update' is set
|
||||
# we just skip the update when the condition
|
||||
# is violated.
|
||||
if self.exception_strategy == 'skip_update':
|
||||
return
|
||||
# If the option 'damp_update' is set we
|
||||
# interpolate between the actual BFGS
|
||||
# result and the unmodified matrix.
|
||||
elif self.exception_strategy == 'damp_update':
|
||||
update_factor = (1-self.min_curvature) / (1 - wz/wMw)
|
||||
z = update_factor*z + (1-update_factor)*Mw
|
||||
wz = np.dot(w, z)
|
||||
# Update matrix
|
||||
if self.approx_type == 'hess':
|
||||
self._update_hessian(wz, Mw, wMw, z)
|
||||
else:
|
||||
self._update_inverse_hessian(wz, Mw, wMw, z)
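# Illustrative usage sketch, not part of the library source: the damped update
# can be selected when the curvature condition is expected to be violated
# often, for example::
#
#     >>> from scipy.optimize import BFGS
#     >>> hess = BFGS(exception_strategy='damp_update', min_curvature=0.2)
#     >>> hess.initialize(n=2, approx_type='hess')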
|
||||
|
||||
|
||||
class SR1(FullHessianUpdateStrategy):
|
||||
"""Symmetric-rank-1 Hessian update strategy.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
min_denominator : float
|
||||
This number, scaled by a normalization factor,
|
||||
defines the minimum denominator magnitude allowed
|
||||
in the update. When the condition is violated we skip
|
||||
the update. By default uses ``1e-8``.
|
||||
init_scale : {float, 'auto'}, optional
|
||||
Matrix scale at first iteration. At the first
|
||||
iteration the Hessian matrix or its inverse will be initialized
|
||||
with ``init_scale*np.eye(n)``, where ``n`` is the problem dimension.
|
||||
Set it to 'auto' in order to use an automatic heuristic for choosing
|
||||
the initial scale. The heuristic is described in [1]_, p.143.
|
||||
By default uses 'auto'.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The update is based on the description in [1]_, p.144-146.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Nocedal, Jorge, and Stephen J. Wright. "Numerical optimization"
|
||||
Second Edition (2006).
|
||||
"""
|
||||
|
||||
def __init__(self, min_denominator=1e-8, init_scale='auto'):
|
||||
self.min_denominator = min_denominator
|
||||
super(SR1, self).__init__(init_scale)
|
||||
|
||||
def _update_implementation(self, delta_x, delta_grad):
|
||||
# Auxiliary variables w and z
|
||||
if self.approx_type == 'hess':
|
||||
w = delta_x
|
||||
z = delta_grad
|
||||
else:
|
||||
w = delta_grad
|
||||
z = delta_x
|
||||
# Do some common operations
|
||||
Mw = self.dot(w)
|
||||
z_minus_Mw = z - Mw
|
||||
denominator = np.dot(w, z_minus_Mw)
|
||||
# If the denominator is too small
|
||||
# we just skip the update.
|
||||
if np.abs(denominator) <= self.min_denominator*norm(w)*norm(z_minus_Mw):
|
||||
return
|
||||
# Update matrix
|
||||
if self.approx_type == 'hess':
|
||||
self.B = self._syr(1/denominator, z_minus_Mw, a=self.B)
|
||||
else:
|
||||
self.H = self._syr(1/denominator, z_minus_Mw, a=self.H)
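# Illustrative usage sketch, not part of the library source: SR1 keeps a dense
# approximation that can be inspected after a few updates, for example::
#
#     >>> import numpy as np
#     >>> from scipy.optimize import SR1
#     >>> hess = SR1()
#     >>> hess.initialize(n=2, approx_type='hess')
#     >>> hess.update(np.array([0.1, 0.0]), np.array([0.2, 0.1]))
#     >>> M = hess.get_matrix()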
|
BIN
venv/Lib/site-packages/scipy/optimize/_lbfgsb.cp36-win32.pyd
Normal file
Binary file not shown.
575
venv/Lib/site-packages/scipy/optimize/_linprog.py
Normal file
|
@@ -0,0 +1,575 @@
|
|||
"""
|
||||
A top-level linear programming interface. Currently this interface solves
|
||||
linear programming problems via the Simplex and Interior-Point methods.
|
||||
|
||||
.. versionadded:: 0.15.0
|
||||
|
||||
Functions
|
||||
---------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
linprog
|
||||
linprog_verbose_callback
|
||||
linprog_terse_callback
|
||||
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
from .optimize import OptimizeResult, OptimizeWarning
|
||||
from warnings import warn
|
||||
from ._linprog_ip import _linprog_ip
|
||||
from ._linprog_simplex import _linprog_simplex
|
||||
from ._linprog_rs import _linprog_rs
|
||||
from ._linprog_util import (
|
||||
_parse_linprog, _presolve, _get_Abc, _postprocess, _LPProblem, _autoscale)
|
||||
from copy import deepcopy
|
||||
|
||||
__all__ = ['linprog', 'linprog_verbose_callback', 'linprog_terse_callback']
|
||||
|
||||
__docformat__ = "restructuredtext en"
|
||||
|
||||
|
||||
def linprog_verbose_callback(res):
|
||||
"""
|
||||
A sample callback function demonstrating the linprog callback interface.
|
||||
This callback produces detailed output to sys.stdout before each iteration
|
||||
and after the final iteration of the simplex algorithm.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
res : A `scipy.optimize.OptimizeResult` consisting of the following fields:
|
||||
|
||||
x : 1-D array
|
||||
The independent variable vector which optimizes the linear
|
||||
programming problem.
|
||||
fun : float
|
||||
Value of the objective function.
|
||||
success : bool
|
||||
True if the algorithm succeeded in finding an optimal solution.
|
||||
slack : 1-D array
|
||||
The values of the slack variables. Each slack variable corresponds
|
||||
to an inequality constraint. If the slack is zero, then the
|
||||
corresponding constraint is active.
|
||||
con : 1-D array
|
||||
The (nominally zero) residuals of the equality constraints, that is,
|
||||
``b - A_eq @ x``
|
||||
phase : int
|
||||
The phase of the optimization being executed. In phase 1 a basic
|
||||
feasible solution is sought and the tableau T has an additional row
|
||||
representing an alternate objective function.
|
||||
status : int
|
||||
An integer representing the exit status of the optimization::
|
||||
|
||||
0 : Optimization terminated successfully
|
||||
1 : Iteration limit reached
|
||||
2 : Problem appears to be infeasible
|
||||
3 : Problem appears to be unbounded
|
||||
4 : Serious numerical difficulties encountered
|
||||
|
||||
nit : int
|
||||
The number of iterations performed.
|
||||
message : str
|
||||
A string descriptor of the exit status of the optimization.
|
||||
"""
|
||||
x = res['x']
|
||||
fun = res['fun']
|
||||
phase = res['phase']
|
||||
status = res['status']
|
||||
nit = res['nit']
|
||||
message = res['message']
|
||||
complete = res['complete']
|
||||
|
||||
saved_printoptions = np.get_printoptions()
|
||||
np.set_printoptions(linewidth=500,
|
||||
formatter={'float': lambda x: "{0: 12.4f}".format(x)})
|
||||
if status:
|
||||
print('--------- Simplex Early Exit -------\n'.format(nit))
|
||||
print('The simplex method exited early with status {0:d}'.format(status))
|
||||
print(message)
|
||||
elif complete:
|
||||
print('--------- Simplex Complete --------\n')
|
||||
print('Iterations required: {}'.format(nit))
|
||||
else:
|
||||
print('--------- Iteration {0:d} ---------\n'.format(nit))
|
||||
|
||||
if nit > 0:
|
||||
if phase == 1:
|
||||
print('Current Pseudo-Objective Value:')
|
||||
else:
|
||||
print('Current Objective Value:')
|
||||
print('f = ', fun)
|
||||
print()
|
||||
print('Current Solution Vector:')
|
||||
print('x = ', x)
|
||||
print()
|
||||
|
||||
np.set_printoptions(**saved_printoptions)
|
||||
|
||||
|
||||
def linprog_terse_callback(res):
|
||||
"""
|
||||
A sample callback function demonstrating the linprog callback interface.
|
||||
This callback produces brief output to sys.stdout before each iteration
|
||||
and after the final iteration of the simplex algorithm.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
res : A `scipy.optimize.OptimizeResult` consisting of the following fields:
|
||||
|
||||
x : 1-D array
|
||||
The independent variable vector which optimizes the linear
|
||||
programming problem.
|
||||
fun : float
|
||||
Value of the objective function.
|
||||
success : bool
|
||||
True if the algorithm succeeded in finding an optimal solution.
|
||||
slack : 1-D array
|
||||
The values of the slack variables. Each slack variable corresponds
|
||||
to an inequality constraint. If the slack is zero, then the
|
||||
corresponding constraint is active.
|
||||
con : 1-D array
|
||||
The (nominally zero) residuals of the equality constraints, that is,
|
||||
``b - A_eq @ x``.
|
||||
phase : int
|
||||
The phase of the optimization being executed. In phase 1 a basic
|
||||
feasible solution is sought and the tableau T has an additional row
|
||||
representing an alternate objective function.
|
||||
status : int
|
||||
An integer representing the exit status of the optimization::
|
||||
|
||||
0 : Optimization terminated successfully
|
||||
1 : Iteration limit reached
|
||||
2 : Problem appears to be infeasible
|
||||
3 : Problem appears to be unbounded
|
||||
4 : Serious numerical difficulties encountered
|
||||
|
||||
nit : int
|
||||
The number of iterations performed.
|
||||
message : str
|
||||
A string descriptor of the exit status of the optimization.
|
||||
"""
|
||||
nit = res['nit']
|
||||
x = res['x']
|
||||
|
||||
if nit == 0:
|
||||
print("Iter: X:")
|
||||
print("{0: <5d} ".format(nit), end="")
|
||||
print(x)
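# Illustrative usage sketch, not part of the library source: either sample
# callback can be passed to ``linprog`` through its ``callback`` argument,
# for example::
#
#     >>> from scipy.optimize import linprog, linprog_terse_callback
#     >>> res = linprog([-1, 4], A_ub=[[-3, 1], [1, 2]], b_ub=[6, 4],
#     ...               callback=linprog_terse_callback)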
|
||||
|
||||
|
||||
def linprog(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
|
||||
bounds=None, method='interior-point', callback=None,
|
||||
options=None, x0=None):
|
||||
r"""
|
||||
Linear programming: minimize a linear objective function subject to linear
|
||||
equality and inequality constraints.
|
||||
|
||||
Linear programming solves problems of the following form:
|
||||
|
||||
.. math::
|
||||
|
||||
\min_x \ & c^T x \\
|
||||
\mbox{such that} \ & A_{ub} x \leq b_{ub},\\
|
||||
& A_{eq} x = b_{eq},\\
|
||||
& l \leq x \leq u ,
|
||||
|
||||
where :math:`x` is a vector of decision variables; :math:`c`,
|
||||
:math:`b_{ub}`, :math:`b_{eq}`, :math:`l`, and :math:`u` are vectors; and
|
||||
:math:`A_{ub}` and :math:`A_{eq}` are matrices.
|
||||
|
||||
Informally, that's:
|
||||
|
||||
minimize::
|
||||
|
||||
c @ x
|
||||
|
||||
such that::
|
||||
|
||||
A_ub @ x <= b_ub
|
||||
A_eq @ x == b_eq
|
||||
lb <= x <= ub
|
||||
|
||||
Note that by default ``lb = 0`` and ``ub = None`` unless specified with
|
||||
``bounds``.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
c : 1-D array
|
||||
The coefficients of the linear objective function to be minimized.
|
||||
A_ub : 2-D array, optional
|
||||
The inequality constraint matrix. Each row of ``A_ub`` specifies the
|
||||
coefficients of a linear inequality constraint on ``x``.
|
||||
b_ub : 1-D array, optional
|
||||
The inequality constraint vector. Each element represents an
|
||||
upper bound on the corresponding value of ``A_ub @ x``.
|
||||
A_eq : 2-D array, optional
|
||||
The equality constraint matrix. Each row of ``A_eq`` specifies the
|
||||
coefficients of a linear equality constraint on ``x``.
|
||||
b_eq : 1-D array, optional
|
||||
The equality constraint vector. Each element of ``A_eq @ x`` must equal
|
||||
the corresponding element of ``b_eq``.
|
||||
bounds : sequence, optional
|
||||
A sequence of ``(min, max)`` pairs for each element in ``x``, defining
|
||||
the minimum and maximum values of that decision variable. Use ``None`` to
|
||||
indicate that there is no bound. By default, bounds are ``(0, None)``
|
||||
(all decision variables are non-negative).
|
||||
If a single tuple ``(min, max)`` is provided, then ``min`` and
|
||||
``max`` will serve as bounds for all decision variables.
|
||||
method : {'interior-point', 'revised simplex', 'simplex'}, optional
|
||||
The algorithm used to solve the standard form problem.
|
||||
:ref:`'interior-point' <optimize.linprog-interior-point>` (default),
|
||||
:ref:`'revised simplex' <optimize.linprog-revised_simplex>`, and
|
||||
:ref:`'simplex' <optimize.linprog-simplex>` (legacy)
|
||||
are supported.
|
||||
callback : callable, optional
|
||||
If a callback function is provided, it will be called at least once per
|
||||
iteration of the algorithm. The callback function must accept a single
|
||||
`scipy.optimize.OptimizeResult` consisting of the following fields:
|
||||
|
||||
x : 1-D array
|
||||
The current solution vector.
|
||||
fun : float
|
||||
The current value of the objective function ``c @ x``.
|
||||
success : bool
|
||||
``True`` when the algorithm has completed successfully.
|
||||
slack : 1-D array
|
||||
The (nominally positive) values of the slack,
|
||||
``b_ub - A_ub @ x``.
|
||||
con : 1-D array
|
||||
The (nominally zero) residuals of the equality constraints,
|
||||
``b_eq - A_eq @ x``.
|
||||
phase : int
|
||||
The phase of the algorithm being executed.
|
||||
status : int
|
||||
An integer representing the status of the algorithm.
|
||||
|
||||
``0`` : Optimization proceeding nominally.
|
||||
|
||||
``1`` : Iteration limit reached.
|
||||
|
||||
``2`` : Problem appears to be infeasible.
|
||||
|
||||
``3`` : Problem appears to be unbounded.
|
||||
|
||||
``4`` : Numerical difficulties encountered.
|
||||
|
||||
nit : int
|
||||
The current iteration number.
|
||||
message : str
|
||||
A string descriptor of the algorithm status.
|
||||
|
||||
options : dict, optional
|
||||
A dictionary of solver options. All methods accept the following
|
||||
options:
|
||||
|
||||
maxiter : int
|
||||
Maximum number of iterations to perform.
|
||||
Default: see method-specific documentation.
|
||||
disp : bool
|
||||
Set to ``True`` to print convergence messages.
|
||||
Default: ``False``.
|
||||
autoscale : bool
|
||||
Set to ``True`` to automatically perform equilibration.
|
||||
Consider using this option if the numerical values in the
|
||||
constraints are separated by several orders of magnitude.
|
||||
Default: ``False``.
|
||||
presolve : bool
|
||||
Set to ``False`` to disable automatic presolve.
|
||||
Default: ``True``.
|
||||
rr : bool
|
||||
Set to ``False`` to disable automatic redundancy removal.
|
||||
Default: ``True``.
|
||||
|
||||
For method-specific options, see
|
||||
:func:`show_options('linprog') <show_options>`.
|
||||
|
||||
x0 : 1-D array, optional
|
||||
Guess values of the decision variables, which will be refined by
|
||||
the optimization algorithm. This argument is currently used only by the
|
||||
'revised simplex' method, and can only be used if `x0` represents a
|
||||
basic feasible solution.
|
||||
|
||||
|
||||
Returns
|
||||
-------
|
||||
res : OptimizeResult
|
||||
A :class:`scipy.optimize.OptimizeResult` consisting of the fields:
|
||||
|
||||
x : 1-D array
|
||||
The values of the decision variables that minimizes the
|
||||
objective function while satisfying the constraints.
|
||||
fun : float
|
||||
The optimal value of the objective function ``c @ x``.
|
||||
slack : 1-D array
|
||||
The (nominally positive) values of the slack variables,
|
||||
``b_ub - A_ub @ x``.
|
||||
con : 1-D array
|
||||
The (nominally zero) residuals of the equality constraints,
|
||||
``b_eq - A_eq @ x``.
|
||||
success : bool
|
||||
``True`` when the algorithm succeeds in finding an optimal
|
||||
solution.
|
||||
status : int
|
||||
An integer representing the exit status of the algorithm.
|
||||
|
||||
``0`` : Optimization terminated successfully.
|
||||
|
||||
``1`` : Iteration limit reached.
|
||||
|
||||
``2`` : Problem appears to be infeasible.
|
||||
|
||||
``3`` : Problem appears to be unbounded.
|
||||
|
||||
``4`` : Numerical difficulties encountered.
|
||||
|
||||
nit : int
|
||||
The total number of iterations performed in all phases.
|
||||
message : str
|
||||
A string descriptor of the exit status of the algorithm.
|
||||
|
||||
See Also
|
||||
--------
|
||||
show_options : Additional options accepted by the solvers.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This section describes the available solvers that can be selected by the
|
||||
'method' parameter.
|
||||
|
||||
:ref:`'interior-point' <optimize.linprog-interior-point>` is the default
|
||||
as it is typically the fastest and most robust method.
|
||||
:ref:`'revised simplex' <optimize.linprog-revised_simplex>` is more
|
||||
accurate for the problems it solves.
|
||||
:ref:`'simplex' <optimize.linprog-simplex>` is the legacy method and is
|
||||
included for backwards compatibility and educational purposes.
|
||||
|
||||
Method *interior-point* uses the primal-dual path following algorithm
|
||||
as outlined in [4]_. This algorithm supports sparse constraint matrices and
|
||||
is typically faster than the simplex methods, especially for large, sparse
|
||||
problems. Note, however, that the solution returned may be slightly less
|
||||
accurate than those of the simplex methods and will not, in general,
|
||||
correspond with a vertex of the polytope defined by the constraints.
|
||||
|
||||
.. versionadded:: 1.0.0
|
||||
|
||||
Method *revised simplex* uses the revised simplex method as described in
|
||||
[9]_, except that a factorization [11]_ of the basis matrix, rather than
|
||||
its inverse, is efficiently maintained and used to solve the linear systems
|
||||
at each iteration of the algorithm.
|
||||
|
||||
.. versionadded:: 1.3.0
|
||||
|
||||
Method *simplex* uses a traditional, full-tableau implementation of
|
||||
Dantzig's simplex algorithm [1]_, [2]_ (*not* the
|
||||
Nelder-Mead simplex). This algorithm is included for backwards
|
||||
compatibility and educational purposes.
|
||||
|
||||
.. versionadded:: 0.15.0
|
||||
|
||||
Before applying any method, a presolve procedure based on [8]_ attempts
|
||||
to identify trivial infeasibilities, trivial unboundedness, and potential
|
||||
problem simplifications. Specifically, it checks for:
|
||||
|
||||
- rows of zeros in ``A_eq`` or ``A_ub``, representing trivial constraints;
|
||||
- columns of zeros in ``A_eq`` `and` ``A_ub``, representing unconstrained
|
||||
variables;
|
||||
- column singletons in ``A_eq``, representing fixed variables; and
|
||||
- column singletons in ``A_ub``, representing simple bounds.
|
||||
|
||||
If presolve reveals that the problem is unbounded (e.g. an unconstrained
|
||||
and unbounded variable has negative cost) or infeasible (e.g., a row of
|
||||
zeros in ``A_eq`` corresponds with a nonzero in ``b_eq``), the solver
|
||||
terminates with the appropriate status code. Note that presolve terminates
|
||||
as soon as any sign of unboundedness is detected; consequently, a problem
|
||||
may be reported as unbounded when in reality the problem is infeasible
|
||||
(but infeasibility has not been detected yet). Therefore, if it is
|
||||
important to know whether the problem is actually infeasible, solve the
|
||||
problem again with option ``presolve=False``.
|
||||
|
||||
If neither infeasibility nor unboundedness are detected in a single pass
|
||||
of the presolve, bounds are tightened where possible and fixed
|
||||
variables are removed from the problem. Then, linearly dependent rows
|
||||
of the ``A_eq`` matrix are removed (unless they represent an
|
||||
infeasibility) to avoid numerical difficulties in the primary solve
|
||||
routine. Note that rows that are nearly linearly dependent (within a
|
||||
prescribed tolerance) may also be removed, which can change the optimal
|
||||
solution in rare cases. If this is a concern, eliminate redundancy from
|
||||
your problem formulation and run with option ``rr=False`` or
|
||||
``presolve=False``.
|
||||
|
||||
Several potential improvements can be made here: additional presolve
|
||||
checks outlined in [8]_ should be implemented, the presolve routine should
|
||||
be run multiple times (until no further simplifications can be made), and
|
||||
more of the efficiency improvements from [5]_ should be implemented in the
|
||||
redundancy removal routines.
|
||||
|
||||
After presolve, the problem is transformed to standard form by converting
|
||||
the (tightened) simple bounds to upper bound constraints, introducing
|
||||
non-negative slack variables for inequality constraints, and expressing
|
||||
unbounded variables as the difference between two non-negative variables.
|
||||
Optionally, the problem is automatically scaled via equilibration [12]_.
|
||||
The selected algorithm solves the standard form problem, and a
|
||||
postprocessing routine converts the result to a solution to the original
|
||||
problem.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Dantzig, George B., Linear programming and extensions. Rand
|
||||
Corporation Research Study Princeton Univ. Press, Princeton, NJ,
|
||||
1963
|
||||
.. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
|
||||
Mathematical Programming", McGraw-Hill, Chapter 4.
|
||||
.. [3] Bland, Robert G. New finite pivoting rules for the simplex method.
|
||||
Mathematics of Operations Research (2), 1977: pp. 103-107.
|
||||
.. [4] Andersen, Erling D., and Knud D. Andersen. "The MOSEK interior point
|
||||
optimizer for linear programming: an implementation of the
|
||||
homogeneous algorithm." High performance optimization. Springer US,
|
||||
2000. 197-232.
|
||||
.. [5] Andersen, Erling D. "Finding all linearly dependent rows in
|
||||
large-scale linear programming." Optimization Methods and Software
|
||||
6.3 (1995): 219-227.
|
||||
.. [6] Freund, Robert M. "Primal-Dual Interior-Point Methods for Linear
|
||||
Programming based on Newton's Method." Unpublished Course Notes,
|
||||
March 2004. Available 2/25/2017 at
|
||||
https://ocw.mit.edu/courses/sloan-school-of-management/15-084j-nonlinear-programming-spring-2004/lecture-notes/lec14_int_pt_mthd.pdf
|
||||
.. [7] Fourer, Robert. "Solving Linear Programs by Interior-Point Methods."
|
||||
Unpublished Course Notes, August 26, 2005. Available 2/25/2017 at
|
||||
http://www.4er.org/CourseNotes/Book%20B/B-III.pdf
|
||||
.. [8] Andersen, Erling D., and Knud D. Andersen. "Presolving in linear
|
||||
programming." Mathematical Programming 71.2 (1995): 221-245.
|
||||
.. [9] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
|
||||
programming." Athena Scientific 1 (1997): 997.
|
||||
.. [10] Andersen, Erling D., et al. Implementation of interior point
|
||||
methods for large scale linear programming. HEC/Universite de
|
||||
Geneve, 1996.
|
||||
.. [11] Bartels, Richard H. "A stabilization of the simplex method."
|
||||
Journal in Numerische Mathematik 16.5 (1971): 414-434.
|
||||
.. [12] Tomlin, J. A. "On scaling linear programming problems."
|
||||
Mathematical Programming Study 4 (1975): 146-166.
|
||||
|
||||
Examples
|
||||
--------
|
||||
Consider the following problem:
|
||||
|
||||
.. math::
|
||||
|
||||
\min_{x_0, x_1} \ -x_0 + 4x_1 & \\
|
||||
\mbox{such that} \ -3x_0 + x_1 & \leq 6,\\
|
||||
-x_0 - 2x_1 & \geq -4,\\
|
||||
x_1 & \geq -3.
|
||||
|
||||
The problem is not presented in the form accepted by `linprog`. This is
|
||||
easily remedied by converting the "greater than" inequality
|
||||
constraint to a "less than" inequality constraint by
|
||||
multiplying both sides by a factor of :math:`-1`. Note also that the last
|
||||
constraint is really the simple bound :math:`-3 \leq x_1 \leq \infty`.
|
||||
Finally, since there are no bounds on :math:`x_0`, we must explicitly
|
||||
specify the bounds :math:`-\infty \leq x_0 \leq \infty`, as the
|
||||
default is for variables to be non-negative. After collecting coefficients
|
||||
into arrays and tuples, the input for this problem is:
|
||||
|
||||
>>> c = [-1, 4]
|
||||
>>> A = [[-3, 1], [1, 2]]
|
||||
>>> b = [6, 4]
|
||||
>>> x0_bounds = (None, None)
|
||||
>>> x1_bounds = (-3, None)
|
||||
>>> from scipy.optimize import linprog
|
||||
>>> res = linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds])
|
||||
|
||||
Note that the default method for `linprog` is 'interior-point', which is
|
||||
approximate by nature.
|
||||
|
||||
>>> print(res)
|
||||
con: array([], dtype=float64)
|
||||
fun: -21.99999984082494 # may vary
|
||||
message: 'Optimization terminated successfully.'
|
||||
nit: 6 # may vary
|
||||
slack: array([3.89999997e+01, 8.46872439e-08]) # may vary
|
||||
status: 0
|
||||
success: True
|
||||
x: array([ 9.99999989, -2.99999999]) # may vary
|
||||
|
||||
If you need greater accuracy, try 'revised simplex'.
|
||||
|
||||
>>> res = linprog(c, A_ub=A, b_ub=b, bounds=[x0_bounds, x1_bounds], method='revised simplex')
|
||||
>>> print(res)
|
||||
con: array([], dtype=float64)
|
||||
fun: -22.0 # may vary
|
||||
message: 'Optimization terminated successfully.'
|
||||
nit: 1 # may vary
|
||||
slack: array([39., 0.]) # may vary
|
||||
status: 0
|
||||
success: True
|
||||
x: array([10., -3.]) # may vary
|
||||
|
||||
"""
|
||||
meth = method.lower()
|
||||
|
||||
if x0 is not None and meth != "revised simplex":
|
||||
warning_message = "x0 is used only when method is 'revised simplex'. "
|
||||
warn(warning_message, OptimizeWarning)
|
||||
|
||||
lp = _LPProblem(c, A_ub, b_ub, A_eq, b_eq, bounds, x0)
|
||||
lp, solver_options = _parse_linprog(lp, options)
|
||||
tol = solver_options.get('tol', 1e-9)
|
||||
|
||||
iteration = 0
|
||||
complete = False # will become True if solved in presolve
|
||||
undo = []
|
||||
|
||||
# Keep the original arrays to calculate slack/residuals for original
|
||||
# problem.
|
||||
lp_o = deepcopy(lp)
|
||||
|
||||
# Solve trivial problem, eliminate variables, tighten bounds, etc.
|
||||
c0 = 0 # we might get a constant term in the objective
|
||||
if solver_options.pop('presolve', True):
|
||||
rr = solver_options.pop('rr', True)
|
||||
(lp, c0, x, undo, complete, status, message) = _presolve(lp, rr, tol)
|
||||
|
||||
C, b_scale = 1, 1 # for trivial unscaling if autoscale is not used
|
||||
postsolve_args = (lp_o._replace(bounds=lp.bounds), undo, C, b_scale)
|
||||
|
||||
if not complete:
|
||||
A, b, c, c0, x0 = _get_Abc(lp, c0, undo)
|
||||
if solver_options.pop('autoscale', False):
|
||||
A, b, c, x0, C, b_scale = _autoscale(A, b, c, x0)
|
||||
postsolve_args = postsolve_args[:-2] + (C, b_scale)
|
||||
|
||||
if meth == 'simplex':
|
||||
x, status, message, iteration = _linprog_simplex(
|
||||
c, c0=c0, A=A, b=b, callback=callback,
|
||||
postsolve_args=postsolve_args, **solver_options)
|
||||
elif meth == 'interior-point':
|
||||
x, status, message, iteration = _linprog_ip(
|
||||
c, c0=c0, A=A, b=b, callback=callback,
|
||||
postsolve_args=postsolve_args, **solver_options)
|
||||
elif meth == 'revised simplex':
|
||||
x, status, message, iteration = _linprog_rs(
|
||||
c, c0=c0, A=A, b=b, x0=x0, callback=callback,
|
||||
postsolve_args=postsolve_args, **solver_options)
|
||||
else:
|
||||
raise ValueError('Unknown solver %s' % method)
|
||||
|
||||
# Eliminate artificial variables, re-introduce presolved variables, etc.
|
||||
# need modified bounds here to translate variables appropriately
|
||||
disp = solver_options.get('disp', False)
|
||||
|
||||
x, fun, slack, con, status, message = _postprocess(x, postsolve_args,
|
||||
complete, status,
|
||||
message, tol,
|
||||
iteration, disp)
|
||||
|
||||
sol = {
|
||||
'x': x,
|
||||
'fun': fun,
|
||||
'slack': slack,
|
||||
'con': con,
|
||||
'status': status,
|
||||
'message': message,
|
||||
'nit': iteration,
|
||||
'success': status == 0}
|
||||
|
||||
return OptimizeResult(sol)
|
1124
venv/Lib/site-packages/scipy/optimize/_linprog_ip.py
Normal file
File diff suppressed because it is too large
557
venv/Lib/site-packages/scipy/optimize/_linprog_rs.py
Normal file
|
@@ -0,0 +1,557 @@
|
|||
"""Revised simplex method for linear programming
|
||||
|
||||
The *revised simplex* method uses the method described in [1]_, except
|
||||
that a factorization [2]_ of the basis matrix, rather than its inverse,
|
||||
is efficiently maintained and used to solve the linear systems at each
|
||||
iteration of the algorithm.
|
||||
|
||||
.. versionadded:: 1.3.0
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Bertsimas, Dimitris, and J. Tsitsiklis. "Introduction to linear
|
||||
programming." Athena Scientific 1 (1997): 997.
|
||||
.. [2] Bartels, Richard H. "A stabilization of the simplex method."
|
||||
Journal in Numerische Mathematik 16.5 (1971): 414-434.
|
||||
|
||||
"""
|
||||
# Author: Matt Haberland
|
||||
|
||||
import numpy as np
|
||||
from scipy.linalg import solve
|
||||
from .optimize import _check_unknown_options
|
||||
from ._bglu_dense import LU
|
||||
from ._bglu_dense import BGLU as BGLU
|
||||
from scipy.linalg import LinAlgError
|
||||
from numpy.linalg.linalg import LinAlgError as LinAlgError2
|
||||
from ._linprog_util import _postsolve
|
||||
from .optimize import OptimizeResult
|
||||
|
||||
|
||||
def _phase_one(A, b, x0, callback, postsolve_args, maxiter, tol, disp,
|
||||
maxupdate, mast, pivot):
|
||||
"""
|
||||
The purpose of phase one is to find an initial basic feasible solution
|
||||
(BFS) to the original problem.
|
||||
|
||||
Generates an auxiliary problem with a trivial BFS and an objective that
|
||||
minimizes infeasibility of the original problem. Solves the auxiliary
|
||||
problem using the main simplex routine (phase two). This either yields
|
||||
a BFS to the original problem or determines that the original problem is
|
||||
infeasible. If feasible, phase one detects redundant rows in the original
|
||||
constraint matrix and removes them, then chooses additional indices as
|
||||
necessary to complete a basis/BFS for the original problem.
|
||||
"""
|
||||
|
||||
m, n = A.shape
|
||||
status = 0
|
||||
|
||||
# generate auxiliary problem to get initial BFS
|
||||
A, b, c, basis, x, status = _generate_auxiliary_problem(A, b, x0, tol)
|
||||
|
||||
if status == 6:
|
||||
residual = c.dot(x)
|
||||
iter_k = 0
|
||||
return x, basis, A, b, residual, status, iter_k
|
||||
|
||||
# solve auxiliary problem
|
||||
phase_one_n = n
|
||||
iter_k = 0
|
||||
x, basis, status, iter_k = _phase_two(c, A, x, basis, callback,
|
||||
postsolve_args,
|
||||
maxiter, tol, disp,
|
||||
maxupdate, mast, pivot,
|
||||
iter_k, phase_one_n)
|
||||
|
||||
# check for infeasibility
|
||||
residual = c.dot(x)
|
||||
if status == 0 and residual > tol:
|
||||
status = 2
|
||||
|
||||
# drive artificial variables out of basis
|
||||
# TODO: test redundant row removal better
|
||||
# TODO: make solve more efficient with BGLU? This could take a while.
|
||||
keep_rows = np.ones(m, dtype=bool)
|
||||
for basis_column in basis[basis >= n]:
|
||||
B = A[:, basis]
|
||||
try:
|
||||
basis_finder = np.abs(solve(B, A)) # inefficient
|
||||
pertinent_row = np.argmax(basis_finder[:, basis_column])
|
||||
eligible_columns = np.ones(n, dtype=bool)
|
||||
eligible_columns[basis[basis < n]] = 0
|
||||
eligible_column_indices = np.where(eligible_columns)[0]
|
||||
index = np.argmax(basis_finder[:, :n]
|
||||
[pertinent_row, eligible_columns])
|
||||
new_basis_column = eligible_column_indices[index]
|
||||
if basis_finder[pertinent_row, new_basis_column] < tol:
|
||||
keep_rows[pertinent_row] = False
|
||||
else:
|
||||
basis[basis == basis_column] = new_basis_column
|
||||
except (LinAlgError, LinAlgError2):
|
||||
status = 4
|
||||
|
||||
# form solution to original problem
|
||||
A = A[keep_rows, :n]
|
||||
basis = basis[keep_rows]
|
||||
x = x[:n]
|
||||
m = A.shape[0]
|
||||
return x, basis, A, b, residual, status, iter_k
|
||||
|
||||
|
||||
def _get_more_basis_columns(A, basis):
|
||||
"""
|
||||
Called when the auxiliary problem terminates with artificial columns in
|
||||
the basis, which must be removed and replaced with non-artificial
|
||||
columns. Finds additional columns that do not make the matrix singular.
|
||||
"""
|
||||
m, n = A.shape
|
||||
|
||||
# options for inclusion are those that aren't already in the basis
|
||||
a = np.arange(m+n)
|
||||
bl = np.zeros(len(a), dtype=bool)
|
||||
bl[basis] = 1
|
||||
options = a[~bl]
|
||||
options = options[options < n] # and they have to be non-artificial
|
||||
|
||||
# form basis matrix
|
||||
B = np.zeros((m, m))
|
||||
B[:, 0:len(basis)] = A[:, basis]
|
||||
|
||||
if (basis.size > 0 and
|
||||
np.linalg.matrix_rank(B[:, :len(basis)]) < len(basis)):
|
||||
raise Exception("Basis has dependent columns")
|
||||
|
||||
rank = 0 # just enter the loop
|
||||
for i in range(n): # somewhat arbitrary, but we need another way out
|
||||
# permute the options, and take as many as needed
|
||||
new_basis = np.random.permutation(options)[:m-len(basis)]
|
||||
B[:, len(basis):] = A[:, new_basis] # update the basis matrix
|
||||
rank = np.linalg.matrix_rank(B) # check the rank
|
||||
if rank == m:
|
||||
break
|
||||
|
||||
return np.concatenate((basis, new_basis))
|
||||
|
||||
|
||||
def _generate_auxiliary_problem(A, b, x0, tol):
|
||||
"""
|
||||
Modifies original problem to create an auxiliary problem with a trivial
|
||||
initial basic feasible solution and an objective that minimizes
|
||||
infeasibility in the original problem.
|
||||
|
||||
Conceptually, this is done by stacking an identity matrix on the right of
|
||||
the original constraint matrix, adding artificial variables to correspond
|
||||
with each of these new columns, and generating a cost vector that is all
|
||||
zeros except for ones corresponding with each of the new variables.
|
||||
|
||||
An initial basic feasible solution is trivial: all variables are zero
|
||||
except for the artificial variables, which are set equal to the
|
||||
corresponding element of the right hand side `b`.
|
||||
|
||||
Running the simplex method on this auxiliary problem drives all of the
|
||||
artificial variables - and thus the cost - to zero if the original problem
|
||||
is feasible. The original problem is declared infeasible otherwise.
|
||||
|
||||
Much of the complexity below is to improve efficiency by using singleton
|
||||
columns in the original problem where possible, thus generating artificial
|
||||
variables only as necessary, and using an initial 'guess' basic feasible
|
||||
solution.
|
||||
"""
|
||||
status = 0
|
||||
m, n = A.shape
|
||||
|
||||
if x0 is not None:
|
||||
x = x0
|
||||
else:
|
||||
x = np.zeros(n)
|
||||
|
||||
r = b - A@x # residual; this must be all zeros for feasibility
|
||||
|
||||
A[r < 0] = -A[r < 0] # express problem with RHS positive for trivial BFS
|
||||
b[r < 0] = -b[r < 0] # to the auxiliary problem
|
||||
r[r < 0] *= -1
|
||||
|
||||
# Rows which we will need to find a trivial way to zero.
|
||||
# This should just be the rows where there is a nonzero residual.
|
||||
# But then we would not necessarily have a column singleton in every row.
|
||||
# This makes it difficult to find an initial basis.
|
||||
if x0 is None:
|
||||
nonzero_constraints = np.arange(m)
|
||||
else:
|
||||
nonzero_constraints = np.where(r > tol)[0]
|
||||
|
||||
# these are (at least some of) the initial basis columns
|
||||
basis = np.where(np.abs(x) > tol)[0]
|
||||
|
||||
if len(nonzero_constraints) == 0 and len(basis) <= m: # already a BFS
|
||||
c = np.zeros(n)
|
||||
basis = _get_more_basis_columns(A, basis)
|
||||
return A, b, c, basis, x, status
|
||||
elif (len(nonzero_constraints) > m - len(basis) or
|
||||
np.any(x < 0)): # can't get trivial BFS
|
||||
c = np.zeros(n)
|
||||
status = 6
|
||||
return A, b, c, basis, x, status
|
||||
|
||||
# chooses existing columns appropriate for inclusion in initial basis
|
||||
cols, rows = _select_singleton_columns(A, r)
|
||||
|
||||
# find the rows we need to zero that we _can_ zero with column singletons
|
||||
i_tofix = np.isin(rows, nonzero_constraints)
|
||||
# these columns can't already be in the basis, though
|
||||
# we are going to add them to the basis and change the corresponding x val
|
||||
i_notinbasis = np.logical_not(np.isin(cols, basis))
|
||||
i_fix_without_aux = np.logical_and(i_tofix, i_notinbasis)
|
||||
rows = rows[i_fix_without_aux]
|
||||
cols = cols[i_fix_without_aux]
|
||||
|
||||
# indices of the rows we can only zero with auxiliary variable
|
||||
# these rows will get a one in each auxiliary column
|
||||
arows = nonzero_constraints[np.logical_not(
|
||||
np.isin(nonzero_constraints, rows))]
|
||||
n_aux = len(arows)
|
||||
acols = n + np.arange(n_aux) # indices of auxiliary columns
|
||||
|
||||
basis_ng = np.concatenate((cols, acols)) # basis columns not from guess
|
||||
basis_ng_rows = np.concatenate((rows, arows)) # rows we need to zero
|
||||
|
||||
# add auxiliary singleton columns
|
||||
A = np.hstack((A, np.zeros((m, n_aux))))
|
||||
A[arows, acols] = 1
|
||||
|
||||
# generate initial BFS
|
||||
x = np.concatenate((x, np.zeros(n_aux)))
|
||||
x[basis_ng] = r[basis_ng_rows]/A[basis_ng_rows, basis_ng]
|
||||
|
||||
# generate costs to minimize infeasibility
|
||||
c = np.zeros(n_aux + n)
|
||||
c[acols] = 1
|
||||
|
||||
# basis columns correspond with nonzeros in guess, those with column
|
||||
# singletons we used to zero remaining constraints, and any additional
|
||||
# columns to get a full set (m columns)
|
||||
basis = np.concatenate((basis, basis_ng))
|
||||
basis = _get_more_basis_columns(A, basis) # add columns as needed
|
||||
|
||||
return A, b, c, basis, x, status
|
||||
|
||||
|
||||
def _select_singleton_columns(A, b):
|
||||
"""
|
||||
Finds singleton columns for which the singleton entry is of the same sign
|
||||
as the right-hand side; these columns are eligible for inclusion in an
|
||||
initial basis. Determines the rows in which the singleton entries are
|
||||
located. For each of these rows, returns the indices of the one singleton
|
||||
column and its corresponding row.
|
||||
"""
|
||||
# find indices of all singleton columns and corresponding row indices
|
||||
column_indices = np.nonzero(np.sum(np.abs(A) != 0, axis=0) == 1)[0]
|
||||
columns = A[:, column_indices] # array of singleton columns
|
||||
row_indices = np.zeros(len(column_indices), dtype=int)
|
||||
nonzero_rows, nonzero_columns = np.nonzero(columns)
|
||||
row_indices[nonzero_columns] = nonzero_rows # corresponding row indices
|
||||
|
||||
# keep only singletons with entries that have same sign as RHS
|
||||
# this is necessary because all elements of BFS must be non-negative
|
||||
same_sign = A[row_indices, column_indices]*b[row_indices] >= 0
|
||||
column_indices = column_indices[same_sign][::-1]
|
||||
row_indices = row_indices[same_sign][::-1]
|
||||
# Reversing the order so that steps below select rightmost columns
|
||||
# for initial basis, which will tend to be slack variables. (If the
|
||||
# guess corresponds with a basic feasible solution but a constraint
|
||||
# is not satisfied with the corresponding slack variable zero, the slack
|
||||
# variable must be basic.)
|
||||
|
||||
# for each row, keep rightmost singleton column with an entry in that row
|
||||
unique_row_indices, first_columns = np.unique(row_indices,
|
||||
return_index=True)
|
||||
return column_indices[first_columns], unique_row_indices
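# Worked example (illustrative, not part of the library source): for
#     A = [[1, 2, 0],
#          [0, 5, 4]]      and      b = [3, -1],
# columns 0 and 2 are singletons, but only column 0 has its nonzero entry of
# the same sign as the corresponding RHS element (1 * 3 >= 0, while 4 * -1 < 0),
# so the function returns (array([0]), array([0])).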
|
||||
|
||||
|
||||
def _find_nonzero_rows(A, tol):
|
||||
"""
|
||||
Returns logical array indicating the locations of rows with at least
|
||||
one nonzero element.
|
||||
"""
|
||||
return np.any(np.abs(A) > tol, axis=1)
|
||||
|
||||
|
||||
def _select_enter_pivot(c_hat, bl, a, rule="bland", tol=1e-12):
|
||||
"""
|
||||
Selects a pivot to enter the basis. Currently Bland's rule - the smallest
|
||||
index that has a negative reduced cost - is the default.
|
||||
"""
|
||||
if rule.lower() == "mrc": # index with minimum reduced cost
|
||||
return a[~bl][np.argmin(c_hat)]
|
||||
else: # smallest index w/ negative reduced cost
|
||||
return a[~bl][c_hat < -tol][0]
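# Worked example (illustrative, not part of the library source): with nonbasic
# column indices a[~bl] = [1, 3] and reduced costs c_hat = [-0.5, -2.0],
# Bland's rule returns column 1 (the smallest index with a negative reduced
# cost), while the "mrc" rule returns column 3 (the most negative reduced cost).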
|
||||
|
||||
|
||||
def _display_iter(phase, iteration, slack, con, fun):
|
||||
"""
|
||||
Print indicators of optimization status to the console.
|
||||
"""
|
||||
header = True if not iteration % 20 else False
|
||||
|
||||
if header:
|
||||
print("Phase",
|
||||
"Iteration",
|
||||
"Minimum Slack ",
|
||||
"Constraint Residual",
|
||||
"Objective ")
|
||||
|
||||
# :<X.Y left aligns Y digits in X digit spaces
|
||||
fmt = '{0:<6}{1:<10}{2:<20.13}{3:<20.13}{4:<20.13}'
|
||||
try:
|
||||
slack = np.min(slack)
|
||||
except ValueError:
|
||||
slack = "NA"
|
||||
print(fmt.format(phase, iteration, slack, np.linalg.norm(con), fun))
|
||||
|
||||
|
||||
def _phase_two(c, A, x, b, callback, postsolve_args, maxiter, tol, disp,
|
||||
maxupdate, mast, pivot, iteration=0, phase_one_n=None):
|
||||
"""
|
||||
The heart of the simplex method. Beginning with a basic feasible solution,
|
||||
moves to adjacent basic feasible solutions with successively lower reduced cost.
|
||||
Terminates when there are no basic feasible solutions with lower reduced
|
||||
cost or if the problem is determined to be unbounded.
|
||||
|
||||
This implementation follows the revised simplex method based on LU
|
||||
decomposition. Rather than maintaining a tableau or an inverse of the
|
||||
basis matrix, we keep a factorization of the basis matrix that allows
|
||||
efficient solution of linear systems while avoiding stability issues
|
||||
associated with inverted matrices.
|
||||
"""
|
||||
m, n = A.shape
|
||||
status = 0
|
||||
a = np.arange(n) # indices of columns of A
|
||||
ab = np.arange(m) # indices of columns of B
|
||||
if maxupdate:
|
||||
# basis matrix factorization object; similar to B = A[:, b]
|
||||
B = BGLU(A, b, maxupdate, mast)
|
||||
else:
|
||||
B = LU(A, b)
|
||||
|
||||
for iteration in range(iteration, iteration + maxiter):
|
||||
|
||||
if disp or callback is not None:
|
||||
if phase_one_n is not None:
|
||||
phase = 1
|
||||
x_postsolve = x[:phase_one_n]
|
||||
else:
|
||||
phase = 2
|
||||
x_postsolve = x
|
||||
x_o, fun, slack, con, _ = _postsolve(x_postsolve,
|
||||
postsolve_args,
|
||||
tol=tol, copy=True)
|
||||
|
||||
if callback is not None:
|
||||
res = OptimizeResult({'x': x_o, 'fun': fun, 'slack': slack,
|
||||
'con': con, 'nit': iteration,
|
||||
'phase': phase, 'complete': False,
|
||||
'status': 0, 'message': "",
|
||||
'success': False})
|
||||
callback(res)
|
||||
else:
|
||||
_display_iter(phase, iteration, slack, con, fun)
|
||||
|
||||
bl = np.zeros(len(a), dtype=bool)
|
||||
bl[b] = 1
|
||||
|
||||
xb = x[b] # basic variables
|
||||
cb = c[b] # basic costs
|
||||
|
||||
try:
|
||||
v = B.solve(cb, transposed=True) # similar to v = solve(B.T, cb)
|
||||
except LinAlgError:
|
||||
status = 4
|
||||
break
|
||||
|
||||
# TODO: cythonize?
|
||||
c_hat = c - v.dot(A) # reduced cost
|
||||
c_hat = c_hat[~bl]
|
||||
# Above is much faster than:
|
||||
# N = A[:, ~bl] # slow!
|
||||
# c_hat = c[~bl] - v.T.dot(N)
|
||||
# Can we perform the multiplication only on the nonbasic columns?
|
||||
|
||||
if np.all(c_hat >= -tol): # all reduced costs positive -> terminate
|
||||
break
|
||||
|
||||
j = _select_enter_pivot(c_hat, bl, a, rule=pivot, tol=tol)
|
||||
u = B.solve(A[:, j]) # similar to u = solve(B, A[:, j])
|
||||
|
||||
i = u > tol # if none of the u are positive, unbounded
|
||||
if not np.any(i):
|
||||
status = 3
|
||||
break
|
||||
|
||||
th = xb[i]/u[i]
|
||||
l = np.argmin(th) # implicitly selects smallest subscript
|
||||
th_star = th[l] # step size
|
||||
|
||||
x[b] = x[b] - th_star*u # take step
|
||||
x[j] = th_star
|
||||
B.update(ab[i][l], j) # modify basis
|
||||
b = B.b # similar to b[ab[i][l]] = j
|
||||
else:
|
||||
status = 1
|
||||
|
||||
return x, b, status, iteration
|
||||
|
||||
|
||||
def _linprog_rs(c, c0, A, b, x0, callback, postsolve_args,
|
||||
maxiter=5000, tol=1e-12, disp=False,
|
||||
maxupdate=10, mast=False, pivot="mrc",
|
||||
**unknown_options):
|
||||
"""
|
||||
Solve the following linear programming problem via a two-phase
|
||||
revised simplex algorithm.::
|
||||
|
||||
minimize: c @ x
|
||||
|
||||
subject to: A @ x == b
|
||||
0 <= x < oo
|
||||
|
||||
Parameters
|
||||
----------
|
||||
c : 1-D array
|
||||
Coefficients of the linear objective function to be minimized.
|
||||
c0 : float
|
||||
Constant term in objective function due to fixed (and eliminated)
|
||||
variables. (Currently unused.)
|
||||
A : 2-D array
|
||||
2-D array which, when matrix-multiplied by ``x``, gives the values of
|
||||
the equality constraints at ``x``.
|
||||
b : 1-D array
|
||||
1-D array of values representing the RHS of each equality constraint
|
||||
(row) in ``A_eq``.
|
||||
x0 : 1-D array, optional
|
||||
Starting values of the independent variables, which will be refined by
|
||||
the optimization algorithm. For the revised simplex method, these must
|
||||
correspond with a basic feasible solution.
|
||||
callback : callable, optional
|
||||
If a callback function is provided, it will be called within each
|
||||
iteration of the algorithm. The callback function must accept a single
|
||||
`scipy.optimize.OptimizeResult` consisting of the following fields:
|
||||
|
||||
x : 1-D array
|
||||
Current solution vector.
|
||||
fun : float
|
||||
Current value of the objective function ``c @ x``.
|
||||
success : bool
|
||||
True only when an algorithm has completed successfully,
|
||||
so this is always False as the callback function is called
|
||||
only while the algorithm is still iterating.
|
||||
slack : 1-D array
|
||||
The values of the slack variables. Each slack variable
|
||||
corresponds to an inequality constraint. If the slack is zero,
|
||||
the corresponding constraint is active.
|
||||
con : 1-D array
|
||||
The (nominally zero) residuals of the equality constraints,
|
||||
that is, ``b - A_eq @ x``.
|
||||
phase : int
|
||||
The phase of the algorithm being executed.
|
||||
status : int
|
||||
For revised simplex, this is always 0 because if a different
|
||||
status is detected, the algorithm terminates.
|
||||
nit : int
|
||||
The number of iterations performed.
|
||||
message : str
|
||||
A string descriptor of the exit status of the optimization.
|
||||
postsolve_args : tuple
|
||||
Data needed by _postsolve to convert the solution to the standard-form
|
||||
problem into the solution to the original problem.
|
||||
|
||||
Options
|
||||
-------
|
||||
maxiter : int
|
||||
The maximum number of iterations to perform in either phase.
|
||||
tol : float
|
||||
The tolerance which determines when a solution is "close enough" to
|
||||
zero in Phase 1 to be considered a basic feasible solution or close
|
||||
enough to positive to serve as an optimal solution.
|
||||
disp : bool
|
||||
Set to ``True`` if indicators of optimization status are to be printed
|
||||
to the console each iteration.
|
||||
maxupdate : int
|
||||
The maximum number of updates performed on the LU factorization.
|
||||
After this many updates is reached, the basis matrix is factorized
|
||||
from scratch.
|
||||
mast : bool
|
||||
Minimize Amortized Solve Time. If enabled, the average time to solve
|
||||
a linear system using the basis factorization is measured. Typically,
|
||||
the average solve time will decrease with each successive solve after
|
||||
initial factorization, as factorization takes much more time than the
|
||||
solve operation (and updates). Eventually, however, the updated
|
||||
factorization becomes sufficiently complex that the average solve time
|
||||
begins to increase. When this is detected, the basis is refactorized
|
||||
from scratch. Enable this option to maximize speed at the risk of
|
||||
nondeterministic behavior. Ignored if ``maxupdate`` is 0.
|
||||
pivot : "mrc" or "bland"
|
||||
Pivot rule: Minimum Reduced Cost (default) or Bland's rule. Choose
|
||||
Bland's rule if iteration limit is reached and cycling is suspected.
|
||||
unknown_options : dict
|
||||
Optional arguments not used by this particular solver. If
|
||||
`unknown_options` is non-empty a warning is issued listing all
|
||||
unused options.
|
||||
|
||||
Returns
|
||||
-------
|
||||
x : 1-D array
|
||||
Solution vector.
|
||||
status : int
|
||||
An integer representing the exit status of the optimization::
|
||||
|
||||
0 : Optimization terminated successfully
|
||||
1 : Iteration limit reached
|
||||
2 : Problem appears to be infeasible
|
||||
3 : Problem appears to be unbounded
|
||||
4 : Numerical difficulties encountered
|
||||
5 : No constraints; turn presolve on
|
||||
6 : Guess x0 cannot be converted to a basic feasible solution
|
||||
|
||||
message : str
|
||||
A string descriptor of the exit status of the optimization.
|
||||
iteration : int
|
||||
The number of iterations taken to solve the problem.
|
||||
"""
|
||||
|
||||
_check_unknown_options(unknown_options)
|
||||
|
||||
messages = ["Optimization terminated successfully.",
|
||||
"Iteration limit reached.",
|
||||
"The problem appears infeasible, as the phase one auxiliary "
|
||||
"problem terminated successfully with a residual of {0:.1e}, "
|
||||
"greater than the tolerance {1} required for the solution to "
|
||||
"be considered feasible. Consider increasing the tolerance to "
|
||||
"be greater than {0:.1e}. If this tolerance is unnaceptably "
|
||||
"large, the problem is likely infeasible.",
|
||||
"The problem is unbounded, as the simplex algorithm found "
|
||||
"a basic feasible solution from which there is a direction "
|
||||
"with negative reduced cost in which all decision variables "
|
||||
"increase.",
|
||||
"Numerical difficulties encountered; consider trying "
|
||||
"method='interior-point'.",
|
||||
"Problems with no constraints are trivially solved; please "
|
||||
"turn presolve on.",
|
||||
"The guess x0 cannot be converted to a basic feasible "
|
||||
"solution. "
|
||||
]
|
||||
|
||||
if A.size == 0: # address test_unbounded_below_no_presolve_corrected
|
||||
return np.zeros(c.shape), 5, messages[5], 0
|
||||
|
||||
x, basis, A, b, residual, status, iteration = (
|
||||
_phase_one(A, b, x0, callback, postsolve_args,
|
||||
maxiter, tol, disp, maxupdate, mast, pivot))
|
||||
|
||||
if status == 0:
|
||||
x, basis, status, iteration = _phase_two(c, A, x, basis, callback,
|
||||
postsolve_args,
|
||||
maxiter, tol, disp,
|
||||
maxupdate, mast, pivot,
|
||||
iteration)
|
||||
|
||||
return x, status, messages[status].format(residual, tol), iteration
|
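As a quick orientation to the callback fields and options documented above, the following is a minimal, hypothetical sketch of how this solver is reached through the public ``scipy.optimize.linprog`` interface; the problem data, the callback body, and the printed fields are illustrative only and are not taken from this file.

import numpy as np
from scipy.optimize import linprog

c = np.array([-1.0, 4.0])                    # example objective coefficients
A_ub = np.array([[-3.0, 1.0], [1.0, 2.0]])   # example inequality constraints
b_ub = np.array([6.0, 4.0])

def report(res):
    # res is the OptimizeResult described above (fields x, fun, nit, phase, ...)
    print(res.nit, res.phase, res.fun)

res = linprog(c, A_ub=A_ub, b_ub=b_ub, method='revised simplex',
              callback=report,
              options={'maxiter': 5000, 'maxupdate': 10, 'pivot': 'mrc'})
print(res.status, res.message)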
659
venv/Lib/site-packages/scipy/optimize/_linprog_simplex.py
Normal file
|
@ -0,0 +1,659 @@
|
|||
"""Simplex method for linear programming
|
||||
|
||||
The *simplex* method uses a traditional, full-tableau implementation of
|
||||
Dantzig's simplex algorithm [1]_, [2]_ (*not* the Nelder-Mead simplex).
|
||||
This algorithm is included for backwards compatibility and educational
|
||||
purposes.
|
||||
|
||||
.. versionadded:: 0.15.0
|
||||
|
||||
Warnings
|
||||
--------
|
||||
|
||||
The simplex method may encounter numerical difficulties when pivot
|
||||
values are close to the specified tolerance. If encountered, try
removing any redundant constraints, changing the pivot strategy to
Bland's rule, or increasing the tolerance value.
|
||||
|
||||
Alternatively, more robust methods may be used. See
|
||||
:ref:`'interior-point' <optimize.linprog-interior-point>` and
|
||||
:ref:`'revised simplex' <optimize.linprog-revised_simplex>`.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Dantzig, George B., Linear programming and extensions. Rand
|
||||
Corporation Research Study Princeton Univ. Press, Princeton, NJ,
|
||||
1963
|
||||
.. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
|
||||
Mathematical Programming", McGraw-Hill, Chapter 4.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from warnings import warn
|
||||
from .optimize import OptimizeResult, OptimizeWarning, _check_unknown_options
|
||||
from ._linprog_util import _postsolve
|
||||
|
||||
|
||||
def _pivot_col(T, tol=1e-9, bland=False):
|
||||
"""
|
||||
Given a linear programming simplex tableau, determine the column
|
||||
of the variable to enter the basis.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
T : 2-D array
|
||||
A 2-D array representing the simplex tableau, T, corresponding to the
|
||||
linear programming problem. It should have the form:
|
||||
|
||||
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
|
||||
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
|
||||
.
|
||||
.
|
||||
.
|
||||
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
|
||||
[c[0], c[1], ..., c[n_total], 0]]
|
||||
|
||||
for a Phase 2 problem, or the form:
|
||||
|
||||
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
|
||||
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
|
||||
.
|
||||
.
|
||||
.
|
||||
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
|
||||
[c[0], c[1], ..., c[n_total], 0],
|
||||
[c'[0], c'[1], ..., c'[n_total], 0]]
|
||||
|
||||
for a Phase 1 problem (a problem in which a basic feasible solution is
|
||||
sought prior to maximizing the actual objective). ``T`` is modified in
|
||||
place by ``_solve_simplex``.
|
||||
tol : float
|
||||
Elements in the objective row larger than -tol will not be considered
|
||||
for pivoting. Nominally this value is zero, but numerical issues
|
||||
cause a tolerance about zero to be necessary.
|
||||
bland : bool
|
||||
If True, use Bland's rule for selection of the column (select the
|
||||
first column with a negative coefficient in the objective row,
|
||||
regardless of magnitude).
|
||||
|
||||
Returns
|
||||
-------
|
||||
status: bool
|
||||
True if a suitable pivot column was found, otherwise False.
|
||||
A return of False indicates that the linear programming simplex
|
||||
algorithm is complete.
|
||||
col: int
|
||||
The index of the column of the pivot element.
|
||||
If status is False, col will be returned as nan.
|
||||
"""
|
||||
ma = np.ma.masked_where(T[-1, :-1] >= -tol, T[-1, :-1], copy=False)
|
||||
if ma.count() == 0:
|
||||
return False, np.nan
|
||||
if bland:
|
||||
# ma.mask is sometimes 0d
|
||||
return True, np.nonzero(np.logical_not(np.atleast_1d(ma.mask)))[0][0]
|
||||
return True, np.ma.nonzero(ma == ma.min())[0][0]
|
||||
|
||||
|
||||
def _pivot_row(T, basis, pivcol, phase, tol=1e-9, bland=False):
|
||||
"""
|
||||
Given a linear programming simplex tableau, determine the row for the
|
||||
pivot operation.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
T : 2-D array
|
||||
A 2-D array representing the simplex tableau, T, corresponding to the
|
||||
linear programming problem. It should have the form:
|
||||
|
||||
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
|
||||
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
|
||||
.
|
||||
.
|
||||
.
|
||||
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
|
||||
[c[0], c[1], ..., c[n_total], 0]]
|
||||
|
||||
for a Phase 2 problem, or the form:
|
||||
|
||||
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
|
||||
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
|
||||
.
|
||||
.
|
||||
.
|
||||
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
|
||||
[c[0], c[1], ..., c[n_total], 0],
|
||||
[c'[0], c'[1], ..., c'[n_total], 0]]
|
||||
|
||||
for a Phase 1 problem (a problem in which a basic feasible solution is
sought prior to maximizing the actual objective). ``T`` is modified in
|
||||
place by ``_solve_simplex``.
|
||||
basis : array
|
||||
A list of the current basic variables.
|
||||
pivcol : int
|
||||
The index of the pivot column.
|
||||
phase : int
|
||||
The phase of the simplex algorithm (1 or 2).
|
||||
tol : float
|
||||
Elements in the pivot column smaller than tol will not be considered
|
||||
for pivoting. Nominally this value is zero, but numerical issues
|
||||
cause a tolerance about zero to be necessary.
|
||||
bland : bool
|
||||
If True, use Bland's rule for selection of the row (if more than one
|
||||
row can be used, choose the one with the lowest variable index).
|
||||
|
||||
Returns
|
||||
-------
|
||||
status: bool
|
||||
True if a suitable pivot row was found, otherwise False. A return
|
||||
of False indicates that the linear programming problem is unbounded.
|
||||
row: int
|
||||
The index of the row of the pivot element. If status is False, row
|
||||
will be returned as nan.
|
||||
"""
|
||||
if phase == 1:
|
||||
k = 2
|
||||
else:
|
||||
k = 1
|
||||
ma = np.ma.masked_where(T[:-k, pivcol] <= tol, T[:-k, pivcol], copy=False)
|
||||
if ma.count() == 0:
|
||||
return False, np.nan
|
||||
mb = np.ma.masked_where(T[:-k, pivcol] <= tol, T[:-k, -1], copy=False)
|
||||
q = mb / ma
|
||||
min_rows = np.ma.nonzero(q == q.min())[0]
|
||||
if bland:
|
||||
return True, min_rows[np.argmin(np.take(basis, min_rows))]
|
||||
return True, min_rows[0]
|
||||
|
||||
|
||||
def _apply_pivot(T, basis, pivrow, pivcol, tol=1e-9):
|
||||
"""
|
||||
Pivot the simplex tableau in place on the element given by (pivrow, pivcol).
|
||||
The entering variable corresponds to the column given by pivcol forcing
|
||||
the variable basis[pivrow] to leave the basis.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
T : 2-D array
|
||||
A 2-D array representing the simplex tableau, T, corresponding to the
|
||||
linear programming problem. It should have the form:
|
||||
|
||||
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
|
||||
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
|
||||
.
|
||||
.
|
||||
.
|
||||
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
|
||||
[c[0], c[1], ..., c[n_total], 0]]
|
||||
|
||||
for a Phase 2 problem, or the form:
|
||||
|
||||
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
|
||||
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
|
||||
.
|
||||
.
|
||||
.
|
||||
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
|
||||
[c[0], c[1], ..., c[n_total], 0],
|
||||
[c'[0], c'[1], ..., c'[n_total], 0]]
|
||||
|
||||
for a Phase 1 problem (a problem in which a basic feasible solution is
|
||||
sought prior to maximizing the actual objective). ``T`` is modified in
|
||||
place by ``_solve_simplex``.
|
||||
basis : 1-D array
|
||||
An array of the indices of the basic variables, such that basis[i]
|
||||
contains the column corresponding to the basic variable for row i.
|
||||
Basis is modified in place by _apply_pivot.
|
||||
pivrow : int
|
||||
Row index of the pivot.
|
||||
pivcol : int
|
||||
Column index of the pivot.
|
||||
"""
|
||||
basis[pivrow] = pivcol
|
||||
pivval = T[pivrow, pivcol]
|
||||
T[pivrow] = T[pivrow] / pivval
|
||||
for irow in range(T.shape[0]):
|
||||
if irow != pivrow:
|
||||
T[irow] = T[irow] - T[pivrow] * T[irow, pivcol]
|
||||
|
||||
# The selected pivot should never lead to a pivot value less than the tol.
|
||||
if np.isclose(pivval, tol, atol=0, rtol=1e4):
|
||||
message = (
|
||||
"The pivot operation produces a pivot value of:{0: .1e}, "
|
||||
"which is only slightly greater than the specified "
|
||||
"tolerance{1: .1e}. This may lead to issues regarding the "
|
||||
"numerical stability of the simplex method. "
|
||||
"Removing redundant constraints, changing the pivot strategy "
|
||||
"via Bland's rule or increasing the tolerance may "
|
||||
"help reduce the issue.".format(pivval, tol))
|
||||
warn(message, OptimizeWarning, stacklevel=5)
|
||||
|
||||
|
||||
def _solve_simplex(T, n, basis, callback, postsolve_args,
|
||||
maxiter=1000, tol=1e-9, phase=2, bland=False, nit0=0,
|
||||
):
|
||||
"""
|
||||
Solve a linear programming problem in "standard form" using the Simplex
|
||||
Method. Linear Programming is intended to solve the following problem form:
|
||||
|
||||
Minimize::
|
||||
|
||||
c @ x
|
||||
|
||||
Subject to::
|
||||
|
||||
A @ x == b
|
||||
x >= 0
|
||||
|
||||
Parameters
|
||||
----------
|
||||
T : 2-D array
|
||||
A 2-D array representing the simplex tableau, T, corresponding to the
|
||||
linear programming problem. It should have the form:
|
||||
|
||||
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
|
||||
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
|
||||
.
|
||||
.
|
||||
.
|
||||
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
|
||||
[c[0], c[1], ..., c[n_total], 0]]
|
||||
|
||||
for a Phase 2 problem, or the form:
|
||||
|
||||
[[A[0, 0], A[0, 1], ..., A[0, n_total], b[0]],
|
||||
[A[1, 0], A[1, 1], ..., A[1, n_total], b[1]],
|
||||
.
|
||||
.
|
||||
.
|
||||
[A[m, 0], A[m, 1], ..., A[m, n_total], b[m]],
|
||||
[c[0], c[1], ..., c[n_total], 0],
|
||||
[c'[0], c'[1], ..., c'[n_total], 0]]
|
||||
|
||||
for a Phase 1 problem (a problem in which a basic feasible solution is
|
||||
sought prior to maximizing the actual objective). ``T`` is modified in
|
||||
place by ``_solve_simplex``.
|
||||
n : int
|
||||
The number of true variables in the problem.
|
||||
basis : 1-D array
|
||||
An array of the indices of the basic variables, such that basis[i]
|
||||
contains the column corresponding to the basic variable for row i.
|
||||
Basis is modified in place by _solve_simplex
|
||||
callback : callable, optional
|
||||
If a callback function is provided, it will be called within each
|
||||
iteration of the algorithm. The callback must accept a
|
||||
`scipy.optimize.OptimizeResult` consisting of the following fields:
|
||||
|
||||
x : 1-D array
|
||||
Current solution vector
|
||||
fun : float
|
||||
Current value of the objective function
|
||||
success : bool
|
||||
True only when a phase has completed successfully. This
|
||||
will be False for most iterations.
|
||||
slack : 1-D array
|
||||
The values of the slack variables. Each slack variable
|
||||
corresponds to an inequality constraint. If the slack is zero,
|
||||
the corresponding constraint is active.
|
||||
con : 1-D array
|
||||
The (nominally zero) residuals of the equality constraints,
|
||||
that is, ``b - A_eq @ x``
|
||||
phase : int
|
||||
The phase of the optimization being executed. In phase 1 a basic
|
||||
feasible solution is sought and the T has an additional row
|
||||
representing an alternate objective function.
|
||||
status : int
|
||||
An integer representing the exit status of the optimization::
|
||||
|
||||
0 : Optimization terminated successfully
|
||||
1 : Iteration limit reached
|
||||
2 : Problem appears to be infeasible
|
||||
3 : Problem appears to be unbounded
|
||||
4 : Serious numerical difficulties encountered
|
||||
|
||||
nit : int
|
||||
The number of iterations performed.
|
||||
message : str
|
||||
A string descriptor of the exit status of the optimization.
|
||||
postsolve_args : tuple
|
||||
Data needed by _postsolve to convert the solution to the standard-form
|
||||
problem into the solution to the original problem.
|
||||
maxiter : int
|
||||
The maximum number of iterations to perform before aborting the
|
||||
optimization.
|
||||
tol : float
|
||||
The tolerance which determines when a solution is "close enough" to
|
||||
zero in Phase 1 to be considered a basic feasible solution or close
|
||||
enough to positive to serve as an optimal solution.
|
||||
phase : int
|
||||
The phase of the optimization being executed. In phase 1 a basic
|
||||
feasible solution is sought and the T has an additional row
|
||||
representing an alternate objective function.
|
||||
bland : bool
|
||||
If True, choose pivots using Bland's rule [3]_. In problems which
|
||||
fail to converge due to cycling, using Bland's rule can provide
|
||||
convergence at the expense of a less optimal path about the simplex.
|
||||
nit0 : int
|
||||
The initial iteration number used to keep an accurate iteration total
|
||||
in a two-phase problem.
|
||||
|
||||
Returns
|
||||
-------
|
||||
nit : int
|
||||
The number of iterations. Used to keep an accurate iteration total
|
||||
in the two-phase problem.
|
||||
status : int
|
||||
An integer representing the exit status of the optimization::
|
||||
|
||||
0 : Optimization terminated successfully
|
||||
1 : Iteration limit reached
|
||||
2 : Problem appears to be infeasible
|
||||
3 : Problem appears to be unbounded
|
||||
4 : Serious numerical difficulties encountered
|
||||
|
||||
"""
|
||||
nit = nit0
|
||||
status = 0
|
||||
message = ''
|
||||
complete = False
|
||||
|
||||
if phase == 1:
|
||||
m = T.shape[1]-2
|
||||
elif phase == 2:
|
||||
m = T.shape[1]-1
|
||||
else:
|
||||
raise ValueError("Argument 'phase' to _solve_simplex must be 1 or 2")
|
||||
|
||||
if phase == 2:
|
||||
# Check if any artificial variables are still in the basis.
|
||||
# If yes, check if any coefficients from this row and a column
|
||||
# corresponding to one of the non-artificial variable is non-zero.
|
||||
# If found, pivot at this term. If not, start phase 2.
|
||||
# Do this for all artificial variables in the basis.
|
||||
# Ref: "An Introduction to Linear Programming and Game Theory"
|
||||
# by Paul R. Thie, Gerard E. Keough, 3rd Ed,
|
||||
# Chapter 3.7 Redundant Systems (page 102)
|
||||
for pivrow in [row for row in range(basis.size)
|
||||
if basis[row] > T.shape[1] - 2]:
|
||||
non_zero_row = [col for col in range(T.shape[1] - 1)
|
||||
if abs(T[pivrow, col]) > tol]
|
||||
if len(non_zero_row) > 0:
|
||||
pivcol = non_zero_row[0]
|
||||
_apply_pivot(T, basis, pivrow, pivcol, tol)
|
||||
nit += 1
|
||||
|
||||
if len(basis[:m]) == 0:
|
||||
solution = np.zeros(T.shape[1] - 1, dtype=np.float64)
|
||||
else:
|
||||
solution = np.zeros(max(T.shape[1] - 1, max(basis[:m]) + 1),
|
||||
dtype=np.float64)
|
||||
|
||||
while not complete:
|
||||
# Find the pivot column
|
||||
pivcol_found, pivcol = _pivot_col(T, tol, bland)
|
||||
if not pivcol_found:
|
||||
pivcol = np.nan
|
||||
pivrow = np.nan
|
||||
status = 0
|
||||
complete = True
|
||||
else:
|
||||
# Find the pivot row
|
||||
pivrow_found, pivrow = _pivot_row(T, basis, pivcol, phase, tol, bland)
|
||||
if not pivrow_found:
|
||||
status = 3
|
||||
complete = True
|
||||
|
||||
if callback is not None:
|
||||
solution[:] = 0
|
||||
solution[basis[:n]] = T[:n, -1]
|
||||
x = solution[:m]
|
||||
x, fun, slack, con, _ = _postsolve(
|
||||
x, postsolve_args, tol=tol
|
||||
)
|
||||
res = OptimizeResult({
|
||||
'x': x,
|
||||
'fun': fun,
|
||||
'slack': slack,
|
||||
'con': con,
|
||||
'status': status,
|
||||
'message': message,
|
||||
'nit': nit,
|
||||
'success': status == 0 and complete,
|
||||
'phase': phase,
|
||||
'complete': complete,
|
||||
})
|
||||
callback(res)
|
||||
|
||||
if not complete:
|
||||
if nit >= maxiter:
|
||||
# Iteration limit exceeded
|
||||
status = 1
|
||||
complete = True
|
||||
else:
|
||||
_apply_pivot(T, basis, pivrow, pivcol, tol)
|
||||
nit += 1
|
||||
return nit, status
|
||||
|
||||
|
||||
def _linprog_simplex(c, c0, A, b, callback, postsolve_args,
|
||||
maxiter=1000, tol=1e-9, disp=False, bland=False,
|
||||
**unknown_options):
|
||||
"""
|
||||
Minimize a linear objective function subject to linear equality and
|
||||
non-negativity constraints using the two phase simplex method.
|
||||
Linear programming is intended to solve problems of the following form:
|
||||
|
||||
Minimize::
|
||||
|
||||
c @ x
|
||||
|
||||
Subject to::
|
||||
|
||||
A @ x == b
|
||||
x >= 0
|
||||
|
||||
Parameters
|
||||
----------
|
||||
c : 1-D array
|
||||
Coefficients of the linear objective function to be minimized.
|
||||
c0 : float
|
||||
Constant term in objective function due to fixed (and eliminated)
|
||||
variables. (Purely for display.)
|
||||
A : 2-D array
|
||||
2-D array such that ``A @ x``, gives the values of the equality
|
||||
constraints at ``x``.
|
||||
b : 1-D array
|
||||
1-D array of values representing the right hand side of each equality
|
||||
constraint (row) in ``A``.
|
||||
callback : callable, optional
|
||||
If a callback function is provided, it will be called within each
|
||||
iteration of the algorithm. The callback function must accept a single
|
||||
`scipy.optimize.OptimizeResult` consisting of the following fields:
|
||||
|
||||
x : 1-D array
|
||||
Current solution vector
|
||||
fun : float
|
||||
Current value of the objective function
|
||||
success : bool
|
||||
True when an algorithm has completed successfully.
|
||||
slack : 1-D array
|
||||
The values of the slack variables. Each slack variable
|
||||
corresponds to an inequality constraint. If the slack is zero,
|
||||
the corresponding constraint is active.
|
||||
con : 1-D array
|
||||
The (nominally zero) residuals of the equality constraints,
|
||||
that is, ``b - A_eq @ x``
|
||||
phase : int
|
||||
The phase of the algorithm being executed.
|
||||
status : int
|
||||
An integer representing the status of the optimization::
|
||||
|
||||
0 : Algorithm proceeding nominally
|
||||
1 : Iteration limit reached
|
||||
2 : Problem appears to be infeasible
|
||||
3 : Problem appears to be unbounded
|
||||
4 : Serious numerical difficulties encountered
|
||||
nit : int
|
||||
The number of iterations performed.
|
||||
message : str
|
||||
A string descriptor of the exit status of the optimization.
|
||||
postsolve_args : tuple
|
||||
Data needed by _postsolve to convert the solution to the standard-form
|
||||
problem into the solution to the original problem.
|
||||
|
||||
Options
|
||||
-------
|
||||
maxiter : int
|
||||
The maximum number of iterations to perform.
|
||||
disp : bool
|
||||
If True, print exit status message to sys.stdout
|
||||
tol : float
|
||||
The tolerance which determines when a solution is "close enough" to
|
||||
zero in Phase 1 to be considered a basic feasible solution or close
|
||||
enough to positive to serve as an optimal solution.
|
||||
bland : bool
|
||||
If True, use Bland's anti-cycling rule [3]_ to choose pivots to
|
||||
prevent cycling. If False, choose pivots which should lead to a
|
||||
converged solution more quickly. The latter method is subject to
|
||||
cycling (non-convergence) in rare instances.
|
||||
unknown_options : dict
|
||||
Optional arguments not used by this particular solver. If
|
||||
`unknown_options` is non-empty a warning is issued listing all
|
||||
unused options.
|
||||
|
||||
Returns
|
||||
-------
|
||||
x : 1-D array
|
||||
Solution vector.
|
||||
status : int
|
||||
An integer representing the exit status of the optimization::
|
||||
|
||||
0 : Optimization terminated successfully
|
||||
1 : Iteration limit reached
|
||||
2 : Problem appears to be infeasible
|
||||
3 : Problem appears to be unbounded
|
||||
4 : Serious numerical difficulties encountered
|
||||
|
||||
message : str
|
||||
A string descriptor of the exit status of the optimization.
|
||||
iteration : int
|
||||
The number of iterations taken to solve the problem.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Dantzig, George B., Linear programming and extensions. Rand
|
||||
Corporation Research Study Princeton Univ. Press, Princeton, NJ,
|
||||
1963
|
||||
.. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
|
||||
Mathematical Programming", McGraw-Hill, Chapter 4.
|
||||
.. [3] Bland, Robert G. New finite pivoting rules for the simplex method.
|
||||
Mathematics of Operations Research (2), 1977: pp. 103-107.
|
||||
|
||||
|
||||
Notes
|
||||
-----
|
||||
The expected problem formulation differs between the top-level ``linprog``
module and the method-specific solvers. The method-specific solvers expect a
|
||||
problem in standard form:
|
||||
|
||||
Minimize::
|
||||
|
||||
c @ x
|
||||
|
||||
Subject to::
|
||||
|
||||
A @ x == b
|
||||
x >= 0
|
||||
|
||||
Whereas the top level ``linprog`` module expects a problem of form:
|
||||
|
||||
Minimize::
|
||||
|
||||
c @ x
|
||||
|
||||
Subject to::
|
||||
|
||||
A_ub @ x <= b_ub
|
||||
A_eq @ x == b_eq
|
||||
lb <= x <= ub
|
||||
|
||||
where ``lb = 0`` and ``ub = None`` unless set in ``bounds``.
|
||||
|
||||
The original problem contains equality, upper-bound and variable constraints
|
||||
whereas the method-specific solver requires equality constraints and
|
||||
variable non-negativity.
|
||||
|
||||
The ``linprog`` module converts the original problem to standard form by
|
||||
converting the simple bounds to upper bound constraints, introducing
|
||||
non-negative slack variables for inequality constraints, and expressing
|
||||
unbounded variables as the difference between two non-negative variables.
|
||||
"""
|
||||
_check_unknown_options(unknown_options)
|
||||
|
||||
status = 0
|
||||
messages = {0: "Optimization terminated successfully.",
|
||||
1: "Iteration limit reached.",
|
||||
2: "Optimization failed. Unable to find a feasible"
|
||||
" starting point.",
|
||||
3: "Optimization failed. The problem appears to be unbounded.",
|
||||
4: "Optimization failed. Singular matrix encountered."}
|
||||
|
||||
n, m = A.shape
|
||||
|
||||
# All constraints must have b >= 0.
|
||||
is_negative_constraint = np.less(b, 0)
|
||||
A[is_negative_constraint] *= -1
|
||||
b[is_negative_constraint] *= -1
|
||||
|
||||
# As all constraints are equality constraints the artificial variables
|
||||
# will also be basic variables.
|
||||
av = np.arange(n) + m
|
||||
basis = av.copy()
|
||||
|
||||
# Format the phase one tableau by adding artificial variables and stacking
|
||||
# the constraints, the objective row and pseudo-objective row.
|
||||
row_constraints = np.hstack((A, np.eye(n), b[:, np.newaxis]))
|
||||
row_objective = np.hstack((c, np.zeros(n), c0))
|
||||
row_pseudo_objective = -row_constraints.sum(axis=0)
|
||||
row_pseudo_objective[av] = 0
|
||||
T = np.vstack((row_constraints, row_objective, row_pseudo_objective))
|
||||
|
||||
nit1, status = _solve_simplex(T, n, basis, callback=callback,
|
||||
postsolve_args=postsolve_args,
|
||||
maxiter=maxiter, tol=tol, phase=1,
|
||||
bland=bland
|
||||
)
|
||||
# if pseudo objective is zero, remove the last row from the tableau and
|
||||
# proceed to phase 2
|
||||
nit2 = nit1
|
||||
if abs(T[-1, -1]) < tol:
|
||||
# Remove the pseudo-objective row from the tableau
|
||||
T = T[:-1, :]
|
||||
# Remove the artificial variable columns from the tableau
|
||||
T = np.delete(T, av, 1)
|
||||
else:
|
||||
# Failure to find a feasible starting point
|
||||
status = 2
|
||||
messages[status] = (
|
||||
"Phase 1 of the simplex method failed to find a feasible "
|
||||
"solution. The pseudo-objective function evaluates to {0:.1e} "
|
||||
"which exceeds the required tolerance of {1} for a solution to be "
|
||||
"considered 'close enough' to zero to be a basic solution. "
|
||||
"Consider increasing the tolerance to be greater than {0:.1e}. "
|
||||
"If this tolerance is unacceptably large the problem may be "
|
||||
"infeasible.".format(abs(T[-1, -1]), tol)
|
||||
)
|
||||
|
||||
if status == 0:
|
||||
# Phase 2
|
||||
nit2, status = _solve_simplex(T, n, basis, callback=callback,
|
||||
postsolve_args=postsolve_args,
|
||||
maxiter=maxiter, tol=tol, phase=2,
|
||||
bland=bland, nit0=nit1
|
||||
)
|
||||
|
||||
solution = np.zeros(n + m)
|
||||
solution[basis[:n]] = T[:n, -1]
|
||||
x = solution[:m]
|
||||
|
||||
return x, status, messages[status], int(nit2)
|
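The Options block above (``maxiter``, ``tol``, ``disp``, ``bland``) is exercised the same way through the public interface; a minimal, hypothetical sketch with made-up data follows, showing Bland's anti-cycling rule being switched on.

import numpy as np
from scipy.optimize import linprog

c = np.array([2.0, 3.0])         # example objective
A_eq = np.array([[1.0, 1.0]])    # example equality constraint: x0 + x1 == 1
b_eq = np.array([1.0])

# Bland's rule trades a possibly longer path for a guarantee against cycling.
res = linprog(c, A_eq=A_eq, b_eq=b_eq, method='simplex',
              options={'bland': True, 'maxiter': 4000, 'tol': 1e-9})
print(res.x, res.status, res.nit)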
1546
venv/Lib/site-packages/scipy/optimize/_linprog_util.py
Normal file
File diff suppressed because it is too large
105
venv/Lib/site-packages/scipy/optimize/_lsap.py
Normal file
|
@ -0,0 +1,105 @@
|
|||
# Wrapper for the shortest augmenting path algorithm for solving the
|
||||
# rectangular linear sum assignment problem. The original code was an
|
||||
# implementation of the Hungarian algorithm (Kuhn-Munkres) taken from
|
||||
# scikit-learn, based on original code by Brian Clapper and adapted to NumPy
|
||||
# by Gael Varoquaux. Further improvements by Ben Root, Vlad Niculae, Lars
|
||||
# Buitinck, and Peter Larsen.
|
||||
#
|
||||
# Copyright (c) 2008 Brian M. Clapper <bmc@clapper.org>, Gael Varoquaux
|
||||
# Author: Brian M. Clapper, Gael Varoquaux
|
||||
# License: 3-clause BSD
|
||||
|
||||
import numpy as np
|
||||
from . import _lsap_module
|
||||
|
||||
|
||||
def linear_sum_assignment(cost_matrix, maximize=False):
|
||||
"""Solve the linear sum assignment problem.
|
||||
|
||||
The linear sum assignment problem is also known as minimum weight matching
|
||||
in bipartite graphs. A problem instance is described by a matrix C, where
|
||||
each C[i,j] is the cost of matching vertex i of the first partite set
|
||||
(a "worker") and vertex j of the second set (a "job"). The goal is to find
|
||||
a complete assignment of workers to jobs of minimal cost.
|
||||
|
||||
Formally, let X be a boolean matrix where :math:`X[i,j] = 1` iff row i is
|
||||
assigned to column j. Then the optimal assignment has cost
|
||||
|
||||
.. math::
|
||||
\\min \\sum_i \\sum_j C_{i,j} X_{i,j}
|
||||
|
||||
where, in the case where the matrix X is square, each row is assigned to
|
||||
exactly one column, and each column to exactly one row.
|
||||
|
||||
This function can also solve a generalization of the classic assignment
|
||||
problem where the cost matrix is rectangular. If it has more rows than
|
||||
columns, then not every row needs to be assigned to a column, and vice
|
||||
versa.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cost_matrix : array
|
||||
The cost matrix of the bipartite graph.
|
||||
|
||||
maximize : bool (default: False)
|
||||
Calculates a maximum weight matching if true.
|
||||
|
||||
Returns
|
||||
-------
|
||||
row_ind, col_ind : array
|
||||
An array of row indices and one of corresponding column indices giving
|
||||
the optimal assignment. The cost of the assignment can be computed
|
||||
as ``cost_matrix[row_ind, col_ind].sum()``. The row indices will be
|
||||
sorted; in the case of a square cost matrix they will be equal to
|
||||
``numpy.arange(cost_matrix.shape[0])``.
|
||||
|
||||
Notes
|
||||
-----
|
||||
.. versionadded:: 0.17.0
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
1. https://en.wikipedia.org/wiki/Assignment_problem
|
||||
|
||||
2. DF Crouse. On implementing 2D rectangular assignment algorithms.
|
||||
*IEEE Transactions on Aerospace and Electronic Systems*,
|
||||
52(4):1679-1696, August 2016, https://doi.org/10.1109/TAES.2016.140952
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> cost = np.array([[4, 1, 3], [2, 0, 5], [3, 2, 2]])
|
||||
>>> from scipy.optimize import linear_sum_assignment
|
||||
>>> row_ind, col_ind = linear_sum_assignment(cost)
|
||||
>>> col_ind
|
||||
array([1, 0, 2])
|
||||
>>> cost[row_ind, col_ind].sum()
|
||||
5
|
||||
"""
|
||||
cost_matrix = np.asarray(cost_matrix)
|
||||
if len(cost_matrix.shape) != 2:
|
||||
raise ValueError("expected a matrix (2-D array), got a %r array"
|
||||
% (cost_matrix.shape,))
|
||||
|
||||
if not (np.issubdtype(cost_matrix.dtype, np.number) or
|
||||
cost_matrix.dtype == np.dtype(np.bool_)):
|
||||
raise ValueError("expected a matrix containing numerical entries, got %s"
|
||||
% (cost_matrix.dtype,))
|
||||
|
||||
if maximize:
|
||||
cost_matrix = -cost_matrix
|
||||
|
||||
if np.any(np.isneginf(cost_matrix) | np.isnan(cost_matrix)):
|
||||
raise ValueError("matrix contains invalid numeric entries")
|
||||
|
||||
cost_matrix = cost_matrix.astype(np.double)
|
||||
a = np.arange(np.min(cost_matrix.shape))
|
||||
|
||||
# The algorithm expects more columns than rows in the cost matrix.
|
||||
if cost_matrix.shape[1] < cost_matrix.shape[0]:
|
||||
b = _lsap_module.calculate_assignment(cost_matrix.T)
|
||||
indices = np.argsort(b)
|
||||
return (b[indices], a[indices])
|
||||
else:
|
||||
b = _lsap_module.calculate_assignment(cost_matrix)
|
||||
return (a, b)
|
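Complementing the minimization example in the docstring above, a short sketch of the rectangular and ``maximize=True`` cases; the cost matrix is made up.

import numpy as np
from scipy.optimize import linear_sum_assignment

cost = np.array([[4, 1, 3], [2, 0, 5]])   # 2 workers, 3 jobs (rectangular)
row_ind, col_ind = linear_sum_assignment(cost, maximize=True)
# Every row is assigned; one column is necessarily left unmatched.
print(row_ind, col_ind, cost[row_ind, col_ind].sum())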
Binary file not shown.
5
venv/Lib/site-packages/scipy/optimize/_lsq/__init__.py
Normal file
|
@ -0,0 +1,5 @@
|
|||
"""This module contains least-squares algorithms."""
|
||||
from .least_squares import least_squares
|
||||
from .lsq_linear import lsq_linear
|
||||
|
||||
__all__ = ['least_squares', 'lsq_linear']
|
Binary file not shown.
178
venv/Lib/site-packages/scipy/optimize/_lsq/bvls.py
Normal file
|
@ -0,0 +1,178 @@
|
|||
"""Bounded-variable least-squares algorithm."""
|
||||
import numpy as np
|
||||
from numpy.linalg import norm, lstsq
|
||||
from scipy.optimize import OptimizeResult
|
||||
|
||||
from .common import print_header_linear, print_iteration_linear
|
||||
|
||||
|
||||
def compute_kkt_optimality(g, on_bound):
|
||||
"""Compute the maximum violation of KKT conditions."""
|
||||
g_kkt = g * on_bound
|
||||
free_set = on_bound == 0
|
||||
g_kkt[free_set] = np.abs(g[free_set])
|
||||
return np.max(g_kkt)
|
||||
|
||||
|
||||
def bvls(A, b, x_lsq, lb, ub, tol, max_iter, verbose):
|
||||
m, n = A.shape
|
||||
|
||||
x = x_lsq.copy()
|
||||
on_bound = np.zeros(n)
|
||||
|
||||
mask = x < lb
|
||||
x[mask] = lb[mask]
|
||||
on_bound[mask] = -1
|
||||
|
||||
mask = x > ub
|
||||
x[mask] = ub[mask]
|
||||
on_bound[mask] = 1
|
||||
|
||||
free_set = on_bound == 0
|
||||
active_set = ~free_set
|
||||
free_set, = np.nonzero(free_set)
|
||||
|
||||
r = A.dot(x) - b
|
||||
cost = 0.5 * np.dot(r, r)
|
||||
initial_cost = cost
|
||||
g = A.T.dot(r)
|
||||
|
||||
cost_change = None
|
||||
step_norm = None
|
||||
iteration = 0
|
||||
|
||||
if verbose == 2:
|
||||
print_header_linear()
|
||||
|
||||
# This is the initialization loop. The requirement is that the
|
||||
# least-squares solution on free variables is feasible before BVLS starts.
|
||||
# One possible initialization is to set all variables to lower or upper
|
||||
# bounds, but many iterations may be required from this state later on.
|
||||
# The implemented ad-hoc procedure should intuitively give a better
# initial state: find the least-squares solution on the current free
# variables; if it is feasible then stop, otherwise set the violating
# variables to their corresponding bounds and continue on the reduced
# set of free variables.
|
||||
|
||||
while free_set.size > 0:
|
||||
if verbose == 2:
|
||||
optimality = compute_kkt_optimality(g, on_bound)
|
||||
print_iteration_linear(iteration, cost, cost_change, step_norm,
|
||||
optimality)
|
||||
|
||||
iteration += 1
|
||||
x_free_old = x[free_set].copy()
|
||||
|
||||
A_free = A[:, free_set]
|
||||
b_free = b - A.dot(x * active_set)
|
||||
z = lstsq(A_free, b_free, rcond=-1)[0]
|
||||
|
||||
lbv = z < lb[free_set]
|
||||
ubv = z > ub[free_set]
|
||||
v = lbv | ubv
|
||||
|
||||
if np.any(lbv):
|
||||
ind = free_set[lbv]
|
||||
x[ind] = lb[ind]
|
||||
active_set[ind] = True
|
||||
on_bound[ind] = -1
|
||||
|
||||
if np.any(ubv):
|
||||
ind = free_set[ubv]
|
||||
x[ind] = ub[ind]
|
||||
active_set[ind] = True
|
||||
on_bound[ind] = 1
|
||||
|
||||
ind = free_set[~v]
|
||||
x[ind] = z[~v]
|
||||
|
||||
r = A.dot(x) - b
|
||||
cost_new = 0.5 * np.dot(r, r)
|
||||
cost_change = cost - cost_new
|
||||
cost = cost_new
|
||||
g = A.T.dot(r)
|
||||
step_norm = norm(x[free_set] - x_free_old)
|
||||
|
||||
if np.any(v):
|
||||
free_set = free_set[~v]
|
||||
else:
|
||||
break
|
||||
|
||||
if max_iter is None:
|
||||
max_iter = n
|
||||
max_iter += iteration
|
||||
|
||||
termination_status = None
|
||||
|
||||
# Main BVLS loop.
|
||||
|
||||
optimality = compute_kkt_optimality(g, on_bound)
|
||||
for iteration in range(iteration, max_iter):
|
||||
if verbose == 2:
|
||||
print_iteration_linear(iteration, cost, cost_change,
|
||||
step_norm, optimality)
|
||||
|
||||
if optimality < tol:
|
||||
termination_status = 1
|
||||
|
||||
if termination_status is not None:
|
||||
break
|
||||
|
||||
move_to_free = np.argmax(g * on_bound)
|
||||
on_bound[move_to_free] = 0
|
||||
free_set = on_bound == 0
|
||||
active_set = ~free_set
|
||||
free_set, = np.nonzero(free_set)
|
||||
|
||||
x_free = x[free_set]
|
||||
x_free_old = x_free.copy()
|
||||
lb_free = lb[free_set]
|
||||
ub_free = ub[free_set]
|
||||
|
||||
A_free = A[:, free_set]
|
||||
b_free = b - A.dot(x * active_set)
|
||||
z = lstsq(A_free, b_free, rcond=-1)[0]
|
||||
|
||||
lbv, = np.nonzero(z < lb_free)
|
||||
ubv, = np.nonzero(z > ub_free)
|
||||
v = np.hstack((lbv, ubv))
|
||||
|
||||
if v.size > 0:
|
||||
alphas = np.hstack((
|
||||
lb_free[lbv] - x_free[lbv],
|
||||
ub_free[ubv] - x_free[ubv])) / (z[v] - x_free[v])
|
||||
|
||||
i = np.argmin(alphas)
|
||||
i_free = v[i]
|
||||
alpha = alphas[i]
|
||||
|
||||
x_free *= 1 - alpha
|
||||
x_free += alpha * z
|
||||
|
||||
if i < lbv.size:
|
||||
on_bound[free_set[i_free]] = -1
|
||||
else:
|
||||
on_bound[free_set[i_free]] = 1
|
||||
else:
|
||||
x_free = z
|
||||
|
||||
x[free_set] = x_free
|
||||
step_norm = norm(x_free - x_free_old)
|
||||
|
||||
r = A.dot(x) - b
|
||||
cost_new = 0.5 * np.dot(r, r)
|
||||
cost_change = cost - cost_new
|
||||
|
||||
if cost_change < tol * cost:
|
||||
termination_status = 2
|
||||
cost = cost_new
|
||||
|
||||
g = A.T.dot(r)
|
||||
optimality = compute_kkt_optimality(g, on_bound)
|
||||
|
||||
if termination_status is None:
|
||||
termination_status = 0
|
||||
|
||||
return OptimizeResult(
|
||||
x=x, fun=r, cost=cost, optimality=optimality, active_mask=on_bound,
|
||||
nit=iteration + 1, status=termination_status,
|
||||
initial_cost=initial_cost)
|
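The BVLS routine above is reached through ``scipy.optimize.lsq_linear`` when ``method='bvls'`` is selected (dense problems); a minimal, hypothetical sketch with random data follows.

import numpy as np
from scipy.optimize import lsq_linear

np.random.seed(0)
A = np.random.randn(20, 5)    # example dense design matrix
b = np.random.randn(20)
lb = np.zeros(5)              # box constraints 0 <= x <= 1
ub = np.ones(5)

res = lsq_linear(A, b, bounds=(lb, ub), method='bvls', tol=1e-10)
print(res.x, res.status, res.optimality)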
734
venv/Lib/site-packages/scipy/optimize/_lsq/common.py
Normal file
|
@ -0,0 +1,734 @@
|
|||
"""Functions used by least-squares algorithms."""
|
||||
from math import copysign
|
||||
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
|
||||
from scipy.linalg import cho_factor, cho_solve, LinAlgError
|
||||
from scipy.sparse import issparse
|
||||
from scipy.sparse.linalg import LinearOperator, aslinearoperator
|
||||
|
||||
|
||||
EPS = np.finfo(float).eps
|
||||
|
||||
|
||||
# Functions related to a trust-region problem.
|
||||
|
||||
|
||||
def intersect_trust_region(x, s, Delta):
|
||||
"""Find the intersection of a line with the boundary of a trust region.
|
||||
|
||||
This function solves the quadratic equation with respect to t
|
||||
||(x + s*t)||**2 = Delta**2.
|
||||
|
||||
Returns
|
||||
-------
|
||||
t_neg, t_pos : tuple of float
|
||||
Negative and positive roots.
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If `s` is zero or `x` is not within the trust region.
|
||||
"""
|
||||
a = np.dot(s, s)
|
||||
if a == 0:
|
||||
raise ValueError("`s` is zero.")
|
||||
|
||||
b = np.dot(x, s)
|
||||
|
||||
c = np.dot(x, x) - Delta**2
|
||||
if c > 0:
|
||||
raise ValueError("`x` is not within the trust region.")
|
||||
|
||||
d = np.sqrt(b*b - a*c) # Root from one fourth of the discriminant.
|
||||
|
||||
# Computations below avoid loss of significance, see "Numerical Recipes".
|
||||
q = -(b + copysign(d, b))
|
||||
t1 = q / a
|
||||
t2 = c / q
|
||||
|
||||
if t1 < t2:
|
||||
return t1, t2
|
||||
else:
|
||||
return t2, t1
|
||||
|
||||
|
||||
def solve_lsq_trust_region(n, m, uf, s, V, Delta, initial_alpha=None,
|
||||
rtol=0.01, max_iter=10):
|
||||
"""Solve a trust-region problem arising in least-squares minimization.
|
||||
|
||||
This function implements a method described by J. J. More [1]_ and used
|
||||
in MINPACK, but it relies on a single SVD of Jacobian instead of series
|
||||
of Cholesky decompositions. Before running this function, compute:
|
||||
``U, s, VT = svd(J, full_matrices=False)``.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n : int
|
||||
Number of variables.
|
||||
m : int
|
||||
Number of residuals.
|
||||
uf : ndarray
|
||||
Computed as U.T.dot(f).
|
||||
s : ndarray
|
||||
Singular values of J.
|
||||
V : ndarray
|
||||
Transpose of VT.
|
||||
Delta : float
|
||||
Radius of a trust region.
|
||||
initial_alpha : float, optional
|
||||
Initial guess for alpha, which might be available from a previous
|
||||
iteration. If None, determined automatically.
|
||||
rtol : float, optional
|
||||
Stopping tolerance for the root-finding procedure. Namely, the
|
||||
solution ``p`` will satisfy ``abs(norm(p) - Delta) < rtol * Delta``.
|
||||
max_iter : int, optional
|
||||
Maximum allowed number of iterations for the root-finding procedure.
|
||||
|
||||
Returns
|
||||
-------
|
||||
p : ndarray, shape (n,)
|
||||
Found solution of a trust-region problem.
|
||||
alpha : float
|
||||
Positive value such that (J.T*J + alpha*I)*p = -J.T*f.
|
||||
Sometimes called Levenberg-Marquardt parameter.
|
||||
n_iter : int
|
||||
Number of iterations made by root-finding procedure. Zero means
|
||||
that Gauss-Newton step was selected as the solution.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] More, J. J., "The Levenberg-Marquardt Algorithm: Implementation
|
||||
and Theory," Numerical Analysis, ed. G. A. Watson, Lecture Notes
|
||||
in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
|
||||
"""
|
||||
def phi_and_derivative(alpha, suf, s, Delta):
|
||||
"""Function of which to find zero.
|
||||
|
||||
It is defined as "norm of regularized (by alpha) least-squares
|
||||
solution minus `Delta`". Refer to [1]_.
|
||||
"""
|
||||
denom = s**2 + alpha
|
||||
p_norm = norm(suf / denom)
|
||||
phi = p_norm - Delta
|
||||
phi_prime = -np.sum(suf ** 2 / denom**3) / p_norm
|
||||
return phi, phi_prime
|
||||
|
||||
suf = s * uf
|
||||
|
||||
# Check if J has full rank and try Gauss-Newton step.
|
||||
if m >= n:
|
||||
threshold = EPS * m * s[0]
|
||||
full_rank = s[-1] > threshold
|
||||
else:
|
||||
full_rank = False
|
||||
|
||||
if full_rank:
|
||||
p = -V.dot(uf / s)
|
||||
if norm(p) <= Delta:
|
||||
return p, 0.0, 0
|
||||
|
||||
alpha_upper = norm(suf) / Delta
|
||||
|
||||
if full_rank:
|
||||
phi, phi_prime = phi_and_derivative(0.0, suf, s, Delta)
|
||||
alpha_lower = -phi / phi_prime
|
||||
else:
|
||||
alpha_lower = 0.0
|
||||
|
||||
if initial_alpha is None or not full_rank and initial_alpha == 0:
|
||||
alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5)
|
||||
else:
|
||||
alpha = initial_alpha
|
||||
|
||||
for it in range(max_iter):
|
||||
if alpha < alpha_lower or alpha > alpha_upper:
|
||||
alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5)
|
||||
|
||||
phi, phi_prime = phi_and_derivative(alpha, suf, s, Delta)
|
||||
|
||||
if phi < 0:
|
||||
alpha_upper = alpha
|
||||
|
||||
ratio = phi / phi_prime
|
||||
alpha_lower = max(alpha_lower, alpha - ratio)
|
||||
alpha -= (phi + Delta) * ratio / Delta
|
||||
|
||||
if np.abs(phi) < rtol * Delta:
|
||||
break
|
||||
|
||||
p = -V.dot(suf / (s**2 + alpha))
|
||||
|
||||
# Make the norm of p equal to Delta; p is changed only slightly during
# this. It is done to prevent p from lying outside the trust region (which can
|
||||
# cause problems later).
|
||||
p *= Delta / norm(p)
|
||||
|
||||
return p, alpha, it + 1
|
||||
|
||||
|
||||
def solve_trust_region_2d(B, g, Delta):
|
||||
"""Solve a general trust-region problem in 2 dimensions.
|
||||
|
||||
The problem is reformulated as a 4th order algebraic equation,
|
||||
the solution of which is found by numpy.roots.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
B : ndarray, shape (2, 2)
|
||||
Symmetric matrix, defines a quadratic term of the function.
|
||||
g : ndarray, shape (2,)
|
||||
Defines a linear term of the function.
|
||||
Delta : float
|
||||
Radius of a trust region.
|
||||
|
||||
Returns
|
||||
-------
|
||||
p : ndarray, shape (2,)
|
||||
Found solution.
|
||||
newton_step : bool
|
||||
Whether the returned solution is the Newton step which lies within
|
||||
the trust region.
|
||||
"""
|
||||
try:
|
||||
R, lower = cho_factor(B)
|
||||
p = -cho_solve((R, lower), g)
|
||||
if np.dot(p, p) <= Delta**2:
|
||||
return p, True
|
||||
except LinAlgError:
|
||||
pass
|
||||
|
||||
a = B[0, 0] * Delta**2
|
||||
b = B[0, 1] * Delta**2
|
||||
c = B[1, 1] * Delta**2
|
||||
|
||||
d = g[0] * Delta
|
||||
f = g[1] * Delta
|
||||
|
||||
coeffs = np.array(
|
||||
[-b + d, 2 * (a - c + f), 6 * b, 2 * (-a + c + f), -b - d])
|
||||
t = np.roots(coeffs) # Can handle leading zeros.
|
||||
t = np.real(t[np.isreal(t)])
|
||||
|
||||
p = Delta * np.vstack((2 * t / (1 + t**2), (1 - t**2) / (1 + t**2)))
|
||||
value = 0.5 * np.sum(p * B.dot(p), axis=0) + np.dot(g, p)
|
||||
i = np.argmin(value)
|
||||
p = p[:, i]
|
||||
|
||||
return p, False
|
||||
|
||||
|
||||
def update_tr_radius(Delta, actual_reduction, predicted_reduction,
|
||||
step_norm, bound_hit):
|
||||
"""Update the radius of a trust region based on the cost reduction.
|
||||
|
||||
Returns
|
||||
-------
|
||||
Delta : float
|
||||
New radius.
|
||||
ratio : float
|
||||
Ratio between actual and predicted reductions.
|
||||
"""
|
||||
if predicted_reduction > 0:
|
||||
ratio = actual_reduction / predicted_reduction
|
||||
elif predicted_reduction == actual_reduction == 0:
|
||||
ratio = 1
|
||||
else:
|
||||
ratio = 0
|
||||
|
||||
if ratio < 0.25:
|
||||
Delta = 0.25 * step_norm
|
||||
elif ratio > 0.75 and bound_hit:
|
||||
Delta *= 2.0
|
||||
|
||||
return Delta, ratio
|
||||
|
||||
|
||||
# Construction and minimization of quadratic functions.
|
||||
|
||||
|
||||
def build_quadratic_1d(J, g, s, diag=None, s0=None):
|
||||
"""Parameterize a multivariate quadratic function along a line.
|
||||
|
||||
The resulting univariate quadratic function is given as follows:
|
||||
::
|
||||
f(t) = 0.5 * (s0 + s*t).T * (J.T*J + diag) * (s0 + s*t) +
|
||||
g.T * (s0 + s*t)
|
||||
|
||||
Parameters
|
||||
----------
|
||||
J : ndarray, sparse matrix or LinearOperator shape (m, n)
|
||||
Jacobian matrix, affects the quadratic term.
|
||||
g : ndarray, shape (n,)
|
||||
Gradient, defines the linear term.
|
||||
s : ndarray, shape (n,)
|
||||
Direction vector of a line.
|
||||
diag : None or ndarray with shape (n,), optional
|
||||
Additional diagonal part, affects the quadratic term.
|
||||
If None, assumed to be 0.
|
||||
s0 : None or ndarray with shape (n,), optional
|
||||
Initial point. If None, assumed to be 0.
|
||||
|
||||
Returns
|
||||
-------
|
||||
a : float
|
||||
Coefficient for t**2.
|
||||
b : float
|
||||
Coefficient for t.
|
||||
c : float
|
||||
Free term. Returned only if `s0` is provided.
|
||||
"""
|
||||
v = J.dot(s)
|
||||
a = np.dot(v, v)
|
||||
if diag is not None:
|
||||
a += np.dot(s * diag, s)
|
||||
a *= 0.5
|
||||
|
||||
b = np.dot(g, s)
|
||||
|
||||
if s0 is not None:
|
||||
u = J.dot(s0)
|
||||
b += np.dot(u, v)
|
||||
c = 0.5 * np.dot(u, u) + np.dot(g, s0)
|
||||
if diag is not None:
|
||||
b += np.dot(s0 * diag, s)
|
||||
c += 0.5 * np.dot(s0 * diag, s0)
|
||||
return a, b, c
|
||||
else:
|
||||
return a, b
|
||||
|
||||
|
||||
def minimize_quadratic_1d(a, b, lb, ub, c=0):
|
||||
"""Minimize a 1-D quadratic function subject to bounds.
|
||||
|
||||
The free term `c` is 0 by default. Bounds must be finite.
|
||||
|
||||
Returns
|
||||
-------
|
||||
t : float
|
||||
Minimum point.
|
||||
y : float
|
||||
Minimum value.
|
||||
"""
|
||||
t = [lb, ub]
|
||||
if a != 0:
|
||||
extremum = -0.5 * b / a
|
||||
if lb < extremum < ub:
|
||||
t.append(extremum)
|
||||
t = np.asarray(t)
|
||||
y = t * (a * t + b) + c
|
||||
min_index = np.argmin(y)
|
||||
return t[min_index], y[min_index]
|
||||
|
||||
|
||||
def evaluate_quadratic(J, g, s, diag=None):
|
||||
"""Compute values of a quadratic function arising in least squares.
|
||||
|
||||
The function is 0.5 * s.T * (J.T * J + diag) * s + g.T * s.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
J : ndarray, sparse matrix or LinearOperator, shape (m, n)
|
||||
Jacobian matrix, affects the quadratic term.
|
||||
g : ndarray, shape (n,)
|
||||
Gradient, defines the linear term.
|
||||
s : ndarray, shape (k, n) or (n,)
|
||||
Array containing steps as rows.
|
||||
diag : ndarray, shape (n,), optional
|
||||
Additional diagonal part, affects the quadratic term.
|
||||
If None, assumed to be 0.
|
||||
|
||||
Returns
|
||||
-------
|
||||
values : ndarray with shape (k,) or float
|
||||
Values of the function. If `s` was 2-D, then ndarray is
|
||||
returned, otherwise, float is returned.
|
||||
"""
|
||||
if s.ndim == 1:
|
||||
Js = J.dot(s)
|
||||
q = np.dot(Js, Js)
|
||||
if diag is not None:
|
||||
q += np.dot(s * diag, s)
|
||||
else:
|
||||
Js = J.dot(s.T)
|
||||
q = np.sum(Js**2, axis=0)
|
||||
if diag is not None:
|
||||
q += np.sum(diag * s**2, axis=1)
|
||||
|
||||
l = np.dot(s, g)
|
||||
|
||||
return 0.5 * q + l
|
||||
|
||||
|
||||
# Utility functions to work with bound constraints.
|
||||
|
||||
|
||||
def in_bounds(x, lb, ub):
|
||||
"""Check if a point lies within bounds."""
|
||||
return np.all((x >= lb) & (x <= ub))
|
||||
|
||||
|
||||
def step_size_to_bound(x, s, lb, ub):
|
||||
"""Compute a min_step size required to reach a bound.
|
||||
|
||||
The function computes a positive scalar t, such that x + s * t is on
|
||||
the bound.
|
||||
|
||||
Returns
|
||||
-------
|
||||
step : float
|
||||
Computed step. Non-negative value.
|
||||
hits : ndarray of int with shape of x
|
||||
Each element indicates whether a corresponding variable reaches the
|
||||
bound:
|
||||
|
||||
* 0 - the bound was not hit.
|
||||
* -1 - the lower bound was hit.
|
||||
* 1 - the upper bound was hit.
|
||||
"""
|
||||
non_zero = np.nonzero(s)
|
||||
s_non_zero = s[non_zero]
|
||||
steps = np.empty_like(x)
|
||||
steps.fill(np.inf)
|
||||
with np.errstate(over='ignore'):
|
||||
steps[non_zero] = np.maximum((lb - x)[non_zero] / s_non_zero,
|
||||
(ub - x)[non_zero] / s_non_zero)
|
||||
min_step = np.min(steps)
|
||||
return min_step, np.equal(steps, min_step) * np.sign(s).astype(int)
|
||||
|
||||
|
||||
def find_active_constraints(x, lb, ub, rtol=1e-10):
|
||||
"""Determine which constraints are active in a given point.
|
||||
|
||||
The threshold is computed using `rtol` and the absolute value of the
|
||||
closest bound.
|
||||
|
||||
Returns
|
||||
-------
|
||||
active : ndarray of int with shape of x
|
||||
Each component shows whether the corresponding constraint is active:
|
||||
|
||||
* 0 - a constraint is not active.
|
||||
* -1 - a lower bound is active.
|
||||
* 1 - an upper bound is active.
|
||||
"""
|
||||
active = np.zeros_like(x, dtype=int)
|
||||
|
||||
if rtol == 0:
|
||||
active[x <= lb] = -1
|
||||
active[x >= ub] = 1
|
||||
return active
|
||||
|
||||
lower_dist = x - lb
|
||||
upper_dist = ub - x
|
||||
|
||||
lower_threshold = rtol * np.maximum(1, np.abs(lb))
|
||||
upper_threshold = rtol * np.maximum(1, np.abs(ub))
|
||||
|
||||
lower_active = (np.isfinite(lb) &
|
||||
(lower_dist <= np.minimum(upper_dist, lower_threshold)))
|
||||
active[lower_active] = -1
|
||||
|
||||
upper_active = (np.isfinite(ub) &
|
||||
(upper_dist <= np.minimum(lower_dist, upper_threshold)))
|
||||
active[upper_active] = 1
|
||||
|
||||
return active
|
||||
|
||||
|
||||
def make_strictly_feasible(x, lb, ub, rstep=1e-10):
|
||||
"""Shift a point to the interior of a feasible region.
|
||||
|
||||
Each element of the returned vector is at least at a relative distance
|
||||
`rstep` from the closest bound. If ``rstep=0`` then `np.nextafter` is used.
|
||||
"""
|
||||
x_new = x.copy()
|
||||
|
||||
active = find_active_constraints(x, lb, ub, rstep)
|
||||
lower_mask = np.equal(active, -1)
|
||||
upper_mask = np.equal(active, 1)
|
||||
|
||||
if rstep == 0:
|
||||
x_new[lower_mask] = np.nextafter(lb[lower_mask], ub[lower_mask])
|
||||
x_new[upper_mask] = np.nextafter(ub[upper_mask], lb[upper_mask])
|
||||
else:
|
||||
x_new[lower_mask] = (lb[lower_mask] +
|
||||
rstep * np.maximum(1, np.abs(lb[lower_mask])))
|
||||
x_new[upper_mask] = (ub[upper_mask] -
|
||||
rstep * np.maximum(1, np.abs(ub[upper_mask])))
|
||||
|
||||
tight_bounds = (x_new < lb) | (x_new > ub)
|
||||
x_new[tight_bounds] = 0.5 * (lb[tight_bounds] + ub[tight_bounds])
|
||||
|
||||
return x_new
|
||||
|
||||
|
||||
def CL_scaling_vector(x, g, lb, ub):
|
||||
"""Compute Coleman-Li scaling vector and its derivatives.
|
||||
|
||||
Components of a vector v are defined as follows:
|
||||
::
|
||||
| ub[i] - x[i], if g[i] < 0 and ub[i] < np.inf
|
||||
v[i] = | x[i] - lb[i], if g[i] > 0 and lb[i] > -np.inf
|
||||
| 1, otherwise
|
||||
|
||||
According to this definition v[i] >= 0 for all i. It differs from the
|
||||
definition in paper [1]_ (eq. (2.2)), where the absolute value of v is
|
||||
used. Both definitions are equivalent down the line.
|
||||
Derivatives of v with respect to x take the value 1, -1 or 0 depending on
the case.
|
||||
|
||||
Returns
|
||||
-------
|
||||
v : ndarray with shape of x
|
||||
Scaling vector.
|
||||
dv : ndarray with shape of x
|
||||
Derivatives of v[i] with respect to x[i], diagonal elements of v's
|
||||
Jacobian.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] M.A. Branch, T.F. Coleman, and Y. Li, "A Subspace, Interior,
|
||||
and Conjugate Gradient Method for Large-Scale Bound-Constrained
|
||||
Minimization Problems," SIAM Journal on Scientific Computing,
|
||||
Vol. 21, Number 1, pp 1-23, 1999.
|
||||
"""
|
||||
v = np.ones_like(x)
|
||||
dv = np.zeros_like(x)
|
||||
|
||||
mask = (g < 0) & np.isfinite(ub)
|
||||
v[mask] = ub[mask] - x[mask]
|
||||
dv[mask] = -1
|
||||
|
||||
mask = (g > 0) & np.isfinite(lb)
|
||||
v[mask] = x[mask] - lb[mask]
|
||||
dv[mask] = 1
|
||||
|
||||
return v, dv
|
||||
|
||||
|
||||
def reflective_transformation(y, lb, ub):
|
||||
"""Compute reflective transformation and its gradient."""
|
||||
if in_bounds(y, lb, ub):
|
||||
return y, np.ones_like(y)
|
||||
|
||||
lb_finite = np.isfinite(lb)
|
||||
ub_finite = np.isfinite(ub)
|
||||
|
||||
x = y.copy()
|
||||
g_negative = np.zeros_like(y, dtype=bool)
|
||||
|
||||
mask = lb_finite & ~ub_finite
|
||||
x[mask] = np.maximum(y[mask], 2 * lb[mask] - y[mask])
|
||||
g_negative[mask] = y[mask] < lb[mask]
|
||||
|
||||
mask = ~lb_finite & ub_finite
|
||||
x[mask] = np.minimum(y[mask], 2 * ub[mask] - y[mask])
|
||||
g_negative[mask] = y[mask] > ub[mask]
|
||||
|
||||
mask = lb_finite & ub_finite
|
||||
d = ub - lb
|
||||
t = np.remainder(y[mask] - lb[mask], 2 * d[mask])
|
||||
x[mask] = lb[mask] + np.minimum(t, 2 * d[mask] - t)
|
||||
g_negative[mask] = t > d[mask]
|
||||
|
||||
g = np.ones_like(y)
|
||||
g[g_negative] = -1
|
||||
|
||||
return x, g
|
||||
|
||||
|
||||
# Functions to display algorithm's progress.
|
||||
|
||||
|
||||
def print_header_nonlinear():
|
||||
print("{0:^15}{1:^15}{2:^15}{3:^15}{4:^15}{5:^15}"
|
||||
.format("Iteration", "Total nfev", "Cost", "Cost reduction",
|
||||
"Step norm", "Optimality"))
|
||||
|
||||
|
||||
def print_iteration_nonlinear(iteration, nfev, cost, cost_reduction,
|
||||
step_norm, optimality):
|
||||
if cost_reduction is None:
|
||||
cost_reduction = " " * 15
|
||||
else:
|
||||
cost_reduction = "{0:^15.2e}".format(cost_reduction)
|
||||
|
||||
if step_norm is None:
|
||||
step_norm = " " * 15
|
||||
else:
|
||||
step_norm = "{0:^15.2e}".format(step_norm)
|
||||
|
||||
print("{0:^15}{1:^15}{2:^15.4e}{3}{4}{5:^15.2e}"
|
||||
.format(iteration, nfev, cost, cost_reduction,
|
||||
step_norm, optimality))
|
||||
|
||||
|
||||
def print_header_linear():
|
||||
print("{0:^15}{1:^15}{2:^15}{3:^15}{4:^15}"
|
||||
.format("Iteration", "Cost", "Cost reduction", "Step norm",
|
||||
"Optimality"))
|
||||
|
||||
|
||||
def print_iteration_linear(iteration, cost, cost_reduction, step_norm,
|
||||
optimality):
|
||||
if cost_reduction is None:
|
||||
cost_reduction = " " * 15
|
||||
else:
|
||||
cost_reduction = "{0:^15.2e}".format(cost_reduction)
|
||||
|
||||
if step_norm is None:
|
||||
step_norm = " " * 15
|
||||
else:
|
||||
step_norm = "{0:^15.2e}".format(step_norm)
|
||||
|
||||
print("{0:^15}{1:^15.4e}{2}{3}{4:^15.2e}".format(
|
||||
iteration, cost, cost_reduction, step_norm, optimality))
|
||||
|
||||
|
||||
# Simple helper functions.
|
||||
|
||||
|
||||
def compute_grad(J, f):
|
||||
"""Compute gradient of the least-squares cost function."""
|
||||
if isinstance(J, LinearOperator):
|
||||
return J.rmatvec(f)
|
||||
else:
|
||||
return J.T.dot(f)
|
||||
|
||||
|
||||
def compute_jac_scale(J, scale_inv_old=None):
|
||||
"""Compute variables scale based on the Jacobian matrix."""
|
||||
if issparse(J):
|
||||
scale_inv = np.asarray(J.power(2).sum(axis=0)).ravel()**0.5
|
||||
else:
|
||||
scale_inv = np.sum(J**2, axis=0)**0.5
|
||||
|
||||
if scale_inv_old is None:
|
||||
scale_inv[scale_inv == 0] = 1
|
||||
else:
|
||||
scale_inv = np.maximum(scale_inv, scale_inv_old)
|
||||
|
||||
return 1 / scale_inv, scale_inv
|
||||
|
||||
|
||||
def left_multiplied_operator(J, d):
|
||||
"""Return diag(d) J as LinearOperator."""
|
||||
J = aslinearoperator(J)
|
||||
|
||||
def matvec(x):
|
||||
return d * J.matvec(x)
|
||||
|
||||
def matmat(X):
|
||||
return d[:, np.newaxis] * J.matmat(X)
|
||||
|
||||
def rmatvec(x):
|
||||
return J.rmatvec(x.ravel() * d)
|
||||
|
||||
return LinearOperator(J.shape, matvec=matvec, matmat=matmat,
|
||||
rmatvec=rmatvec)
|
||||
|
||||
|
||||
def right_multiplied_operator(J, d):
|
||||
"""Return J diag(d) as LinearOperator."""
|
||||
J = aslinearoperator(J)
|
||||
|
||||
def matvec(x):
|
||||
return J.matvec(np.ravel(x) * d)
|
||||
|
||||
def matmat(X):
|
||||
return J.matmat(X * d[:, np.newaxis])
|
||||
|
||||
def rmatvec(x):
|
||||
return d * J.rmatvec(x)
|
||||
|
||||
return LinearOperator(J.shape, matvec=matvec, matmat=matmat,
|
||||
rmatvec=rmatvec)
|
||||
|
||||
|
||||
def regularized_lsq_operator(J, diag):
|
||||
"""Return a matrix arising in regularized least squares as LinearOperator.
|
||||
|
||||
The matrix is
|
||||
[ J ]
|
||||
[ D ]
|
||||
where D is a diagonal matrix with elements from `diag`.
|
||||
"""
|
||||
J = aslinearoperator(J)
|
||||
m, n = J.shape
|
||||
|
||||
def matvec(x):
|
||||
return np.hstack((J.matvec(x), diag * x))
|
||||
|
||||
def rmatvec(x):
|
||||
x1 = x[:m]
|
||||
x2 = x[m:]
|
||||
return J.rmatvec(x1) + diag * x2
|
||||
|
||||
return LinearOperator((m + n, n), matvec=matvec, rmatvec=rmatvec)
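# A minimal sketch, not part of the original file: the operator built above
# should act exactly like the dense stacked matrix [[J], [diag(diag)]].
# Intended to be run separately; the underscore names are illustrative only.
import numpy as np

_rng = np.random.RandomState(0)
_J = _rng.randn(4, 3)
_diag = _rng.rand(3) + 0.1
_op = regularized_lsq_operator(_J, _diag)
_A = np.vstack((_J, np.diag(_diag)))  # dense reference of shape (4 + 3, 3)

_x = _rng.randn(3)
_y = _rng.randn(7)
assert np.allclose(_op.matvec(_x), _A.dot(_x))
assert np.allclose(_op.rmatvec(_y), _A.T.dot(_y))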
|
||||
|
||||
|
||||
def right_multiply(J, d, copy=True):
|
||||
"""Compute J diag(d).
|
||||
|
||||
If `copy` is False, `J` is modified in place (unless it is a LinearOperator).
|
||||
"""
|
||||
if copy and not isinstance(J, LinearOperator):
|
||||
J = J.copy()
|
||||
|
||||
if issparse(J):
|
||||
J.data *= d.take(J.indices, mode='clip') # scikit-learn recipe.
|
||||
elif isinstance(J, LinearOperator):
|
||||
J = right_multiplied_operator(J, d)
|
||||
else:
|
||||
J *= d
|
||||
|
||||
return J
|
||||
|
||||
|
||||
def left_multiply(J, d, copy=True):
|
||||
"""Compute diag(d) J.
|
||||
|
||||
If `copy` is False, `J` is modified in place (unless it is a LinearOperator).
|
||||
"""
|
||||
if copy and not isinstance(J, LinearOperator):
|
||||
J = J.copy()
|
||||
|
||||
if issparse(J):
|
||||
J.data *= np.repeat(d, np.diff(J.indptr)) # scikit-learn recipe.
|
||||
elif isinstance(J, LinearOperator):
|
||||
J = left_multiplied_operator(J, d)
|
||||
else:
|
||||
J *= d[:, np.newaxis]
|
||||
|
||||
return J
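# A minimal sketch, not part of the original file: for dense arrays the two
# helpers above are equivalent to diag(d) @ J and J @ diag(d), respectively.
# Intended to be run separately.
import numpy as np

_rng = np.random.RandomState(0)
_J = _rng.randn(3, 2)
_d_rows = _rng.rand(3)
_d_cols = _rng.rand(2)
assert np.allclose(left_multiply(_J.copy(), _d_rows), _d_rows[:, None] * _J)
assert np.allclose(right_multiply(_J.copy(), _d_cols), _J * _d_cols)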
|
||||
|
||||
|
||||
def check_termination(dF, F, dx_norm, x_norm, ratio, ftol, xtol):
|
||||
"""Check termination condition for nonlinear least squares."""
|
||||
ftol_satisfied = dF < ftol * F and ratio > 0.25
|
||||
xtol_satisfied = dx_norm < xtol * (xtol + x_norm)
|
||||
|
||||
if ftol_satisfied and xtol_satisfied:
|
||||
return 4
|
||||
elif ftol_satisfied:
|
||||
return 2
|
||||
elif xtol_satisfied:
|
||||
return 3
|
||||
else:
|
||||
return None
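# A minimal sketch, not part of the original file: the returned codes match
# the solvers' termination statuses (2: ftol only, 3: xtol only, 4: both,
# None: keep iterating). Intended to be run separately.
assert check_termination(1e-12, 1.0, 1.0, 1.0, 0.5, ftol=1e-8, xtol=1e-8) == 2
assert check_termination(1.0, 1.0, 1e-12, 1.0, 0.5, ftol=1e-8, xtol=1e-8) == 3
assert check_termination(1e-12, 1.0, 1e-12, 1.0, 0.5, ftol=1e-8, xtol=1e-8) == 4
assert check_termination(1.0, 1.0, 1.0, 1.0, 0.5, ftol=1e-8, xtol=1e-8) is None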
|
||||
|
||||
|
||||
def scale_for_robust_loss_function(J, f, rho):
|
||||
"""Scale Jacobian and residuals for a robust loss function.
|
||||
|
||||
Arrays are modified in place.
|
||||
"""
|
||||
J_scale = rho[1] + 2 * rho[2] * f**2
|
||||
J_scale[J_scale < EPS] = EPS
|
||||
J_scale **= 0.5
|
||||
|
||||
f *= rho[1] / J_scale
|
||||
|
||||
return left_multiply(J, J_scale, copy=False), f
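# A minimal sketch, not part of the original file: after scaling, the plain
# least-squares gradient J_scaled^T f_scaled equals the true gradient
# J^T (rho'(f**2) * f) of the robust cost 0.5 * sum(rho(f_i**2)). Shown here
# for the soft_l1 loss with f_scale = 1; intended to be run separately.
import numpy as np

_rng = np.random.RandomState(0)
_J = _rng.randn(5, 3)
_f = _rng.randn(5)

_z = _f**2  # soft_l1: rho(z) = 2 * ((1 + z)**0.5 - 1)
_rho = np.vstack((2 * ((1 + _z)**0.5 - 1),  # rho
                  (1 + _z)**-0.5,           # rho'
                  -0.5 * (1 + _z)**-1.5))   # rho''

_g_true = _J.T.dot(_rho[1] * _f)
_J_s, _f_s = scale_for_robust_loss_function(_J.copy(), _f.copy(), _rho)
assert np.allclose(compute_grad(_J_s, _f_s), _g_true)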
|
329
venv/Lib/site-packages/scipy/optimize/_lsq/dogbox.py
Normal file
|
@ -0,0 +1,329 @@
|
|||
"""
|
||||
Dogleg algorithm with rectangular trust regions for least-squares minimization.
|
||||
|
||||
The description of the algorithm can be found in [Voglis]_. The algorithm does
trust-region iterations, but the shape of trust regions is rectangular as
opposed to the conventional ellipsoids. The intersection of a trust region and
the initial feasible region is again some rectangle. Thus, on each iteration a
bound-constrained quadratic optimization problem is solved.

A quadratic problem is solved by the well-known dogleg approach, where the
function is minimized along a piecewise-linear "dogleg" path [NumOpt]_,
Chapter 4. If the Jacobian is not rank-deficient, then the function is
decreasing along this path, and optimization amounts to simply following it
as long as a point stays within the bounds. A constrained Cauchy step (along
the anti-gradient) is considered for safety in rank-deficient cases; in these
situations the convergence might be slow.

If during iterations some variable hits the initial bound and the corresponding
component of the anti-gradient points outside the feasible region, then the
next dogleg step won't make any progress. In this state such variables satisfy
first-order optimality conditions and are excluded before the next dogleg step
is computed.

The Gauss-Newton step can be computed exactly by `numpy.linalg.lstsq` (for
dense Jacobian matrices) or by the iterative procedure
`scipy.sparse.linalg.lsmr` (for dense and sparse matrices, or for a Jacobian
given as a LinearOperator). The second option allows solving very large
problems (up to a couple of million residuals on a regular PC), provided the
Jacobian matrix is sufficiently sparse. Note, however, that dogbox is not very
good at solving problems with a large number of constraints, because variables
are excluded and included on each iteration (the required number of function
evaluations might be high, or the accuracy of a solution will be poor); thus
its large-scale usage is probably limited to unconstrained problems.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region Dogleg
|
||||
Approach for Unconstrained and Bound Constrained Nonlinear
|
||||
Optimization", WSEAS International Conference on Applied
|
||||
Mathematics, Corfu, Greece, 2004.
|
||||
.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization, 2nd edition".
|
||||
"""
|
||||
import numpy as np
|
||||
from numpy.linalg import lstsq, norm
|
||||
|
||||
from scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmr
|
||||
from scipy.optimize import OptimizeResult
|
||||
|
||||
from .common import (
|
||||
step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic,
|
||||
build_quadratic_1d, minimize_quadratic_1d, compute_grad,
|
||||
compute_jac_scale, check_termination, scale_for_robust_loss_function,
|
||||
print_header_nonlinear, print_iteration_nonlinear)
|
||||
|
||||
|
||||
def lsmr_operator(Jop, d, active_set):
|
||||
"""Compute LinearOperator to use in LSMR by dogbox algorithm.
|
||||
|
||||
`active_set` mask is used to exclude active variables from computations
of matrix-vector products.
|
||||
"""
|
||||
m, n = Jop.shape
|
||||
|
||||
def matvec(x):
|
||||
x_free = x.ravel().copy()
x_free[active_set] = 0
return Jop.matvec(x_free * d)
|
||||
|
||||
def rmatvec(x):
|
||||
r = d * Jop.rmatvec(x)
|
||||
r[active_set] = 0
|
||||
return r
|
||||
|
||||
return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float)
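# A minimal sketch, not part of the original file: rmatvec of the operator
# built above zeroes the components of J^T x that correspond to active
# (bound-constrained) variables, which is what excludes them from the LSMR
# solve. Intended to be run separately.
import numpy as np
from scipy.sparse.linalg import aslinearoperator

_rng = np.random.RandomState(0)
_J = _rng.randn(4, 3)
_active = np.array([False, True, False])
_op = lsmr_operator(aslinearoperator(_J), np.ones(3), _active)
assert np.allclose(_op.rmatvec(_rng.randn(4))[_active], 0)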
|
||||
|
||||
|
||||
def find_intersection(x, tr_bounds, lb, ub):
|
||||
"""Find intersection of trust-region bounds and initial bounds.
|
||||
|
||||
Returns
|
||||
-------
|
||||
lb_total, ub_total : ndarray with shape of x
|
||||
Lower and upper bounds of the intersection region.
|
||||
orig_l, orig_u : ndarray of bool with shape of x
|
||||
True means that an original bound is taken as a corresponding bound
|
||||
in the intersection region.
|
||||
tr_l, tr_u : ndarray of bool with shape of x
|
||||
True means that a trust-region bound is taken as a corresponding bound
|
||||
in the intersection region.
|
||||
"""
|
||||
lb_centered = lb - x
|
||||
ub_centered = ub - x
|
||||
|
||||
lb_total = np.maximum(lb_centered, -tr_bounds)
|
||||
ub_total = np.minimum(ub_centered, tr_bounds)
|
||||
|
||||
orig_l = np.equal(lb_total, lb_centered)
|
||||
orig_u = np.equal(ub_total, ub_centered)
|
||||
|
||||
tr_l = np.equal(lb_total, -tr_bounds)
|
||||
tr_u = np.equal(ub_total, tr_bounds)
|
||||
|
||||
return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u
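# A minimal sketch, not part of the original file: with x = 0, bounds [-1, 1]
# and a trust-region half-width of 0.5, the intersection is [-0.5, 0.5] and
# both of its sides come from the trust region. Intended to be run separately.
import numpy as np

_lb_t, _ub_t, _orig_l, _orig_u, _tr_l, _tr_u = find_intersection(
    np.zeros(1), np.array([0.5]), -np.ones(1), np.ones(1))
assert np.allclose(_lb_t, -0.5) and np.allclose(_ub_t, 0.5)
assert _tr_l.all() and _tr_u.all() and not _orig_l.any() and not _orig_u.any()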
|
||||
|
||||
|
||||
def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub):
|
||||
"""Find dogleg step in a rectangular region.
|
||||
|
||||
Returns
|
||||
-------
|
||||
step : ndarray, shape (n,)
|
||||
Computed dogleg step.
|
||||
bound_hits : ndarray of int, shape (n,)
|
||||
Each component shows whether a corresponding variable hits the
|
||||
initial bound after the step is taken:
|
||||
* 0 - a variable doesn't hit the bound.
|
||||
* -1 - lower bound is hit.
|
||||
* 1 - upper bound is hit.
|
||||
tr_hit : bool
|
||||
Whether the step hit the boundary of the trust-region.
|
||||
"""
|
||||
lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
|
||||
x, tr_bounds, lb, ub
|
||||
)
|
||||
bound_hits = np.zeros_like(x, dtype=int)
|
||||
|
||||
if in_bounds(newton_step, lb_total, ub_total):
|
||||
return newton_step, bound_hits, False
|
||||
|
||||
to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total)
|
||||
|
||||
# The classical dogleg algorithm would check whether the Cauchy step fits
# into the bounds, and just return its constrained version if not. But in a
# rectangular trust region it makes sense to try to improve the constrained
# Cauchy step too. Thus, we don't distinguish these two cases.
|
||||
|
||||
cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g
|
||||
|
||||
step_diff = newton_step - cauchy_step
|
||||
step_size, hits = step_size_to_bound(cauchy_step, step_diff,
|
||||
lb_total, ub_total)
|
||||
bound_hits[(hits < 0) & orig_l] = -1
|
||||
bound_hits[(hits > 0) & orig_u] = 1
|
||||
tr_hit = np.any((hits < 0) & tr_l | (hits > 0) & tr_u)
|
||||
|
||||
return cauchy_step + step_size * step_diff, bound_hits, tr_hit
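# A minimal sketch, not part of the original file, exercising only the
# simplest branch of dogleg_step above: when the full Gauss-Newton step fits
# inside the intersection of the trust region and the bounds, it is returned
# unchanged with no bound or trust-region hits. Intended to be run separately;
# g, a and b are unused in this branch and are passed as placeholders.
import numpy as np

_step, _hits, _tr_hit = dogleg_step(
    x=np.zeros(2), newton_step=np.array([0.5, 0.5]),
    g=np.zeros(2), a=1.0, b=1.0,
    tr_bounds=np.array([2.0, 2.0]),
    lb=-np.ones(2), ub=np.ones(2))
assert np.allclose(_step, [0.5, 0.5]) and not _hits.any() and not _tr_hit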
|
||||
|
||||
|
||||
def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
|
||||
loss_function, tr_solver, tr_options, verbose):
|
||||
f = f0
|
||||
f_true = f.copy()
|
||||
nfev = 1
|
||||
|
||||
J = J0
|
||||
njev = 1
|
||||
|
||||
if loss_function is not None:
|
||||
rho = loss_function(f)
|
||||
cost = 0.5 * np.sum(rho[0])
|
||||
J, f = scale_for_robust_loss_function(J, f, rho)
|
||||
else:
|
||||
cost = 0.5 * np.dot(f, f)
|
||||
|
||||
g = compute_grad(J, f)
|
||||
|
||||
jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
|
||||
if jac_scale:
|
||||
scale, scale_inv = compute_jac_scale(J)
|
||||
else:
|
||||
scale, scale_inv = x_scale, 1 / x_scale
|
||||
|
||||
Delta = norm(x0 * scale_inv, ord=np.inf)
|
||||
if Delta == 0:
|
||||
Delta = 1.0
|
||||
|
||||
on_bound = np.zeros_like(x0, dtype=int)
|
||||
on_bound[np.equal(x0, lb)] = -1
|
||||
on_bound[np.equal(x0, ub)] = 1
|
||||
|
||||
x = x0
|
||||
step = np.empty_like(x0)
|
||||
|
||||
if max_nfev is None:
|
||||
max_nfev = x0.size * 100
|
||||
|
||||
termination_status = None
|
||||
iteration = 0
|
||||
step_norm = None
|
||||
actual_reduction = None
|
||||
|
||||
if verbose == 2:
|
||||
print_header_nonlinear()
|
||||
|
||||
while True:
|
||||
active_set = on_bound * g < 0
|
||||
free_set = ~active_set
|
||||
|
||||
g_free = g[free_set]
|
||||
g_full = g.copy()
|
||||
g[active_set] = 0
|
||||
|
||||
g_norm = norm(g, ord=np.inf)
|
||||
if g_norm < gtol:
|
||||
termination_status = 1
|
||||
|
||||
if verbose == 2:
|
||||
print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
|
||||
step_norm, g_norm)
|
||||
|
||||
if termination_status is not None or nfev == max_nfev:
|
||||
break
|
||||
|
||||
x_free = x[free_set]
|
||||
lb_free = lb[free_set]
|
||||
ub_free = ub[free_set]
|
||||
scale_free = scale[free_set]
|
||||
|
||||
# Compute the (Gauss-)Newton step and a quadratic model for the Cauchy step.
|
||||
if tr_solver == 'exact':
|
||||
J_free = J[:, free_set]
|
||||
newton_step = lstsq(J_free, -f, rcond=-1)[0]
|
||||
|
||||
# Coefficients for the quadratic model along the anti-gradient.
|
||||
a, b = build_quadratic_1d(J_free, g_free, -g_free)
|
||||
elif tr_solver == 'lsmr':
|
||||
Jop = aslinearoperator(J)
|
||||
|
||||
# We compute the lsmr step in scaled variables and then transform it
# back to the original variables. If lsmr gave the exact least-squares
# solution, this would be equivalent to not doing any transformations,
# but from experience it works better this way.
|
||||
|
||||
# We pass active_set to make computations as if we selected
|
||||
# the free subset of J columns, but without actually doing any
|
||||
# slicing, which is expensive for sparse matrices and impossible
|
||||
# for LinearOperator.
|
||||
|
||||
lsmr_op = lsmr_operator(Jop, scale, active_set)
|
||||
newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
|
||||
newton_step *= scale_free
|
||||
|
||||
# Components of g for active variables were zeroed, so this call
|
||||
# is correct and equivalent to using J_free and g_free.
|
||||
a, b = build_quadratic_1d(Jop, g, -g)
|
||||
|
||||
actual_reduction = -1.0
|
||||
while actual_reduction <= 0 and nfev < max_nfev:
|
||||
tr_bounds = Delta * scale_free
|
||||
|
||||
step_free, on_bound_free, tr_hit = dogleg_step(
|
||||
x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)
|
||||
|
||||
step.fill(0.0)
|
||||
step[free_set] = step_free
|
||||
|
||||
if tr_solver == 'exact':
|
||||
predicted_reduction = -evaluate_quadratic(J_free, g_free,
|
||||
step_free)
|
||||
elif tr_solver == 'lsmr':
|
||||
predicted_reduction = -evaluate_quadratic(Jop, g, step)
|
||||
|
||||
x_new = x + step
|
||||
f_new = fun(x_new)
|
||||
nfev += 1
|
||||
|
||||
step_h_norm = norm(step * scale_inv, ord=np.inf)
|
||||
|
||||
if not np.all(np.isfinite(f_new)):
|
||||
Delta = 0.25 * step_h_norm
|
||||
continue
|
||||
|
||||
# Usual trust-region step quality estimation.
|
||||
if loss_function is not None:
|
||||
cost_new = loss_function(f_new, cost_only=True)
|
||||
else:
|
||||
cost_new = 0.5 * np.dot(f_new, f_new)
|
||||
actual_reduction = cost - cost_new
|
||||
|
||||
Delta, ratio = update_tr_radius(
|
||||
Delta, actual_reduction, predicted_reduction,
|
||||
step_h_norm, tr_hit
|
||||
)
|
||||
|
||||
step_norm = norm(step)
|
||||
termination_status = check_termination(
|
||||
actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)
|
||||
|
||||
if termination_status is not None:
|
||||
break
|
||||
|
||||
if actual_reduction > 0:
|
||||
on_bound[free_set] = on_bound_free
|
||||
|
||||
x = x_new
|
||||
# Set variables exactly at the boundary.
|
||||
mask = on_bound == -1
|
||||
x[mask] = lb[mask]
|
||||
mask = on_bound == 1
|
||||
x[mask] = ub[mask]
|
||||
|
||||
f = f_new
|
||||
f_true = f.copy()
|
||||
|
||||
cost = cost_new
|
||||
|
||||
J = jac(x, f)
|
||||
njev += 1
|
||||
|
||||
if loss_function is not None:
|
||||
rho = loss_function(f)
|
||||
J, f = scale_for_robust_loss_function(J, f, rho)
|
||||
|
||||
g = compute_grad(J, f)
|
||||
|
||||
if jac_scale:
|
||||
scale, scale_inv = compute_jac_scale(J, scale_inv)
|
||||
else:
|
||||
step_norm = 0
|
||||
actual_reduction = 0
|
||||
|
||||
iteration += 1
|
||||
|
||||
if termination_status is None:
|
||||
termination_status = 0
|
||||
|
||||
return OptimizeResult(
|
||||
x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,
|
||||
active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)
|
Binary file not shown.
940
venv/Lib/site-packages/scipy/optimize/_lsq/least_squares.py
Normal file
|
@ -0,0 +1,940 @@
|
|||
"""Generic interface for least-squares minimization."""
|
||||
from warnings import warn
|
||||
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
|
||||
from scipy.sparse import issparse, csr_matrix
|
||||
from scipy.sparse.linalg import LinearOperator
|
||||
from scipy.optimize import _minpack, OptimizeResult
|
||||
from scipy.optimize._numdiff import approx_derivative, group_columns
|
||||
|
||||
from .trf import trf
|
||||
from .dogbox import dogbox
|
||||
from .common import EPS, in_bounds, make_strictly_feasible
|
||||
|
||||
|
||||
TERMINATION_MESSAGES = {
|
||||
-1: "Improper input parameters status returned from `leastsq`",
|
||||
0: "The maximum number of function evaluations is exceeded.",
|
||||
1: "`gtol` termination condition is satisfied.",
|
||||
2: "`ftol` termination condition is satisfied.",
|
||||
3: "`xtol` termination condition is satisfied.",
|
||||
4: "Both `ftol` and `xtol` termination conditions are satisfied."
|
||||
}
|
||||
|
||||
|
||||
FROM_MINPACK_TO_COMMON = {
|
||||
0: -1, # Improper input parameters from MINPACK.
|
||||
1: 2,
|
||||
2: 3,
|
||||
3: 4,
|
||||
4: 1,
|
||||
5: 0
|
||||
# There are 6, 7, 8 for too small tolerance parameters,
|
||||
# but we guard against it by checking ftol, xtol, gtol beforehand.
|
||||
}
|
||||
|
||||
|
||||
def call_minpack(fun, x0, jac, ftol, xtol, gtol, max_nfev, x_scale, diff_step):
|
||||
n = x0.size
|
||||
|
||||
if diff_step is None:
|
||||
epsfcn = EPS
|
||||
else:
|
||||
epsfcn = diff_step**2
|
||||
|
||||
# Compute MINPACK's `diag`, which is inverse of our `x_scale` and
|
||||
# ``x_scale='jac'`` corresponds to ``diag=None``.
|
||||
if isinstance(x_scale, str) and x_scale == 'jac':
|
||||
diag = None
|
||||
else:
|
||||
diag = 1 / x_scale
|
||||
|
||||
full_output = True
|
||||
col_deriv = False
|
||||
factor = 100.0
|
||||
|
||||
if jac is None:
|
||||
if max_nfev is None:
|
||||
# n squared to account for Jacobian evaluations.
|
||||
max_nfev = 100 * n * (n + 1)
|
||||
x, info, status = _minpack._lmdif(
|
||||
fun, x0, (), full_output, ftol, xtol, gtol,
|
||||
max_nfev, epsfcn, factor, diag)
|
||||
else:
|
||||
if max_nfev is None:
|
||||
max_nfev = 100 * n
|
||||
x, info, status = _minpack._lmder(
|
||||
fun, jac, x0, (), full_output, col_deriv,
|
||||
ftol, xtol, gtol, max_nfev, factor, diag)
|
||||
|
||||
f = info['fvec']
|
||||
|
||||
if callable(jac):
|
||||
J = jac(x)
|
||||
else:
|
||||
J = np.atleast_2d(approx_derivative(fun, x))
|
||||
|
||||
cost = 0.5 * np.dot(f, f)
|
||||
g = J.T.dot(f)
|
||||
g_norm = norm(g, ord=np.inf)
|
||||
|
||||
nfev = info['nfev']
|
||||
njev = info.get('njev', None)
|
||||
|
||||
status = FROM_MINPACK_TO_COMMON[status]
|
||||
active_mask = np.zeros_like(x0, dtype=int)
|
||||
|
||||
return OptimizeResult(
|
||||
x=x, cost=cost, fun=f, jac=J, grad=g, optimality=g_norm,
|
||||
active_mask=active_mask, nfev=nfev, njev=njev, status=status)
|
||||
|
||||
|
||||
def prepare_bounds(bounds, n):
|
||||
lb, ub = [np.asarray(b, dtype=float) for b in bounds]
|
||||
if lb.ndim == 0:
|
||||
lb = np.resize(lb, n)
|
||||
|
||||
if ub.ndim == 0:
|
||||
ub = np.resize(ub, n)
|
||||
|
||||
return lb, ub
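# A minimal sketch, not part of the original file: scalar bounds are broadcast
# to the problem size by prepare_bounds above. Intended to be run separately.
import numpy as np

_lb, _ub = prepare_bounds((0, np.inf), 3)
assert _lb.shape == (3,) and _ub.shape == (3,)
assert np.all(_lb == 0) and np.all(np.isinf(_ub))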
|
||||
|
||||
|
||||
def check_tolerance(ftol, xtol, gtol, method):
|
||||
def check(tol, name):
|
||||
if tol is None:
|
||||
tol = 0
|
||||
elif tol < EPS:
|
||||
warn("Setting `{}` below the machine epsilon ({:.2e}) effectively "
|
||||
"disables the corresponding termination condition."
|
||||
.format(name, EPS))
|
||||
return tol
|
||||
|
||||
ftol = check(ftol, "ftol")
|
||||
xtol = check(xtol, "xtol")
|
||||
gtol = check(gtol, "gtol")
|
||||
|
||||
if method == "lm" and (ftol < EPS or xtol < EPS or gtol < EPS):
|
||||
raise ValueError("All tolerances must be higher than machine epsilon "
|
||||
"({:.2e}) for method 'lm'.".format(EPS))
|
||||
elif ftol < EPS and xtol < EPS and gtol < EPS:
|
||||
raise ValueError("At least one of the tolerances must be higher than "
|
||||
"machine epsilon ({:.2e}).".format(EPS))
|
||||
|
||||
return ftol, xtol, gtol
|
||||
|
||||
|
||||
def check_x_scale(x_scale, x0):
|
||||
if isinstance(x_scale, str) and x_scale == 'jac':
|
||||
return x_scale
|
||||
|
||||
try:
|
||||
x_scale = np.asarray(x_scale, dtype=float)
|
||||
valid = np.all(np.isfinite(x_scale)) and np.all(x_scale > 0)
|
||||
except (ValueError, TypeError):
|
||||
valid = False
|
||||
|
||||
if not valid:
|
||||
raise ValueError("`x_scale` must be 'jac' or array_like with "
|
||||
"positive numbers.")
|
||||
|
||||
if x_scale.ndim == 0:
|
||||
x_scale = np.resize(x_scale, x0.shape)
|
||||
|
||||
if x_scale.shape != x0.shape:
|
||||
raise ValueError("Inconsistent shapes between `x_scale` and `x0`.")
|
||||
|
||||
return x_scale
|
||||
|
||||
|
||||
def check_jac_sparsity(jac_sparsity, m, n):
|
||||
if jac_sparsity is None:
|
||||
return None
|
||||
|
||||
if not issparse(jac_sparsity):
|
||||
jac_sparsity = np.atleast_2d(jac_sparsity)
|
||||
|
||||
if jac_sparsity.shape != (m, n):
|
||||
raise ValueError("`jac_sparsity` has wrong shape.")
|
||||
|
||||
return jac_sparsity, group_columns(jac_sparsity)
|
||||
|
||||
|
||||
# Loss functions.
|
||||
|
||||
|
||||
def huber(z, rho, cost_only):
|
||||
mask = z <= 1
|
||||
rho[0, mask] = z[mask]
|
||||
rho[0, ~mask] = 2 * z[~mask]**0.5 - 1
|
||||
if cost_only:
|
||||
return
|
||||
rho[1, mask] = 1
|
||||
rho[1, ~mask] = z[~mask]**-0.5
|
||||
rho[2, mask] = 0
|
||||
rho[2, ~mask] = -0.5 * z[~mask]**-1.5
|
||||
|
||||
|
||||
def soft_l1(z, rho, cost_only):
|
||||
t = 1 + z
|
||||
rho[0] = 2 * (t**0.5 - 1)
|
||||
if cost_only:
|
||||
return
|
||||
rho[1] = t**-0.5
|
||||
rho[2] = -0.5 * t**-1.5
|
||||
|
||||
|
||||
def cauchy(z, rho, cost_only):
|
||||
rho[0] = np.log1p(z)
|
||||
if cost_only:
|
||||
return
|
||||
t = 1 + z
|
||||
rho[1] = 1 / t
|
||||
rho[2] = -1 / t**2
|
||||
|
||||
|
||||
def arctan(z, rho, cost_only):
|
||||
rho[0] = np.arctan(z)
|
||||
if cost_only:
|
||||
return
|
||||
t = 1 + z**2
|
||||
rho[1] = 1 / t
|
||||
rho[2] = -2 * z / t**2
|
||||
|
||||
|
||||
IMPLEMENTED_LOSSES = dict(linear=None, huber=huber, soft_l1=soft_l1,
|
||||
cauchy=cauchy, arctan=arctan)
|
||||
|
||||
|
||||
def construct_loss_function(m, loss, f_scale):
|
||||
if loss == 'linear':
|
||||
return None
|
||||
|
||||
if not callable(loss):
|
||||
loss = IMPLEMENTED_LOSSES[loss]
|
||||
rho = np.empty((3, m))
|
||||
|
||||
def loss_function(f, cost_only=False):
|
||||
z = (f / f_scale) ** 2
|
||||
loss(z, rho, cost_only=cost_only)
|
||||
if cost_only:
|
||||
return 0.5 * f_scale ** 2 * np.sum(rho[0])
|
||||
rho[0] *= f_scale ** 2
|
||||
rho[2] /= f_scale ** 2
|
||||
return rho
|
||||
else:
|
||||
def loss_function(f, cost_only=False):
|
||||
z = (f / f_scale) ** 2
|
||||
rho = loss(z)
|
||||
if cost_only:
|
||||
return 0.5 * f_scale ** 2 * np.sum(rho[0])
|
||||
rho[0] *= f_scale ** 2
|
||||
rho[2] /= f_scale ** 2
|
||||
return rho
|
||||
|
||||
return loss_function
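# A minimal sketch, not part of the original file: construct_loss_function
# above returns a callable producing rho of shape (3, m) -- values, first and
# second derivatives of rho evaluated at z = (f / f_scale)**2 -- and a plain
# cost when cost_only=True. Shown for the 'huber' loss with f_scale = 1;
# intended to be run separately.
import numpy as np

_f = np.array([0.5, 3.0])  # z = f**2 -> [0.25, 9.0]
_loss_fn = construct_loss_function(m=2, loss='huber', f_scale=1.0)
_rho = _loss_fn(_f)
# huber: rho(z) = z for z <= 1, else 2*sqrt(z) - 1.
assert np.allclose(_rho[0], [0.25, 5.0])
assert np.isclose(_loss_fn(_f, cost_only=True), 0.5 * (0.25 + 5.0))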
|
||||
|
||||
|
||||
def least_squares(
|
||||
fun, x0, jac='2-point', bounds=(-np.inf, np.inf), method='trf',
|
||||
ftol=1e-8, xtol=1e-8, gtol=1e-8, x_scale=1.0, loss='linear',
|
||||
f_scale=1.0, diff_step=None, tr_solver=None, tr_options={},
|
||||
jac_sparsity=None, max_nfev=None, verbose=0, args=(), kwargs={}):
|
||||
"""Solve a nonlinear least-squares problem with bounds on the variables.
|
||||
|
||||
Given the residuals f(x) (an m-D real function of n real
|
||||
variables) and the loss function rho(s) (a scalar function), `least_squares`
|
||||
finds a local minimum of the cost function F(x)::
|
||||
|
||||
minimize F(x) = 0.5 * sum(rho(f_i(x)**2), i = 0, ..., m - 1)
|
||||
subject to lb <= x <= ub
|
||||
|
||||
The purpose of the loss function rho(s) is to reduce the influence of
|
||||
outliers on the solution.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fun : callable
|
||||
Function which computes the vector of residuals, with the signature
|
||||
``fun(x, *args, **kwargs)``, i.e., the minimization proceeds with
|
||||
respect to its first argument. The argument ``x`` passed to this
|
||||
function is an ndarray of shape (n,) (never a scalar, even for n=1).
|
||||
It must allocate and return a 1-D array_like of shape (m,) or a scalar.
|
||||
If the argument ``x`` is complex or the function ``fun`` returns
|
||||
complex residuals, it must be wrapped in a real function of real
|
||||
arguments, as shown at the end of the Examples section.
|
||||
x0 : array_like with shape (n,) or float
|
||||
Initial guess on independent variables. If float, it will be treated
|
||||
as a 1-D array with one element.
|
||||
jac : {'2-point', '3-point', 'cs', callable}, optional
|
||||
Method of computing the Jacobian matrix (an m-by-n matrix, where
|
||||
element (i, j) is the partial derivative of f[i] with respect to
|
||||
x[j]). The keywords select a finite difference scheme for numerical
|
||||
estimation. The scheme '3-point' is more accurate, but requires
|
||||
twice as many operations as '2-point' (default). The scheme 'cs'
|
||||
uses complex steps, and while potentially the most accurate, it is
|
||||
applicable only when `fun` correctly handles complex inputs and
|
||||
can be analytically continued to the complex plane. Method 'lm'
|
||||
always uses the '2-point' scheme. If callable, it is used as
|
||||
``jac(x, *args, **kwargs)`` and should return a good approximation
|
||||
(or the exact value) for the Jacobian as an array_like (np.atleast_2d
|
||||
is applied), a sparse matrix or a `scipy.sparse.linalg.LinearOperator`.
|
||||
bounds : 2-tuple of array_like, optional
|
||||
Lower and upper bounds on independent variables. Defaults to no bounds.
|
||||
Each array must match the size of `x0` or be a scalar, in the latter
|
||||
case a bound will be the same for all variables. Use ``np.inf`` with
|
||||
an appropriate sign to disable bounds on all or some variables.
|
||||
method : {'trf', 'dogbox', 'lm'}, optional
|
||||
Algorithm to perform minimization.
|
||||
|
||||
* 'trf' : Trust Region Reflective algorithm, particularly suitable
|
||||
for large sparse problems with bounds. Generally robust method.
|
||||
* 'dogbox' : dogleg algorithm with rectangular trust regions,
|
||||
typical use case is small problems with bounds. Not recommended
|
||||
for problems with rank-deficient Jacobian.
|
||||
* 'lm' : Levenberg-Marquardt algorithm as implemented in MINPACK.
|
||||
Doesn't handle bounds and sparse Jacobians. Usually the most
|
||||
efficient method for small unconstrained problems.
|
||||
|
||||
Default is 'trf'. See Notes for more information.
|
||||
ftol : float or None, optional
|
||||
Tolerance for termination by the change of the cost function. Default
|
||||
is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
|
||||
and there was an adequate agreement between a local quadratic model and
|
||||
the true model in the last step. If None, the termination by this
|
||||
condition is disabled.
|
||||
xtol : float or None, optional
|
||||
Tolerance for termination by the change of the independent variables.
|
||||
Default is 1e-8. The exact condition depends on the `method` used:
|
||||
|
||||
* For 'trf' and 'dogbox' : ``norm(dx) < xtol * (xtol + norm(x))``.
|
||||
* For 'lm' : ``Delta < xtol * norm(xs)``, where ``Delta`` is
|
||||
a trust-region radius and ``xs`` is the value of ``x``
|
||||
scaled according to `x_scale` parameter (see below).
|
||||
|
||||
If None, the termination by this condition is disabled.
|
||||
gtol : float or None, optional
|
||||
Tolerance for termination by the norm of the gradient. Default is 1e-8.
|
||||
The exact condition depends on a `method` used:
|
||||
|
||||
* For 'trf' : ``norm(g_scaled, ord=np.inf) < gtol``, where
|
||||
``g_scaled`` is the value of the gradient scaled to account for
|
||||
the presence of the bounds [STIR]_.
|
||||
* For 'dogbox' : ``norm(g_free, ord=np.inf) < gtol``, where
|
||||
``g_free`` is the gradient with respect to the variables which
|
||||
are not in the optimal state on the boundary.
|
||||
* For 'lm' : the maximum absolute value of the cosine of angles
|
||||
between columns of the Jacobian and the residual vector is less
|
||||
than `gtol`, or the residual vector is zero.
|
||||
|
||||
If None, the termination by this condition is disabled.
|
||||
x_scale : array_like or 'jac', optional
|
||||
Characteristic scale of each variable. Setting `x_scale` is equivalent
|
||||
to reformulating the problem in scaled variables ``xs = x / x_scale``.
|
||||
An alternative view is that the size of a trust region along the jth
|
||||
dimension is proportional to ``x_scale[j]``. Improved convergence may
|
||||
be achieved by setting `x_scale` such that a step of a given size
|
||||
along any of the scaled variables has a similar effect on the cost
|
||||
function. If set to 'jac', the scale is iteratively updated using the
|
||||
inverse norms of the columns of the Jacobian matrix (as described in
|
||||
[JJMore]_).
|
||||
loss : str or callable, optional
|
||||
Determines the loss function. The following keyword values are allowed:
|
||||
|
||||
* 'linear' (default) : ``rho(z) = z``. Gives a standard
|
||||
least-squares problem.
|
||||
* 'soft_l1' : ``rho(z) = 2 * ((1 + z)**0.5 - 1)``. The smooth
|
||||
approximation of l1 (absolute value) loss. Usually a good
|
||||
choice for robust least squares.
|
||||
* 'huber' : ``rho(z) = z if z <= 1 else 2*z**0.5 - 1``. Works
|
||||
similarly to 'soft_l1'.
|
||||
* 'cauchy' : ``rho(z) = ln(1 + z)``. Severely weakens outliers
|
||||
influence, but may cause difficulties in optimization process.
|
||||
* 'arctan' : ``rho(z) = arctan(z)``. Limits a maximum loss on
|
||||
a single residual, has properties similar to 'cauchy'.
|
||||
|
||||
If callable, it must take a 1-D ndarray ``z=f**2`` and return an
|
||||
array_like with shape (3, m) where row 0 contains function values,
|
||||
row 1 contains first derivatives and row 2 contains second
|
||||
derivatives. Method 'lm' supports only 'linear' loss.
|
||||
f_scale : float, optional
|
||||
Value of soft margin between inlier and outlier residuals, default
|
||||
is 1.0. The loss function is evaluated as follows
|
||||
``rho_(f**2) = C**2 * rho(f**2 / C**2)``, where ``C`` is `f_scale`,
|
||||
and ``rho`` is determined by `loss` parameter. This parameter has
|
||||
no effect with ``loss='linear'``, but for other `loss` values it is
|
||||
of crucial importance.
|
||||
max_nfev : None or int, optional
|
||||
Maximum number of function evaluations before the termination.
|
||||
If None (default), the value is chosen automatically:
|
||||
|
||||
* For 'trf' and 'dogbox' : 100 * n.
|
||||
* For 'lm' : 100 * n if `jac` is callable and 100 * n * (n + 1)
|
||||
otherwise (because 'lm' counts function calls in Jacobian
|
||||
estimation).
|
||||
|
||||
diff_step : None or array_like, optional
|
||||
Determines the relative step size for the finite difference
|
||||
approximation of the Jacobian. The actual step is computed as
|
||||
``x * diff_step``. If None (default), then `diff_step` is taken to be
|
||||
a conventional "optimal" power of machine epsilon for the finite
|
||||
difference scheme used [NR]_.
|
||||
tr_solver : {None, 'exact', 'lsmr'}, optional
|
||||
Method for solving trust-region subproblems, relevant only for 'trf'
|
||||
and 'dogbox' methods.
|
||||
|
||||
* 'exact' is suitable for not very large problems with dense
|
||||
Jacobian matrices. The computational complexity per iteration is
|
||||
comparable to a singular value decomposition of the Jacobian
|
||||
matrix.
|
||||
* 'lsmr' is suitable for problems with sparse and large Jacobian
|
||||
matrices. It uses the iterative procedure
|
||||
`scipy.sparse.linalg.lsmr` for finding a solution of a linear
|
||||
least-squares problem and only requires matrix-vector product
|
||||
evaluations.
|
||||
|
||||
If None (default), the solver is chosen based on the type of Jacobian
|
||||
returned on the first iteration.
|
||||
tr_options : dict, optional
|
||||
Keyword options passed to trust-region solver.
|
||||
|
||||
* ``tr_solver='exact'``: `tr_options` are ignored.
|
||||
* ``tr_solver='lsmr'``: options for `scipy.sparse.linalg.lsmr`.
|
||||
Additionally, ``method='trf'`` supports 'regularize' option
|
||||
(bool, default is True), which adds a regularization term to the
|
||||
normal equation, which improves convergence if the Jacobian is
|
||||
rank-deficient [Byrd]_ (eq. 3.4).
|
||||
|
||||
jac_sparsity : {None, array_like, sparse matrix}, optional
|
||||
Defines the sparsity structure of the Jacobian matrix for finite
|
||||
difference estimation, its shape must be (m, n). If the Jacobian has
|
||||
only a few non-zero elements in *each* row, providing the sparsity
|
||||
structure will greatly speed up the computations [Curtis]_. A zero
|
||||
entry means that a corresponding element in the Jacobian is identically
|
||||
zero. If provided, forces the use of 'lsmr' trust-region solver.
|
||||
If None (default), then dense differencing will be used. Has no effect
|
||||
for 'lm' method.
|
||||
verbose : {0, 1, 2}, optional
|
||||
Level of algorithm's verbosity:
|
||||
|
||||
* 0 (default) : work silently.
|
||||
* 1 : display a termination report.
|
||||
* 2 : display progress during iterations (not supported by 'lm'
|
||||
method).
|
||||
|
||||
args, kwargs : tuple and dict, optional
|
||||
Additional arguments passed to `fun` and `jac`. Both empty by default.
|
||||
The calling signature is ``fun(x, *args, **kwargs)`` and the same for
|
||||
`jac`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
`OptimizeResult` with the following fields defined:
|
||||
x : ndarray, shape (n,)
|
||||
Solution found.
|
||||
cost : float
|
||||
Value of the cost function at the solution.
|
||||
fun : ndarray, shape (m,)
|
||||
Vector of residuals at the solution.
|
||||
jac : ndarray, sparse matrix or LinearOperator, shape (m, n)
|
||||
Modified Jacobian matrix at the solution, in the sense that J^T J
|
||||
is a Gauss-Newton approximation of the Hessian of the cost function.
|
||||
The type is the same as the one used by the algorithm.
|
||||
grad : ndarray, shape (n,)
|
||||
Gradient of the cost function at the solution.
|
||||
optimality : float
|
||||
First-order optimality measure. In unconstrained problems, it is always
|
||||
the uniform norm of the gradient. In constrained problems, it is the
|
||||
quantity which was compared with `gtol` during iterations.
|
||||
active_mask : ndarray of int, shape (n,)
|
||||
Each component shows whether a corresponding constraint is active
|
||||
(that is, whether a variable is at the bound):
|
||||
|
||||
* 0 : a constraint is not active.
|
||||
* -1 : a lower bound is active.
|
||||
* 1 : an upper bound is active.
|
||||
|
||||
Might be somewhat arbitrary for 'trf' method as it generates a sequence
|
||||
of strictly feasible iterates and `active_mask` is determined within a
|
||||
tolerance threshold.
|
||||
nfev : int
|
||||
Number of function evaluations done. Methods 'trf' and 'dogbox' do not
|
||||
count function calls for numerical Jacobian approximation, as opposed
|
||||
to 'lm' method.
|
||||
njev : int or None
|
||||
Number of Jacobian evaluations done. If numerical Jacobian
|
||||
approximation is used in 'lm' method, it is set to None.
|
||||
status : int
|
||||
The reason for algorithm termination:
|
||||
|
||||
* -1 : improper input parameters status returned from MINPACK.
|
||||
* 0 : the maximum number of function evaluations is exceeded.
|
||||
* 1 : `gtol` termination condition is satisfied.
|
||||
* 2 : `ftol` termination condition is satisfied.
|
||||
* 3 : `xtol` termination condition is satisfied.
|
||||
* 4 : Both `ftol` and `xtol` termination conditions are satisfied.
|
||||
|
||||
message : str
|
||||
Verbal description of the termination reason.
|
||||
success : bool
|
||||
True if one of the convergence criteria is satisfied (`status` > 0).
|
||||
|
||||
See Also
|
||||
--------
|
||||
leastsq : A legacy wrapper for the MINPACK implementation of the
|
||||
Levenberg-Marquardt algorithm.
|
||||
curve_fit : Least-squares minimization applied to a curve-fitting problem.
|
||||
|
||||
Notes
|
||||
-----
|
||||
Method 'lm' (Levenberg-Marquardt) calls a wrapper over least-squares
|
||||
algorithms implemented in MINPACK (lmder, lmdif). It runs the
|
||||
Levenberg-Marquardt algorithm formulated as a trust-region type algorithm.
|
||||
The implementation is based on the paper [JJMore]_; it is very robust and
efficient, with a lot of smart tricks. It should be your first choice
|
||||
for unconstrained problems. Note that it doesn't support bounds. Also,
|
||||
it doesn't work when m < n.
|
||||
|
||||
Method 'trf' (Trust Region Reflective) is motivated by the process of
|
||||
solving a system of equations, which constitute the first-order optimality
|
||||
condition for a bound-constrained minimization problem as formulated in
|
||||
[STIR]_. The algorithm iteratively solves trust-region subproblems
|
||||
augmented by a special diagonal quadratic term and with trust-region shape
|
||||
determined by the distance from the bounds and the direction of the
|
||||
gradient. These enhancements help to avoid making steps directly into the bounds
|
||||
and efficiently explore the whole space of variables. To further improve
|
||||
convergence, the algorithm considers search directions reflected from the
|
||||
bounds. To obey theoretical requirements, the algorithm keeps iterates
|
||||
strictly feasible. With dense Jacobians trust-region subproblems are
|
||||
solved by an exact method very similar to the one described in [JJMore]_
|
||||
(and implemented in MINPACK). The difference from the MINPACK
|
||||
implementation is that a singular value decomposition of a Jacobian
|
||||
matrix is done once per iteration, instead of a QR decomposition and series
|
||||
of Givens rotation eliminations. For large sparse Jacobians a 2-D subspace
|
||||
approach of solving trust-region subproblems is used [STIR]_, [Byrd]_.
|
||||
The subspace is spanned by a scaled gradient and an approximate
|
||||
Gauss-Newton solution delivered by `scipy.sparse.linalg.lsmr`. When no
|
||||
constraints are imposed the algorithm is very similar to MINPACK and has
|
||||
generally comparable performance. The algorithm works quite robustly in
unbounded and bounded problems, and is therefore chosen as the default
algorithm.
|
||||
|
||||
Method 'dogbox' operates in a trust-region framework, but considers
|
||||
rectangular trust regions as opposed to conventional ellipsoids [Voglis]_.
|
||||
The intersection of a current trust region and initial bounds is again
|
||||
rectangular, so on each iteration a quadratic minimization problem subject
|
||||
to bound constraints is solved approximately by Powell's dogleg method
|
||||
[NumOpt]_. The required Gauss-Newton step can be computed exactly for
|
||||
dense Jacobians or approximately by `scipy.sparse.linalg.lsmr` for large
|
||||
sparse Jacobians. The algorithm is likely to exhibit slow convergence when
|
||||
the rank of the Jacobian is less than the number of variables. The algorithm
|
||||
often outperforms 'trf' in bounded problems with a small number of
|
||||
variables.
|
||||
|
||||
Robust loss functions are implemented as described in [BA]_. The idea
|
||||
is to modify a residual vector and a Jacobian matrix on each iteration
|
||||
such that the computed gradient and Gauss-Newton Hessian approximation match
|
||||
the true gradient and Hessian approximation of the cost function. Then
|
||||
the algorithm proceeds in a normal way, i.e., robust loss functions are
|
||||
implemented as a simple wrapper over standard least-squares algorithms.
|
||||
|
||||
.. versionadded:: 0.17.0
|
||||
|
||||
References
|
||||
----------
|
||||
.. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
|
||||
and Conjugate Gradient Method for Large-Scale Bound-Constrained
|
||||
Minimization Problems," SIAM Journal on Scientific Computing,
|
||||
Vol. 21, Number 1, pp 1-23, 1999.
|
||||
.. [NR] William H. Press et. al., "Numerical Recipes. The Art of Scientific
|
||||
Computing. 3rd edition", Sec. 5.7.
|
||||
.. [Byrd] R. H. Byrd, R. B. Schnabel and G. A. Shultz, "Approximate
|
||||
solution of the trust region problem by minimization over
|
||||
two-dimensional subspaces", Math. Programming, 40, pp. 247-263,
|
||||
1988.
|
||||
.. [Curtis] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
|
||||
sparse Jacobian matrices", Journal of the Institute of
|
||||
Mathematics and its Applications, 13, pp. 117-120, 1974.
|
||||
.. [JJMore] J. J. More, "The Levenberg-Marquardt Algorithm: Implementation
|
||||
and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
|
||||
Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
|
||||
.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region
|
||||
Dogleg Approach for Unconstrained and Bound Constrained
|
||||
Nonlinear Optimization", WSEAS International Conference on
|
||||
Applied Mathematics, Corfu, Greece, 2004.
|
||||
.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization,
|
||||
2nd edition", Chapter 4.
|
||||
.. [BA] B. Triggs et. al., "Bundle Adjustment - A Modern Synthesis",
|
||||
Proceedings of the International Workshop on Vision Algorithms:
|
||||
Theory and Practice, pp. 298-372, 1999.
|
||||
|
||||
Examples
|
||||
--------
|
||||
In this example we find a minimum of the Rosenbrock function without bounds
|
||||
on independent variables.
|
||||
|
||||
>>> def fun_rosenbrock(x):
|
||||
... return np.array([10 * (x[1] - x[0]**2), (1 - x[0])])
|
||||
|
||||
Notice that we only provide the vector of the residuals. The algorithm
|
||||
constructs the cost function as a sum of squares of the residuals, which
|
||||
gives the Rosenbrock function. The exact minimum is at ``x = [1.0, 1.0]``.
|
||||
|
||||
>>> from scipy.optimize import least_squares
|
||||
>>> x0_rosenbrock = np.array([2, 2])
|
||||
>>> res_1 = least_squares(fun_rosenbrock, x0_rosenbrock)
|
||||
>>> res_1.x
|
||||
array([ 1., 1.])
|
||||
>>> res_1.cost
|
||||
9.8669242910846867e-30
|
||||
>>> res_1.optimality
|
||||
8.8928864934219529e-14
|
||||
|
||||
We now constrain the variables, in such a way that the previous solution
|
||||
becomes infeasible. Specifically, we require that ``x[1] >= 1.5``, and
|
||||
``x[0]`` is left unconstrained. To this end, we specify the `bounds` parameter
|
||||
to `least_squares` in the form ``bounds=([-np.inf, 1.5], np.inf)``.
|
||||
|
||||
We also provide the analytic Jacobian:
|
||||
|
||||
>>> def jac_rosenbrock(x):
|
||||
... return np.array([
|
||||
... [-20 * x[0], 10],
|
||||
... [-1, 0]])
|
||||
|
||||
Putting this all together, we see that the new solution lies on the bound:
|
||||
|
||||
>>> res_2 = least_squares(fun_rosenbrock, x0_rosenbrock, jac_rosenbrock,
|
||||
... bounds=([-np.inf, 1.5], np.inf))
|
||||
>>> res_2.x
|
||||
array([ 1.22437075, 1.5 ])
|
||||
>>> res_2.cost
|
||||
0.025213093946805685
|
||||
>>> res_2.optimality
|
||||
1.5885401433157753e-07
|
||||
|
||||
Now we solve a system of equations (i.e., the cost function should be zero
|
||||
at a minimum) for a Broyden tridiagonal vector-valued function of 100000
|
||||
variables:
|
||||
|
||||
>>> def fun_broyden(x):
|
||||
... f = (3 - x) * x + 1
|
||||
... f[1:] -= x[:-1]
|
||||
... f[:-1] -= 2 * x[1:]
|
||||
... return f
|
||||
|
||||
The corresponding Jacobian matrix is sparse. We tell the algorithm to
|
||||
estimate it by finite differences and provide the sparsity structure of
|
||||
the Jacobian to significantly speed up this process.
|
||||
|
||||
>>> from scipy.sparse import lil_matrix
|
||||
>>> def sparsity_broyden(n):
|
||||
... sparsity = lil_matrix((n, n), dtype=int)
|
||||
... i = np.arange(n)
|
||||
... sparsity[i, i] = 1
|
||||
... i = np.arange(1, n)
|
||||
... sparsity[i, i - 1] = 1
|
||||
... i = np.arange(n - 1)
|
||||
... sparsity[i, i + 1] = 1
|
||||
... return sparsity
|
||||
...
|
||||
>>> n = 100000
|
||||
>>> x0_broyden = -np.ones(n)
|
||||
...
|
||||
>>> res_3 = least_squares(fun_broyden, x0_broyden,
|
||||
... jac_sparsity=sparsity_broyden(n))
|
||||
>>> res_3.cost
|
||||
4.5687069299604613e-23
|
||||
>>> res_3.optimality
|
||||
1.1650454296851518e-11
|
||||
|
||||
Let's also solve a curve fitting problem using a robust loss function to
|
||||
take care of outliers in the data. Define the model function as
|
||||
``y = a + b * exp(c * t)``, where t is a predictor variable, y is an
|
||||
observation and a, b, c are parameters to estimate.
|
||||
|
||||
First, define the function which generates the data with noise and
|
||||
outliers, define the model parameters, and generate data:
|
||||
|
||||
>>> def gen_data(t, a, b, c, noise=0, n_outliers=0, random_state=0):
|
||||
... y = a + b * np.exp(t * c)
|
||||
...
|
||||
... rnd = np.random.RandomState(random_state)
|
||||
... error = noise * rnd.randn(t.size)
|
||||
... outliers = rnd.randint(0, t.size, n_outliers)
|
||||
... error[outliers] *= 10
|
||||
...
|
||||
... return y + error
|
||||
...
|
||||
>>> a = 0.5
|
||||
>>> b = 2.0
|
||||
>>> c = -1
|
||||
>>> t_min = 0
|
||||
>>> t_max = 10
|
||||
>>> n_points = 15
|
||||
...
|
||||
>>> t_train = np.linspace(t_min, t_max, n_points)
|
||||
>>> y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)
|
||||
|
||||
Define the function for computing residuals and the initial estimate of
|
||||
parameters.
|
||||
|
||||
>>> def fun(x, t, y):
|
||||
... return x[0] + x[1] * np.exp(x[2] * t) - y
|
||||
...
|
||||
>>> x0 = np.array([1.0, 1.0, 0.0])
|
||||
|
||||
Compute a standard least-squares solution:
|
||||
|
||||
>>> res_lsq = least_squares(fun, x0, args=(t_train, y_train))
|
||||
|
||||
Now compute two solutions with two different robust loss functions. The
|
||||
parameter `f_scale` is set to 0.1, meaning that inlier residuals should
|
||||
not significantly exceed 0.1 (the noise level used).
|
||||
|
||||
>>> res_soft_l1 = least_squares(fun, x0, loss='soft_l1', f_scale=0.1,
|
||||
... args=(t_train, y_train))
|
||||
>>> res_log = least_squares(fun, x0, loss='cauchy', f_scale=0.1,
|
||||
... args=(t_train, y_train))
|
||||
|
||||
And, finally, plot all the curves. We see that by selecting an appropriate
|
||||
`loss` we can get estimates close to optimal even in the presence of
|
||||
strong outliers. But keep in mind that generally it is recommended to try
|
||||
'soft_l1' or 'huber' losses first (if at all necessary) as the other two
|
||||
options may cause difficulties in the optimization process.
|
||||
|
||||
>>> t_test = np.linspace(t_min, t_max, n_points * 10)
|
||||
>>> y_true = gen_data(t_test, a, b, c)
|
||||
>>> y_lsq = gen_data(t_test, *res_lsq.x)
|
||||
>>> y_soft_l1 = gen_data(t_test, *res_soft_l1.x)
|
||||
>>> y_log = gen_data(t_test, *res_log.x)
|
||||
...
|
||||
>>> import matplotlib.pyplot as plt
|
||||
>>> plt.plot(t_train, y_train, 'o')
|
||||
>>> plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
|
||||
>>> plt.plot(t_test, y_lsq, label='linear loss')
|
||||
>>> plt.plot(t_test, y_soft_l1, label='soft_l1 loss')
|
||||
>>> plt.plot(t_test, y_log, label='cauchy loss')
|
||||
>>> plt.xlabel("t")
|
||||
>>> plt.ylabel("y")
|
||||
>>> plt.legend()
|
||||
>>> plt.show()
|
||||
|
||||
In the next example, we show how complex-valued residual functions of
|
||||
complex variables can be optimized with ``least_squares()``. Consider the
|
||||
following function:
|
||||
|
||||
>>> def f(z):
|
||||
... return z - (0.5 + 0.5j)
|
||||
|
||||
We wrap it into a function of real variables that returns real residuals
|
||||
by simply handling the real and imaginary parts as independent variables:
|
||||
|
||||
>>> def f_wrap(x):
|
||||
... fx = f(x[0] + 1j*x[1])
|
||||
... return np.array([fx.real, fx.imag])
|
||||
|
||||
Thus, instead of the original m-D complex function of n complex
|
||||
variables we optimize a 2m-D real function of 2n real variables:
|
||||
|
||||
>>> from scipy.optimize import least_squares
|
||||
>>> res_wrapped = least_squares(f_wrap, (0.1, 0.1), bounds=([0, 0], [1, 1]))
|
||||
>>> z = res_wrapped.x[0] + res_wrapped.x[1]*1j
|
||||
>>> z
|
||||
(0.49999999999925893+0.49999999999925893j)
|
||||
|
||||
"""
|
||||
if method not in ['trf', 'dogbox', 'lm']:
|
||||
raise ValueError("`method` must be 'trf', 'dogbox' or 'lm'.")
|
||||
|
||||
if jac not in ['2-point', '3-point', 'cs'] and not callable(jac):
|
||||
raise ValueError("`jac` must be '2-point', '3-point', 'cs' or "
|
||||
"callable.")
|
||||
|
||||
if tr_solver not in [None, 'exact', 'lsmr']:
|
||||
raise ValueError("`tr_solver` must be None, 'exact' or 'lsmr'.")
|
||||
|
||||
if loss not in IMPLEMENTED_LOSSES and not callable(loss):
|
||||
raise ValueError("`loss` must be one of {0} or a callable."
|
||||
.format(IMPLEMENTED_LOSSES.keys()))
|
||||
|
||||
if method == 'lm' and loss != 'linear':
|
||||
raise ValueError("method='lm' supports only 'linear' loss function.")
|
||||
|
||||
if verbose not in [0, 1, 2]:
|
||||
raise ValueError("`verbose` must be in [0, 1, 2].")
|
||||
|
||||
if len(bounds) != 2:
|
||||
raise ValueError("`bounds` must contain 2 elements.")
|
||||
|
||||
if max_nfev is not None and max_nfev <= 0:
|
||||
raise ValueError("`max_nfev` must be None or positive integer.")
|
||||
|
||||
if np.iscomplexobj(x0):
|
||||
raise ValueError("`x0` must be real.")
|
||||
|
||||
x0 = np.atleast_1d(x0).astype(float)
|
||||
|
||||
if x0.ndim > 1:
|
||||
raise ValueError("`x0` must have at most 1 dimension.")
|
||||
|
||||
lb, ub = prepare_bounds(bounds, x0.shape[0])
|
||||
|
||||
if method == 'lm' and not np.all((lb == -np.inf) & (ub == np.inf)):
|
||||
raise ValueError("Method 'lm' doesn't support bounds.")
|
||||
|
||||
if lb.shape != x0.shape or ub.shape != x0.shape:
|
||||
raise ValueError("Inconsistent shapes between bounds and `x0`.")
|
||||
|
||||
if np.any(lb >= ub):
|
||||
raise ValueError("Each lower bound must be strictly less than each "
|
||||
"upper bound.")
|
||||
|
||||
if not in_bounds(x0, lb, ub):
|
||||
raise ValueError("`x0` is infeasible.")
|
||||
|
||||
x_scale = check_x_scale(x_scale, x0)
|
||||
|
||||
ftol, xtol, gtol = check_tolerance(ftol, xtol, gtol, method)
|
||||
|
||||
def fun_wrapped(x):
|
||||
return np.atleast_1d(fun(x, *args, **kwargs))
|
||||
|
||||
if method == 'trf':
|
||||
x0 = make_strictly_feasible(x0, lb, ub)
|
||||
|
||||
f0 = fun_wrapped(x0)
|
||||
|
||||
if f0.ndim != 1:
|
||||
raise ValueError("`fun` must return at most 1-d array_like. "
|
||||
"f0.shape: {0}".format(f0.shape))
|
||||
|
||||
if not np.all(np.isfinite(f0)):
|
||||
raise ValueError("Residuals are not finite in the initial point.")
|
||||
|
||||
n = x0.size
|
||||
m = f0.size
|
||||
|
||||
if method == 'lm' and m < n:
|
||||
raise ValueError("Method 'lm' doesn't work when the number of "
|
||||
"residuals is less than the number of variables.")
|
||||
|
||||
loss_function = construct_loss_function(m, loss, f_scale)
|
||||
if callable(loss):
|
||||
rho = loss_function(f0)
|
||||
if rho.shape != (3, m):
|
||||
raise ValueError("The return value of `loss` callable has wrong "
|
||||
"shape.")
|
||||
initial_cost = 0.5 * np.sum(rho[0])
|
||||
elif loss_function is not None:
|
||||
initial_cost = loss_function(f0, cost_only=True)
|
||||
else:
|
||||
initial_cost = 0.5 * np.dot(f0, f0)
|
||||
|
||||
if callable(jac):
|
||||
J0 = jac(x0, *args, **kwargs)
|
||||
|
||||
if issparse(J0):
|
||||
J0 = csr_matrix(J0)
|
||||
|
||||
def jac_wrapped(x, _=None):
|
||||
return csr_matrix(jac(x, *args, **kwargs))
|
||||
|
||||
elif isinstance(J0, LinearOperator):
|
||||
def jac_wrapped(x, _=None):
|
||||
return jac(x, *args, **kwargs)
|
||||
|
||||
else:
|
||||
J0 = np.atleast_2d(J0)
|
||||
|
||||
def jac_wrapped(x, _=None):
|
||||
return np.atleast_2d(jac(x, *args, **kwargs))
|
||||
|
||||
else: # Estimate Jacobian by finite differences.
|
||||
if method == 'lm':
|
||||
if jac_sparsity is not None:
|
||||
raise ValueError("method='lm' does not support "
|
||||
"`jac_sparsity`.")
|
||||
|
||||
if jac != '2-point':
|
||||
warn("jac='{0}' works equivalently to '2-point' "
|
||||
"for method='lm'.".format(jac))
|
||||
|
||||
J0 = jac_wrapped = None
|
||||
else:
|
||||
if jac_sparsity is not None and tr_solver == 'exact':
|
||||
raise ValueError("tr_solver='exact' is incompatible "
|
||||
"with `jac_sparsity`.")
|
||||
|
||||
jac_sparsity = check_jac_sparsity(jac_sparsity, m, n)
|
||||
|
||||
def jac_wrapped(x, f):
|
||||
J = approx_derivative(fun, x, rel_step=diff_step, method=jac,
|
||||
f0=f, bounds=bounds, args=args,
|
||||
kwargs=kwargs, sparsity=jac_sparsity)
|
||||
if J.ndim != 2: # J is guaranteed not sparse.
|
||||
J = np.atleast_2d(J)
|
||||
|
||||
return J
|
||||
|
||||
J0 = jac_wrapped(x0, f0)
|
||||
|
||||
if J0 is not None:
|
||||
if J0.shape != (m, n):
|
||||
raise ValueError(
|
||||
"The return value of `jac` has wrong shape: expected {0}, "
|
||||
"actual {1}.".format((m, n), J0.shape))
|
||||
|
||||
if not isinstance(J0, np.ndarray):
|
||||
if method == 'lm':
|
||||
raise ValueError("method='lm' works only with dense "
|
||||
"Jacobian matrices.")
|
||||
|
||||
if tr_solver == 'exact':
|
||||
raise ValueError(
|
||||
"tr_solver='exact' works only with dense "
|
||||
"Jacobian matrices.")
|
||||
|
||||
jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
|
||||
if isinstance(J0, LinearOperator) and jac_scale:
|
||||
raise ValueError("x_scale='jac' can't be used when `jac` "
|
||||
"returns LinearOperator.")
|
||||
|
||||
if tr_solver is None:
|
||||
if isinstance(J0, np.ndarray):
|
||||
tr_solver = 'exact'
|
||||
else:
|
||||
tr_solver = 'lsmr'
|
||||
|
||||
if method == 'lm':
|
||||
result = call_minpack(fun_wrapped, x0, jac_wrapped, ftol, xtol, gtol,
|
||||
max_nfev, x_scale, diff_step)
|
||||
|
||||
elif method == 'trf':
|
||||
result = trf(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol, xtol,
|
||||
gtol, max_nfev, x_scale, loss_function, tr_solver,
|
||||
tr_options.copy(), verbose)
|
||||
|
||||
elif method == 'dogbox':
|
||||
if tr_solver == 'lsmr' and 'regularize' in tr_options:
|
||||
warn("The keyword 'regularize' in `tr_options` is not relevant "
|
||||
"for 'dogbox' method.")
|
||||
tr_options = tr_options.copy()
|
||||
del tr_options['regularize']
|
||||
|
||||
result = dogbox(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol,
|
||||
xtol, gtol, max_nfev, x_scale, loss_function,
|
||||
tr_solver, tr_options, verbose)
|
||||
|
||||
result.message = TERMINATION_MESSAGES[result.status]
|
||||
result.success = result.status > 0
|
||||
|
||||
if verbose >= 1:
|
||||
print(result.message)
|
||||
print("Function evaluations {0}, initial cost {1:.4e}, final cost "
|
||||
"{2:.4e}, first-order optimality {3:.2e}."
|
||||
.format(result.nfev, initial_cost, result.cost,
|
||||
result.optimality))
|
||||
|
||||
return result
|
315
venv/Lib/site-packages/scipy/optimize/_lsq/lsq_linear.py
Normal file
|
@ -0,0 +1,315 @@
|
|||
"""Linear least squares with bound constraints on independent variables."""
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
from scipy.sparse import issparse, csr_matrix
|
||||
from scipy.sparse.linalg import LinearOperator, lsmr
|
||||
from scipy.optimize import OptimizeResult
|
||||
|
||||
from .common import in_bounds, compute_grad
|
||||
from .trf_linear import trf_linear
|
||||
from .bvls import bvls
|
||||
|
||||
|
||||
def prepare_bounds(bounds, n):
|
||||
lb, ub = [np.asarray(b, dtype=float) for b in bounds]
|
||||
|
||||
if lb.ndim == 0:
|
||||
lb = np.resize(lb, n)
|
||||
|
||||
if ub.ndim == 0:
|
||||
ub = np.resize(ub, n)
|
||||
|
||||
return lb, ub
|
||||
|
||||
|
||||
TERMINATION_MESSAGES = {
|
||||
-1: "The algorithm was not able to make progress on the last iteration.",
|
||||
0: "The maximum number of iterations is exceeded.",
|
||||
1: "The first-order optimality measure is less than `tol`.",
|
||||
2: "The relative change of the cost function is less than `tol`.",
|
||||
3: "The unconstrained solution is optimal."
|
||||
}
|
||||
|
||||
|
||||
def lsq_linear(A, b, bounds=(-np.inf, np.inf), method='trf', tol=1e-10,
|
||||
lsq_solver=None, lsmr_tol=None, max_iter=None, verbose=0):
|
||||
r"""Solve a linear least-squares problem with bounds on the variables.
|
||||
|
||||
Given an m-by-n design matrix A and a target vector b with m elements,
|
||||
`lsq_linear` solves the following optimization problem::
|
||||
|
||||
minimize 0.5 * ||A x - b||**2
|
||||
subject to lb <= x <= ub
|
||||
|
||||
This optimization problem is convex, hence a found minimum (if iterations
|
||||
have converged) is guaranteed to be global.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
A : array_like, sparse matrix or LinearOperator, shape (m, n)
|
||||
Design matrix. Can be `scipy.sparse.linalg.LinearOperator`.
|
||||
b : array_like, shape (m,)
|
||||
Target vector.
|
||||
bounds : 2-tuple of array_like, optional
|
||||
Lower and upper bounds on independent variables. Defaults to no bounds.
|
||||
Each array must have shape (n,) or be a scalar, in the latter
|
||||
case a bound will be the same for all variables. Use ``np.inf`` with
|
||||
an appropriate sign to disable bounds on all or some variables.
|
||||
method : 'trf' or 'bvls', optional
|
||||
Method to perform minimization.
|
||||
|
||||
* 'trf' : Trust Region Reflective algorithm adapted for a linear
|
||||
least-squares problem. This is an interior-point-like method
|
||||
and the required number of iterations is weakly correlated with
|
||||
the number of variables.
|
||||
* 'bvls' : Bounded-variable least-squares algorithm. This is
|
||||
an active set method, which requires the number of iterations
|
||||
comparable to the number of variables. Can't be used when `A` is
|
||||
sparse or LinearOperator.
|
||||
|
||||
Default is 'trf'.
|
||||
tol : float, optional
|
||||
Tolerance parameter. The algorithm terminates if a relative change
|
||||
of the cost function is less than `tol` on the last iteration.
|
||||
Additionally, the first-order optimality measure is considered:
|
||||
|
||||
* ``method='trf'`` terminates if the uniform norm of the gradient,
|
||||
scaled to account for the presence of the bounds, is less than
|
||||
`tol`.
|
||||
* ``method='bvls'`` terminates if Karush-Kuhn-Tucker conditions
|
||||
are satisfied within `tol` tolerance.
|
||||
|
||||
lsq_solver : {None, 'exact', 'lsmr'}, optional
|
||||
Method of solving unbounded least-squares problems throughout
|
||||
iterations:
|
||||
|
||||
* 'exact' : Use dense QR or SVD decomposition approach. Can't be
|
||||
used when `A` is sparse or LinearOperator.
|
||||
* 'lsmr' : Use `scipy.sparse.linalg.lsmr` iterative procedure
|
||||
which requires only matrix-vector product evaluations. Can't
|
||||
be used with ``method='bvls'``.
|
||||
|
||||
If None (default), the solver is chosen based on type of `A`.
|
||||
lsmr_tol : None, float or 'auto', optional
|
||||
Tolerance parameters 'atol' and 'btol' for `scipy.sparse.linalg.lsmr`.
|
||||
If None (default), it is set to ``1e-2 * tol``. If 'auto', the
|
||||
tolerance will be adjusted based on the optimality of the current
|
||||
iterate, which can speed up the optimization process, but is not always
|
||||
reliable.
|
||||
max_iter : None or int, optional
|
||||
Maximum number of iterations before termination. If None (default), it
|
||||
is set to 100 for ``method='trf'`` or to the number of variables for
|
||||
``method='bvls'`` (not counting iterations for 'bvls' initialization).
|
||||
verbose : {0, 1, 2}, optional
|
||||
Level of algorithm's verbosity:
|
||||
|
||||
* 0 : work silently (default).
|
||||
* 1 : display a termination report.
|
||||
* 2 : display progress during iterations.
|
||||
|
||||
Returns
|
||||
-------
|
||||
OptimizeResult with the following fields defined:
|
||||
x : ndarray, shape (n,)
|
||||
Solution found.
|
||||
cost : float
|
||||
Value of the cost function at the solution.
|
||||
fun : ndarray, shape (m,)
|
||||
Vector of residuals at the solution.
|
||||
optimality : float
|
||||
First-order optimality measure. The exact meaning depends on `method`,
|
||||
refer to the description of `tol` parameter.
|
||||
active_mask : ndarray of int, shape (n,)
|
||||
Each component shows whether a corresponding constraint is active
|
||||
(that is, whether a variable is at the bound):
|
||||
|
||||
* 0 : a constraint is not active.
|
||||
* -1 : a lower bound is active.
|
||||
* 1 : an upper bound is active.
|
||||
|
||||
Might be somewhat arbitrary for the `trf` method as it generates a
|
||||
sequence of strictly feasible iterates and active_mask is determined
|
||||
within a tolerance threshold.
|
||||
nit : int
|
||||
Number of iterations. Zero if the unconstrained solution is optimal.
|
||||
status : int
|
||||
Reason for algorithm termination:
|
||||
|
||||
* -1 : the algorithm was not able to make progress on the last
|
||||
iteration.
|
||||
* 0 : the maximum number of iterations is exceeded.
|
||||
* 1 : the first-order optimality measure is less than `tol`.
|
||||
* 2 : the relative change of the cost function is less than `tol`.
|
||||
* 3 : the unconstrained solution is optimal.
|
||||
|
||||
message : str
|
||||
Verbal description of the termination reason.
|
||||
success : bool
|
||||
True if one of the convergence criteria is satisfied (`status` > 0).
|
||||
|
||||
See Also
|
||||
--------
|
||||
nnls : Linear least squares with non-negativity constraint.
|
||||
least_squares : Nonlinear least squares with bounds on the variables.
|
||||
|
||||
Notes
|
||||
-----
|
||||
The algorithm first computes the unconstrained least-squares solution by
|
||||
`numpy.linalg.lstsq` or `scipy.sparse.linalg.lsmr` depending on
|
||||
`lsq_solver`. This solution is returned as optimal if it lies within the
|
||||
bounds.
|
||||
|
||||
Method 'trf' runs the adaptation of the algorithm described in [STIR]_ for
|
||||
a linear least-squares problem. The iterations are essentially the same as
|
||||
in the nonlinear least-squares algorithm, but as the quadratic function
|
||||
model is always accurate, we don't need to track or modify the radius of
|
||||
a trust region. The line search (backtracking) is used as a safety net
|
||||
when a selected step does not decrease the cost function. A more
|
||||
detailed description of the algorithm can be found in `scipy.optimize.least_squares`.
|
||||
|
||||
Method 'bvls' runs a Python implementation of the algorithm described in
|
||||
[BVLS]_. The algorithm maintains active and free sets of variables, on
|
||||
each iteration chooses a new variable to move from the active set to the
|
||||
free set and then solves the unconstrained least-squares problem on free
|
||||
variables. This algorithm is guaranteed to give an accurate solution
|
||||
eventually, but may require up to n iterations for a problem with n
|
||||
variables. Additionally, an ad-hoc initialization procedure is
|
||||
implemented that determines which variables to set free or active
|
||||
initially. It takes some number of iterations before actual BVLS starts,
|
||||
but can significantly reduce the number of further iterations.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
|
||||
and Conjugate Gradient Method for Large-Scale Bound-Constrained
|
||||
Minimization Problems," SIAM Journal on Scientific Computing,
|
||||
Vol. 21, Number 1, pp 1-23, 1999.
|
||||
.. [BVLS] P. B. Stark and R. L. Parker, "Bounded-Variable Least-Squares:
|
||||
an Algorithm and Applications", Computational Statistics, 10,
|
||||
129-141, 1995.
|
||||
|
||||
Examples
|
||||
--------
|
||||
In this example, a problem with a large sparse matrix and bounds on the
|
||||
variables is solved.
|
||||
|
||||
>>> from scipy.sparse import rand
|
||||
>>> from scipy.optimize import lsq_linear
|
||||
...
|
||||
>>> np.random.seed(0)
|
||||
...
|
||||
>>> m = 20000
|
||||
>>> n = 10000
|
||||
...
|
||||
>>> A = rand(m, n, density=1e-4)
|
||||
>>> b = np.random.randn(m)
|
||||
...
|
||||
>>> lb = np.random.randn(n)
|
||||
>>> ub = lb + 1
|
||||
...
|
||||
>>> res = lsq_linear(A, b, bounds=(lb, ub), lsmr_tol='auto', verbose=1)
|
||||
# may vary
|
||||
The relative change of the cost function is less than `tol`.
|
||||
Number of iterations 16, initial cost 1.5039e+04, final cost 1.1112e+04,
|
||||
first-order optimality 4.66e-08.
|
||||
"""
|
||||
if method not in ['trf', 'bvls']:
|
||||
raise ValueError("`method` must be 'trf' or 'bvls'")
|
||||
|
||||
if lsq_solver not in [None, 'exact', 'lsmr']:
|
||||
raise ValueError("`solver` must be None, 'exact' or 'lsmr'.")
|
||||
|
||||
if verbose not in [0, 1, 2]:
|
||||
raise ValueError("`verbose` must be in [0, 1, 2].")
|
||||
|
||||
if issparse(A):
|
||||
A = csr_matrix(A)
|
||||
elif not isinstance(A, LinearOperator):
|
||||
A = np.atleast_2d(A)
|
||||
|
||||
if method == 'bvls':
|
||||
if lsq_solver == 'lsmr':
|
||||
raise ValueError("method='bvls' can't be used with "
|
||||
"lsq_solver='lsmr'")
|
||||
|
||||
if not isinstance(A, np.ndarray):
|
||||
raise ValueError("method='bvls' can't be used with `A` being "
|
||||
"sparse or LinearOperator.")
|
||||
|
||||
if lsq_solver is None:
|
||||
if isinstance(A, np.ndarray):
|
||||
lsq_solver = 'exact'
|
||||
else:
|
||||
lsq_solver = 'lsmr'
|
||||
elif lsq_solver == 'exact' and not isinstance(A, np.ndarray):
|
||||
raise ValueError("`exact` solver can't be used when `A` is "
|
||||
"sparse or LinearOperator.")
|
||||
|
||||
if len(A.shape) != 2: # No ndim for LinearOperator.
|
||||
raise ValueError("`A` must have at most 2 dimensions.")
|
||||
|
||||
if len(bounds) != 2:
|
||||
raise ValueError("`bounds` must contain 2 elements.")
|
||||
|
||||
if max_iter is not None and max_iter <= 0:
|
||||
raise ValueError("`max_iter` must be None or positive integer.")
|
||||
|
||||
m, n = A.shape
|
||||
|
||||
b = np.atleast_1d(b)
|
||||
if b.ndim != 1:
|
||||
raise ValueError("`b` must have at most 1 dimension.")
|
||||
|
||||
if b.size != m:
|
||||
raise ValueError("Inconsistent shapes between `A` and `b`.")
|
||||
|
||||
lb, ub = prepare_bounds(bounds, n)
|
||||
|
||||
if lb.shape != (n,) and ub.shape != (n,):
|
||||
raise ValueError("Bounds have wrong shape.")
|
||||
|
||||
if np.any(lb >= ub):
|
||||
raise ValueError("Each lower bound must be strictly less than each "
|
||||
"upper bound.")
|
||||
|
||||
if lsq_solver == 'exact':
|
||||
x_lsq = np.linalg.lstsq(A, b, rcond=-1)[0]
|
||||
elif lsq_solver == 'lsmr':
|
||||
x_lsq = lsmr(A, b, atol=tol, btol=tol)[0]
|
||||
|
||||
if in_bounds(x_lsq, lb, ub):
|
||||
r = A.dot(x_lsq) - b
|
||||
cost = 0.5 * np.dot(r, r)
|
||||
termination_status = 3
|
||||
termination_message = TERMINATION_MESSAGES[termination_status]
|
||||
g = compute_grad(A, r)
|
||||
g_norm = norm(g, ord=np.inf)
|
||||
|
||||
if verbose > 0:
|
||||
print(termination_message)
|
||||
print("Final cost {0:.4e}, first-order optimality {1:.2e}"
|
||||
.format(cost, g_norm))
|
||||
|
||||
return OptimizeResult(
|
||||
x=x_lsq, fun=r, cost=cost, optimality=g_norm,
|
||||
active_mask=np.zeros(n), nit=0, status=termination_status,
|
||||
message=termination_message, success=True)
|
||||
|
||||
if method == 'trf':
|
||||
res = trf_linear(A, b, x_lsq, lb, ub, tol, lsq_solver, lsmr_tol,
|
||||
max_iter, verbose)
|
||||
elif method == 'bvls':
|
||||
res = bvls(A, b, x_lsq, lb, ub, tol, max_iter, verbose)
|
||||
|
||||
res.message = TERMINATION_MESSAGES[res.status]
|
||||
res.success = res.status > 0
|
||||
|
||||
if verbose > 0:
|
||||
print(res.message)
|
||||
print("Number of iterations {0}, initial cost {1:.4e}, "
|
||||
"final cost {2:.4e}, first-order optimality {3:.2e}."
|
||||
.format(res.nit, res.initial_cost, res.cost, res.optimality))
|
||||
|
||||
del res.initial_cost
|
||||
|
||||
return res
|
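A standalone usage sketch (not part of the file above, and with made-up test data) that exercises the two paths described in the Notes of `lsq_linear`: the early return when the unconstrained solution already satisfies the bounds, and the dense 'bvls' active-set iteration when it does not:

import numpy as np
from scipy.optimize import lsq_linear

rng = np.random.default_rng(0)
A = rng.standard_normal((50, 10))
b = rng.standard_normal(50)

# Wide bounds: the unconstrained least-squares solution is likely already
# feasible, so the shortcut path (status 3, nit 0) should be taken.
res_free = lsq_linear(A, b, bounds=(-10, 10))

# Tight bounds force the bounded iteration; `A` is a dense ndarray, so 'bvls' is allowed.
res_bvls = lsq_linear(A, b, bounds=(-0.1, 0.1), method='bvls')

print(res_free.status, res_free.nit)
print(res_bvls.x.min(), res_bvls.x.max(), res_bvls.optimality)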
12
venv/Lib/site-packages/scipy/optimize/_lsq/setup.py
Normal file
|
@ -0,0 +1,12 @@
|
|||
|
||||
def configuration(parent_package='', top_path=None):
|
||||
from numpy.distutils.misc_util import Configuration
|
||||
config = Configuration('_lsq', parent_package, top_path)
|
||||
config.add_extension('givens_elimination',
|
||||
sources=['givens_elimination.c'])
|
||||
return config
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from numpy.distutils.core import setup
|
||||
setup(**configuration(top_path='').todict())
|
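For orientation only: under the usual numpy.distutils conventions, a configuration like the one above could be built stand-alone by running the standard build command in this directory (in practice the givens_elimination extension is compiled as part of SciPy's top-level build):

python setup.py build_ext --inplace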
560
venv/Lib/site-packages/scipy/optimize/_lsq/trf.py
Normal file
|
@ -0,0 +1,560 @@
|
|||
"""Trust Region Reflective algorithm for least-squares optimization.
|
||||
|
||||
The algorithm is based on ideas from paper [STIR]_. The main idea is to
|
||||
account for the presence of the bounds by appropriate scaling of the variables (or,
|
||||
equivalently, changing a trust-region shape). Let's introduce a vector v:
|
||||
|
||||
| ub[i] - x[i], if g[i] < 0 and ub[i] < np.inf
|
||||
v[i] = | x[i] - lb[i], if g[i] > 0 and lb[i] > -np.inf
|
||||
| 1, otherwise
|
||||
|
||||
where g is the gradient of a cost function and lb, ub are the bounds. Its
|
||||
components are distances to the bounds at which the anti-gradient points (if
|
||||
this distance is finite). Define a scaling matrix D = diag(v**0.5).
|
||||
First-order optimality conditions can be stated as
|
||||
|
||||
D^2 g(x) = 0.
|
||||
|
||||
Meaning that components of the gradient should be zero for strictly interior
|
||||
variables, and components must point inside the feasible region for variables
|
||||
on the bound.
|
||||
|
||||
Now consider this system of equations as a new optimization problem. If the
|
||||
point x is strictly interior (not on the bound), then the left-hand side is
|
||||
differentiable and the Newton step for it satisfies
|
||||
|
||||
(D^2 H + diag(g) Jv) p = -D^2 g
|
||||
|
||||
where H is the Hessian matrix (or its J^T J approximation in least squares),
|
||||
Jv is the Jacobian matrix of v with components -1, 1 or 0, such that all
|
||||
elements of matrix C = diag(g) Jv are non-negative. Introduce the change
|
||||
of the variables x = D x_h (_h would be "hat" in LaTeX). In the new variables,
|
||||
we have a Newton step satisfying
|
||||
|
||||
B_h p_h = -g_h,
|
||||
|
||||
where B_h = D H D + C, g_h = D g. In least squares B_h = J_h^T J_h, where
|
||||
J_h = J D. Note that J_h and g_h are proper Jacobian and gradient with respect
|
||||
to "hat" variables. To guarantee global convergence we formulate a
|
||||
trust-region problem based on the Newton step in the new variables:
|
||||
|
||||
0.5 * p_h^T B_h p_h + g_h^T p_h -> min, ||p_h|| <= Delta
|
||||
|
||||
In the original space B = H + D^{-1} C D^{-1}, and the equivalent trust-region
|
||||
problem is
|
||||
|
||||
0.5 * p^T B p + g^T p -> min, ||D^{-1} p|| <= Delta
|
||||
|
||||
Here, the meaning of the matrix D becomes more clear: it alters the shape
|
||||
of a trust-region, such that large steps towards the bounds are not allowed.
|
||||
In the implementation, the trust-region problem is solved in "hat" space,
|
||||
but handling of the bounds is done in the original space (see below and read
|
||||
the code).
|
||||
|
||||
The introduction of the matrix D doesn't allow the bounds to be ignored; the algorithm
|
||||
must keep iterates strictly feasible (to satisfy the aforementioned
|
||||
differentiability), and the parameter theta controls the step back from the boundary
|
||||
(see the code for details).
|
||||
|
||||
The algorithm does another important trick. If the trust-region solution
|
||||
doesn't fit into the bounds, then a reflected (from a firstly encountered
|
||||
bound) search direction is considered. For motivation and analysis refer to
|
||||
[STIR]_ paper (and other papers of the authors). In practice, it doesn't need
|
||||
much justification: the algorithm simply chooses the best step among
|
||||
three: a constrained trust-region step, a reflected step and a constrained
|
||||
Cauchy step (a minimizer along -g_h in "hat" space, or -D^2 g in the original
|
||||
space).
|
||||
|
||||
Another feature is that a trust-region radius control strategy is modified to
|
||||
account for appearance of the diagonal C matrix (called diag_h in the code).
|
||||
|
||||
Note that all of the described peculiarities vanish when we consider
|
||||
problems without bounds (the algorithm becomes a standard trust-region type
|
||||
algorithm very similar to ones implemented in MINPACK).
|
||||
|
||||
The implementation supports two methods of solving the trust-region problem.
|
||||
The first, called 'exact', applies SVD on Jacobian and then solves the problem
|
||||
very accurately using the algorithm described in [JJMore]_. It is not
|
||||
applicable to large problems. The second, called 'lsmr', uses the 2-D subspace
|
||||
approach (sometimes called "indefinite dogleg"), where the problem is solved
|
||||
in a subspace spanned by the gradient and the approximate Gauss-Newton step
|
||||
found by ``scipy.sparse.linalg.lsmr``. A 2-D trust-region problem is
|
||||
reformulated as a 4th order algebraic equation and solved very accurately by
|
||||
``numpy.roots``. The subspace approach makes it possible to solve very large problems
|
||||
(up to a couple of million residuals on a regular PC), provided the Jacobian
|
||||
matrix is sufficiently sparse.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [STIR] Branch, M.A., T.F. Coleman, and Y. Li, "A Subspace, Interior,
|
||||
and Conjugate Gradient Method for Large-Scale Bound-Constrained
|
||||
Minimization Problems," SIAM Journal on Scientific Computing,
|
||||
Vol. 21, Number 1, pp 1-23, 1999.
|
||||
.. [JJMore] More, J. J., "The Levenberg-Marquardt Algorithm: Implementation
|
||||
and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
|
||||
"""
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
from scipy.linalg import svd, qr
|
||||
from scipy.sparse.linalg import lsmr
|
||||
from scipy.optimize import OptimizeResult
|
||||
|
||||
from .common import (
|
||||
step_size_to_bound, find_active_constraints, in_bounds,
|
||||
make_strictly_feasible, intersect_trust_region, solve_lsq_trust_region,
|
||||
solve_trust_region_2d, minimize_quadratic_1d, build_quadratic_1d,
|
||||
evaluate_quadratic, right_multiplied_operator, regularized_lsq_operator,
|
||||
CL_scaling_vector, compute_grad, compute_jac_scale, check_termination,
|
||||
update_tr_radius, scale_for_robust_loss_function, print_header_nonlinear,
|
||||
print_iteration_nonlinear)
|
||||
|
||||
|
||||
def trf(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
|
||||
loss_function, tr_solver, tr_options, verbose):
|
||||
# For efficiency, it makes sense to run the simplified version of the
|
||||
# algorithm when no bounds are imposed. We decided to write two
|
||||
# separate functions. It violates the DRY principle, but the individual
|
||||
# functions are kept as readable as possible.
|
||||
if np.all(lb == -np.inf) and np.all(ub == np.inf):
|
||||
return trf_no_bounds(
|
||||
fun, jac, x0, f0, J0, ftol, xtol, gtol, max_nfev, x_scale,
|
||||
loss_function, tr_solver, tr_options, verbose)
|
||||
else:
|
||||
return trf_bounds(
|
||||
fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
|
||||
loss_function, tr_solver, tr_options, verbose)
|
||||
|
||||
|
||||
def select_step(x, J_h, diag_h, g_h, p, p_h, d, Delta, lb, ub, theta):
|
||||
"""Select the best step according to Trust Region Reflective algorithm."""
|
||||
if in_bounds(x + p, lb, ub):
|
||||
p_value = evaluate_quadratic(J_h, g_h, p_h, diag=diag_h)
|
||||
return p, p_h, -p_value
|
||||
|
||||
p_stride, hits = step_size_to_bound(x, p, lb, ub)
|
||||
|
||||
# Compute the reflected direction.
|
||||
r_h = np.copy(p_h)
|
||||
r_h[hits.astype(bool)] *= -1
|
||||
r = d * r_h
|
||||
|
||||
# Restrict trust-region step, such that it hits the bound.
|
||||
p *= p_stride
|
||||
p_h *= p_stride
|
||||
x_on_bound = x + p
|
||||
|
||||
# Reflected direction will cross first either feasible region or trust
|
||||
# region boundary.
|
||||
_, to_tr = intersect_trust_region(p_h, r_h, Delta)
|
||||
to_bound, _ = step_size_to_bound(x_on_bound, r, lb, ub)
|
||||
|
||||
# Find lower and upper bounds on a step size along the reflected
|
||||
# direction, considering the strict feasibility requirement. There is no
|
||||
# single correct way to do that, the chosen approach seems to work best
|
||||
# on test problems.
|
||||
r_stride = min(to_bound, to_tr)
|
||||
if r_stride > 0:
|
||||
r_stride_l = (1 - theta) * p_stride / r_stride
|
||||
if r_stride == to_bound:
|
||||
r_stride_u = theta * to_bound
|
||||
else:
|
||||
r_stride_u = to_tr
|
||||
else:
|
||||
r_stride_l = 0
|
||||
r_stride_u = -1
|
||||
|
||||
# Check if reflection step is available.
|
||||
if r_stride_l <= r_stride_u:
|
||||
a, b, c = build_quadratic_1d(J_h, g_h, r_h, s0=p_h, diag=diag_h)
|
||||
r_stride, r_value = minimize_quadratic_1d(
|
||||
a, b, r_stride_l, r_stride_u, c=c)
|
||||
r_h *= r_stride
|
||||
r_h += p_h
|
||||
r = r_h * d
|
||||
else:
|
||||
r_value = np.inf
|
||||
|
||||
# Now correct p_h to make it strictly interior.
|
||||
p *= theta
|
||||
p_h *= theta
|
||||
p_value = evaluate_quadratic(J_h, g_h, p_h, diag=diag_h)
|
||||
|
||||
ag_h = -g_h
|
||||
ag = d * ag_h
|
||||
|
||||
to_tr = Delta / norm(ag_h)
|
||||
to_bound, _ = step_size_to_bound(x, ag, lb, ub)
|
||||
if to_bound < to_tr:
|
||||
ag_stride = theta * to_bound
|
||||
else:
|
||||
ag_stride = to_tr
|
||||
|
||||
a, b = build_quadratic_1d(J_h, g_h, ag_h, diag=diag_h)
|
||||
ag_stride, ag_value = minimize_quadratic_1d(a, b, 0, ag_stride)
|
||||
ag_h *= ag_stride
|
||||
ag *= ag_stride
|
||||
|
||||
if p_value < r_value and p_value < ag_value:
|
||||
return p, p_h, -p_value
|
||||
elif r_value < p_value and r_value < ag_value:
|
||||
return r, r_h, -r_value
|
||||
else:
|
||||
return ag, ag_h, -ag_value
|
||||
|
||||
|
||||
def trf_bounds(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev,
|
||||
x_scale, loss_function, tr_solver, tr_options, verbose):
|
||||
x = x0.copy()
|
||||
|
||||
f = f0
|
||||
f_true = f.copy()
|
||||
nfev = 1
|
||||
|
||||
J = J0
|
||||
njev = 1
|
||||
m, n = J.shape
|
||||
|
||||
if loss_function is not None:
|
||||
rho = loss_function(f)
|
||||
cost = 0.5 * np.sum(rho[0])
|
||||
J, f = scale_for_robust_loss_function(J, f, rho)
|
||||
else:
|
||||
cost = 0.5 * np.dot(f, f)
|
||||
|
||||
g = compute_grad(J, f)
|
||||
|
||||
jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
|
||||
if jac_scale:
|
||||
scale, scale_inv = compute_jac_scale(J)
|
||||
else:
|
||||
scale, scale_inv = x_scale, 1 / x_scale
|
||||
|
||||
v, dv = CL_scaling_vector(x, g, lb, ub)
|
||||
v[dv != 0] *= scale_inv[dv != 0]
|
||||
Delta = norm(x0 * scale_inv / v**0.5)
|
||||
if Delta == 0:
|
||||
Delta = 1.0
|
||||
|
||||
g_norm = norm(g * v, ord=np.inf)
|
||||
|
||||
f_augmented = np.zeros((m + n))
|
||||
if tr_solver == 'exact':
|
||||
J_augmented = np.empty((m + n, n))
|
||||
elif tr_solver == 'lsmr':
|
||||
reg_term = 0.0
|
||||
regularize = tr_options.pop('regularize', True)
|
||||
|
||||
if max_nfev is None:
|
||||
max_nfev = x0.size * 100
|
||||
|
||||
alpha = 0.0 # "Levenberg-Marquardt" parameter
|
||||
|
||||
termination_status = None
|
||||
iteration = 0
|
||||
step_norm = None
|
||||
actual_reduction = None
|
||||
|
||||
if verbose == 2:
|
||||
print_header_nonlinear()
|
||||
|
||||
while True:
|
||||
v, dv = CL_scaling_vector(x, g, lb, ub)
|
||||
|
||||
g_norm = norm(g * v, ord=np.inf)
|
||||
if g_norm < gtol:
|
||||
termination_status = 1
|
||||
|
||||
if verbose == 2:
|
||||
print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
|
||||
step_norm, g_norm)
|
||||
|
||||
if termination_status is not None or nfev == max_nfev:
|
||||
break
|
||||
|
||||
# Now compute variables in "hat" space. Here, we also account for
|
||||
# scaling introduced by `x_scale` parameter. This part is a bit tricky,
|
||||
# you have to write down the formulas and see how the trust-region
|
||||
# problem is formulated when the two types of scaling are applied.
|
||||
# The idea is that first we apply `x_scale` and then apply Coleman-Li
|
||||
# approach in the new variables.
|
||||
|
||||
# v is recomputed in the variables after applying `x_scale`, note that
|
||||
# components which were identically 1 are not affected.
|
||||
v[dv != 0] *= scale_inv[dv != 0]
|
||||
|
||||
# Here, we apply two types of scaling.
|
||||
d = v**0.5 * scale
|
||||
|
||||
# C = diag(g * scale) Jv
|
||||
diag_h = g * dv * scale
|
||||
|
||||
# After all this has been done, we continue normally.
|
||||
|
||||
# "hat" gradient.
|
||||
g_h = d * g
|
||||
|
||||
f_augmented[:m] = f
|
||||
if tr_solver == 'exact':
|
||||
J_augmented[:m] = J * d
|
||||
J_h = J_augmented[:m] # Memory view.
|
||||
J_augmented[m:] = np.diag(diag_h**0.5)
|
||||
U, s, V = svd(J_augmented, full_matrices=False)
|
||||
V = V.T
|
||||
uf = U.T.dot(f_augmented)
|
||||
elif tr_solver == 'lsmr':
|
||||
J_h = right_multiplied_operator(J, d)
|
||||
|
||||
if regularize:
|
||||
a, b = build_quadratic_1d(J_h, g_h, -g_h, diag=diag_h)
|
||||
to_tr = Delta / norm(g_h)
|
||||
ag_value = minimize_quadratic_1d(a, b, 0, to_tr)[1]
|
||||
reg_term = -ag_value / Delta**2
|
||||
|
||||
lsmr_op = regularized_lsq_operator(J_h, (diag_h + reg_term)**0.5)
|
||||
gn_h = lsmr(lsmr_op, f_augmented, **tr_options)[0]
|
||||
S = np.vstack((g_h, gn_h)).T
|
||||
S, _ = qr(S, mode='economic')
|
||||
JS = J_h.dot(S) # LinearOperator does dot too.
|
||||
B_S = np.dot(JS.T, JS) + np.dot(S.T * diag_h, S)
|
||||
g_S = S.T.dot(g_h)
|
||||
|
||||
# theta controls the step-back ratio from the bounds.
|
||||
theta = max(0.995, 1 - g_norm)
|
||||
|
||||
actual_reduction = -1
|
||||
while actual_reduction <= 0 and nfev < max_nfev:
|
||||
if tr_solver == 'exact':
|
||||
p_h, alpha, n_iter = solve_lsq_trust_region(
|
||||
n, m, uf, s, V, Delta, initial_alpha=alpha)
|
||||
elif tr_solver == 'lsmr':
|
||||
p_S, _ = solve_trust_region_2d(B_S, g_S, Delta)
|
||||
p_h = S.dot(p_S)
|
||||
|
||||
p = d * p_h # Trust-region solution in the original space.
|
||||
step, step_h, predicted_reduction = select_step(
|
||||
x, J_h, diag_h, g_h, p, p_h, d, Delta, lb, ub, theta)
|
||||
|
||||
x_new = make_strictly_feasible(x + step, lb, ub, rstep=0)
|
||||
f_new = fun(x_new)
|
||||
nfev += 1
|
||||
|
||||
step_h_norm = norm(step_h)
|
||||
|
||||
if not np.all(np.isfinite(f_new)):
|
||||
Delta = 0.25 * step_h_norm
|
||||
continue
|
||||
|
||||
# Usual trust-region step quality estimation.
|
||||
if loss_function is not None:
|
||||
cost_new = loss_function(f_new, cost_only=True)
|
||||
else:
|
||||
cost_new = 0.5 * np.dot(f_new, f_new)
|
||||
actual_reduction = cost - cost_new
|
||||
Delta_new, ratio = update_tr_radius(
|
||||
Delta, actual_reduction, predicted_reduction,
|
||||
step_h_norm, step_h_norm > 0.95 * Delta)
|
||||
|
||||
step_norm = norm(step)
|
||||
termination_status = check_termination(
|
||||
actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)
|
||||
if termination_status is not None:
|
||||
break
|
||||
|
||||
alpha *= Delta / Delta_new
|
||||
Delta = Delta_new
|
||||
|
||||
if actual_reduction > 0:
|
||||
x = x_new
|
||||
|
||||
f = f_new
|
||||
f_true = f.copy()
|
||||
|
||||
cost = cost_new
|
||||
|
||||
J = jac(x, f)
|
||||
njev += 1
|
||||
|
||||
if loss_function is not None:
|
||||
rho = loss_function(f)
|
||||
J, f = scale_for_robust_loss_function(J, f, rho)
|
||||
|
||||
g = compute_grad(J, f)
|
||||
|
||||
if jac_scale:
|
||||
scale, scale_inv = compute_jac_scale(J, scale_inv)
|
||||
else:
|
||||
step_norm = 0
|
||||
actual_reduction = 0
|
||||
|
||||
iteration += 1
|
||||
|
||||
if termination_status is None:
|
||||
termination_status = 0
|
||||
|
||||
active_mask = find_active_constraints(x, lb, ub, rtol=xtol)
|
||||
return OptimizeResult(
|
||||
x=x, cost=cost, fun=f_true, jac=J, grad=g, optimality=g_norm,
|
||||
active_mask=active_mask, nfev=nfev, njev=njev,
|
||||
status=termination_status)
|
||||
|
||||
|
||||
def trf_no_bounds(fun, jac, x0, f0, J0, ftol, xtol, gtol, max_nfev,
|
||||
x_scale, loss_function, tr_solver, tr_options, verbose):
|
||||
x = x0.copy()
|
||||
|
||||
f = f0
|
||||
f_true = f.copy()
|
||||
nfev = 1
|
||||
|
||||
J = J0
|
||||
njev = 1
|
||||
m, n = J.shape
|
||||
|
||||
if loss_function is not None:
|
||||
rho = loss_function(f)
|
||||
cost = 0.5 * np.sum(rho[0])
|
||||
J, f = scale_for_robust_loss_function(J, f, rho)
|
||||
else:
|
||||
cost = 0.5 * np.dot(f, f)
|
||||
|
||||
g = compute_grad(J, f)
|
||||
|
||||
jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
|
||||
if jac_scale:
|
||||
scale, scale_inv = compute_jac_scale(J)
|
||||
else:
|
||||
scale, scale_inv = x_scale, 1 / x_scale
|
||||
|
||||
Delta = norm(x0 * scale_inv)
|
||||
if Delta == 0:
|
||||
Delta = 1.0
|
||||
|
||||
if tr_solver == 'lsmr':
|
||||
reg_term = 0
|
||||
damp = tr_options.pop('damp', 0.0)
|
||||
regularize = tr_options.pop('regularize', True)
|
||||
|
||||
if max_nfev is None:
|
||||
max_nfev = x0.size * 100
|
||||
|
||||
alpha = 0.0 # "Levenberg-Marquardt" parameter
|
||||
|
||||
termination_status = None
|
||||
iteration = 0
|
||||
step_norm = None
|
||||
actual_reduction = None
|
||||
|
||||
if verbose == 2:
|
||||
print_header_nonlinear()
|
||||
|
||||
while True:
|
||||
g_norm = norm(g, ord=np.inf)
|
||||
if g_norm < gtol:
|
||||
termination_status = 1
|
||||
|
||||
if verbose == 2:
|
||||
print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
|
||||
step_norm, g_norm)
|
||||
|
||||
if termination_status is not None or nfev == max_nfev:
|
||||
break
|
||||
|
||||
d = scale
|
||||
g_h = d * g
|
||||
|
||||
if tr_solver == 'exact':
|
||||
J_h = J * d
|
||||
U, s, V = svd(J_h, full_matrices=False)
|
||||
V = V.T
|
||||
uf = U.T.dot(f)
|
||||
elif tr_solver == 'lsmr':
|
||||
J_h = right_multiplied_operator(J, d)
|
||||
|
||||
if regularize:
|
||||
a, b = build_quadratic_1d(J_h, g_h, -g_h)
|
||||
to_tr = Delta / norm(g_h)
|
||||
ag_value = minimize_quadratic_1d(a, b, 0, to_tr)[1]
|
||||
reg_term = -ag_value / Delta**2
|
||||
|
||||
damp_full = (damp**2 + reg_term)**0.5
|
||||
gn_h = lsmr(J_h, f, damp=damp_full, **tr_options)[0]
|
||||
S = np.vstack((g_h, gn_h)).T
|
||||
S, _ = qr(S, mode='economic')
|
||||
JS = J_h.dot(S)
|
||||
B_S = np.dot(JS.T, JS)
|
||||
g_S = S.T.dot(g_h)
|
||||
|
||||
actual_reduction = -1
|
||||
while actual_reduction <= 0 and nfev < max_nfev:
|
||||
if tr_solver == 'exact':
|
||||
step_h, alpha, n_iter = solve_lsq_trust_region(
|
||||
n, m, uf, s, V, Delta, initial_alpha=alpha)
|
||||
elif tr_solver == 'lsmr':
|
||||
p_S, _ = solve_trust_region_2d(B_S, g_S, Delta)
|
||||
step_h = S.dot(p_S)
|
||||
|
||||
predicted_reduction = -evaluate_quadratic(J_h, g_h, step_h)
|
||||
step = d * step_h
|
||||
x_new = x + step
|
||||
f_new = fun(x_new)
|
||||
nfev += 1
|
||||
|
||||
step_h_norm = norm(step_h)
|
||||
|
||||
if not np.all(np.isfinite(f_new)):
|
||||
Delta = 0.25 * step_h_norm
|
||||
continue
|
||||
|
||||
# Usual trust-region step quality estimation.
|
||||
if loss_function is not None:
|
||||
cost_new = loss_function(f_new, cost_only=True)
|
||||
else:
|
||||
cost_new = 0.5 * np.dot(f_new, f_new)
|
||||
actual_reduction = cost - cost_new
|
||||
|
||||
Delta_new, ratio = update_tr_radius(
|
||||
Delta, actual_reduction, predicted_reduction,
|
||||
step_h_norm, step_h_norm > 0.95 * Delta)
|
||||
|
||||
step_norm = norm(step)
|
||||
termination_status = check_termination(
|
||||
actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)
|
||||
if termination_status is not None:
|
||||
break
|
||||
|
||||
alpha *= Delta / Delta_new
|
||||
Delta = Delta_new
|
||||
|
||||
if actual_reduction > 0:
|
||||
x = x_new
|
||||
|
||||
f = f_new
|
||||
f_true = f.copy()
|
||||
|
||||
cost = cost_new
|
||||
|
||||
J = jac(x, f)
|
||||
njev += 1
|
||||
|
||||
if loss_function is not None:
|
||||
rho = loss_function(f)
|
||||
J, f = scale_for_robust_loss_function(J, f, rho)
|
||||
|
||||
g = compute_grad(J, f)
|
||||
|
||||
if jac_scale:
|
||||
scale, scale_inv = compute_jac_scale(J, scale_inv)
|
||||
else:
|
||||
step_norm = 0
|
||||
actual_reduction = 0
|
||||
|
||||
iteration += 1
|
||||
|
||||
if termination_status is None:
|
||||
termination_status = 0
|
||||
|
||||
active_mask = np.zeros_like(x)
|
||||
return OptimizeResult(
|
||||
x=x, cost=cost, fun=f_true, jac=J, grad=g, optimality=g_norm,
|
||||
active_mask=active_mask, nfev=nfev, njev=njev,
|
||||
status=termination_status)
|
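A small self-contained sketch of the scaling vector v and matrix D defined in the module docstring above. The helper name below is invented for illustration; the library's own implementation is the `CL_scaling_vector` function imported from `.common`, which additionally returns the derivative term dv used to build diag_h:

import numpy as np

def cl_scaling_sketch(x, g, lb, ub):
    # Illustration only: v[i] is the distance to the bound that the
    # anti-gradient points at, or 1 when that distance is infinite
    # (or the gradient component is zero).
    v = np.ones_like(x)
    upper = (g < 0) & np.isfinite(ub)
    v[upper] = ub[upper] - x[upper]
    lower = (g > 0) & np.isfinite(lb)
    v[lower] = x[lower] - lb[lower]
    return v

x = np.array([0.5, 2.0, -1.0])
g = np.array([-1.0, 3.0, 0.0])       # anti-gradient points to ub, to an infinite lb, nowhere
lb = np.array([0.0, -np.inf, -2.0])
ub = np.array([1.0, np.inf, 2.0])

v = cl_scaling_sketch(x, g, lb, ub)  # -> [0.5, 1.0, 1.0]
D = np.diag(v ** 0.5)                # the trust-region shaping matrix D = diag(v**0.5)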
249
venv/Lib/site-packages/scipy/optimize/_lsq/trf_linear.py
Normal file
|
@ -0,0 +1,249 @@
|
|||
"""The adaptation of Trust Region Reflective algorithm for a linear
|
||||
least-squares problem."""
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
from scipy.linalg import qr, solve_triangular
|
||||
from scipy.sparse.linalg import lsmr
|
||||
from scipy.optimize import OptimizeResult
|
||||
|
||||
from .givens_elimination import givens_elimination
|
||||
from .common import (
|
||||
EPS, step_size_to_bound, find_active_constraints, in_bounds,
|
||||
make_strictly_feasible, build_quadratic_1d, evaluate_quadratic,
|
||||
minimize_quadratic_1d, CL_scaling_vector, reflective_transformation,
|
||||
print_header_linear, print_iteration_linear, compute_grad,
|
||||
regularized_lsq_operator, right_multiplied_operator)
|
||||
|
||||
|
||||
def regularized_lsq_with_qr(m, n, R, QTb, perm, diag, copy_R=True):
|
||||
"""Solve regularized least squares using information from QR-decomposition.
|
||||
|
||||
The initial problem is to solve the following system in a least-squares
|
||||
sense:
|
||||
::
|
||||
|
||||
A x = b
|
||||
D x = 0
|
||||
|
||||
where D is a diagonal matrix. The method is based on QR decomposition
|
||||
of the form A P = Q R, where P is a column permutation matrix, Q is an
|
||||
orthogonal matrix and R is an upper triangular matrix.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
m, n : int
|
||||
Initial shape of A.
|
||||
R : ndarray, shape (n, n)
|
||||
Upper triangular matrix from QR decomposition of A.
|
||||
QTb : ndarray, shape (n,)
|
||||
First n components of Q^T b.
|
||||
perm : ndarray, shape (n,)
|
||||
Array defining the column permutation of A, such that the ith column of
|
||||
P is the perm[i]-th column of the identity matrix.
|
||||
diag : ndarray, shape (n,)
|
||||
Array containing diagonal elements of D.
|
||||
|
||||
Returns
|
||||
-------
|
||||
x : ndarray, shape (n,)
|
||||
Found least-squares solution.
|
||||
"""
|
||||
if copy_R:
|
||||
R = R.copy()
|
||||
v = QTb.copy()
|
||||
|
||||
givens_elimination(R, v, diag[perm])
|
||||
|
||||
abs_diag_R = np.abs(np.diag(R))
|
||||
threshold = EPS * max(m, n) * np.max(abs_diag_R)
|
||||
nns, = np.nonzero(abs_diag_R > threshold)
|
||||
|
||||
R = R[np.ix_(nns, nns)]
|
||||
v = v[nns]
|
||||
|
||||
x = np.zeros(n)
|
||||
x[perm[nns]] = solve_triangular(R, v)
|
||||
|
||||
return x
|
||||
|
||||
|
||||
def backtracking(A, g, x, p, theta, p_dot_g, lb, ub):
|
||||
"""Find an appropriate step size using backtracking line search."""
|
||||
alpha = 1
|
||||
while True:
|
||||
x_new, _ = reflective_transformation(x + alpha * p, lb, ub)
|
||||
step = x_new - x
|
||||
cost_change = -evaluate_quadratic(A, g, step)
|
||||
if cost_change > -0.1 * alpha * p_dot_g:
|
||||
break
|
||||
alpha *= 0.5
|
||||
|
||||
active = find_active_constraints(x_new, lb, ub)
|
||||
if np.any(active != 0):
|
||||
x_new, _ = reflective_transformation(x + theta * alpha * p, lb, ub)
|
||||
x_new = make_strictly_feasible(x_new, lb, ub, rstep=0)
|
||||
step = x_new - x
|
||||
cost_change = -evaluate_quadratic(A, g, step)
|
||||
|
||||
return x, step, cost_change
|
||||
|
||||
|
||||
def select_step(x, A_h, g_h, c_h, p, p_h, d, lb, ub, theta):
|
||||
"""Select the best step according to Trust Region Reflective algorithm."""
|
||||
if in_bounds(x + p, lb, ub):
|
||||
return p
|
||||
|
||||
p_stride, hits = step_size_to_bound(x, p, lb, ub)
|
||||
r_h = np.copy(p_h)
|
||||
r_h[hits.astype(bool)] *= -1
|
||||
r = d * r_h
|
||||
|
||||
# Restrict step, such that it hits the bound.
|
||||
p *= p_stride
|
||||
p_h *= p_stride
|
||||
x_on_bound = x + p
|
||||
|
||||
# Find the step size along reflected direction.
|
||||
r_stride_u, _ = step_size_to_bound(x_on_bound, r, lb, ub)
|
||||
|
||||
# Stay interior.
|
||||
r_stride_l = (1 - theta) * r_stride_u
|
||||
r_stride_u *= theta
|
||||
|
||||
if r_stride_u > 0:
|
||||
a, b, c = build_quadratic_1d(A_h, g_h, r_h, s0=p_h, diag=c_h)
|
||||
r_stride, r_value = minimize_quadratic_1d(
|
||||
a, b, r_stride_l, r_stride_u, c=c)
|
||||
r_h = p_h + r_h * r_stride
|
||||
r = d * r_h
|
||||
else:
|
||||
r_value = np.inf
|
||||
|
||||
# Now correct p_h to make it strictly interior.
|
||||
p_h *= theta
|
||||
p *= theta
|
||||
p_value = evaluate_quadratic(A_h, g_h, p_h, diag=c_h)
|
||||
|
||||
ag_h = -g_h
|
||||
ag = d * ag_h
|
||||
ag_stride_u, _ = step_size_to_bound(x, ag, lb, ub)
|
||||
ag_stride_u *= theta
|
||||
a, b = build_quadratic_1d(A_h, g_h, ag_h, diag=c_h)
|
||||
ag_stride, ag_value = minimize_quadratic_1d(a, b, 0, ag_stride_u)
|
||||
ag *= ag_stride
|
||||
|
||||
if p_value < r_value and p_value < ag_value:
|
||||
return p
|
||||
elif r_value < p_value and r_value < ag_value:
|
||||
return r
|
||||
else:
|
||||
return ag
|
||||
|
||||
|
||||
def trf_linear(A, b, x_lsq, lb, ub, tol, lsq_solver, lsmr_tol, max_iter,
|
||||
verbose):
|
||||
m, n = A.shape
|
||||
x, _ = reflective_transformation(x_lsq, lb, ub)
|
||||
x = make_strictly_feasible(x, lb, ub, rstep=0.1)
|
||||
|
||||
if lsq_solver == 'exact':
|
||||
QT, R, perm = qr(A, mode='economic', pivoting=True)
|
||||
QT = QT.T
|
||||
|
||||
if m < n:
|
||||
R = np.vstack((R, np.zeros((n - m, n))))
|
||||
|
||||
QTr = np.zeros(n)
|
||||
k = min(m, n)
|
||||
elif lsq_solver == 'lsmr':
|
||||
r_aug = np.zeros(m + n)
|
||||
auto_lsmr_tol = False
|
||||
if lsmr_tol is None:
|
||||
lsmr_tol = 1e-2 * tol
|
||||
elif lsmr_tol == 'auto':
|
||||
auto_lsmr_tol = True
|
||||
|
||||
r = A.dot(x) - b
|
||||
g = compute_grad(A, r)
|
||||
cost = 0.5 * np.dot(r, r)
|
||||
initial_cost = cost
|
||||
|
||||
termination_status = None
|
||||
step_norm = None
|
||||
cost_change = None
|
||||
|
||||
if max_iter is None:
|
||||
max_iter = 100
|
||||
|
||||
if verbose == 2:
|
||||
print_header_linear()
|
||||
|
||||
for iteration in range(max_iter):
|
||||
v, dv = CL_scaling_vector(x, g, lb, ub)
|
||||
g_scaled = g * v
|
||||
g_norm = norm(g_scaled, ord=np.inf)
|
||||
if g_norm < tol:
|
||||
termination_status = 1
|
||||
|
||||
if verbose == 2:
|
||||
print_iteration_linear(iteration, cost, cost_change,
|
||||
step_norm, g_norm)
|
||||
|
||||
if termination_status is not None:
|
||||
break
|
||||
|
||||
diag_h = g * dv
|
||||
diag_root_h = diag_h ** 0.5
|
||||
d = v ** 0.5
|
||||
g_h = d * g
|
||||
|
||||
A_h = right_multiplied_operator(A, d)
|
||||
if lsq_solver == 'exact':
|
||||
QTr[:k] = QT.dot(r)
|
||||
p_h = -regularized_lsq_with_qr(m, n, R * d[perm], QTr, perm,
|
||||
diag_root_h, copy_R=False)
|
||||
elif lsq_solver == 'lsmr':
|
||||
lsmr_op = regularized_lsq_operator(A_h, diag_root_h)
|
||||
r_aug[:m] = r
|
||||
if auto_lsmr_tol:
|
||||
eta = 1e-2 * min(0.5, g_norm)
|
||||
lsmr_tol = max(EPS, min(0.1, eta * g_norm))
|
||||
p_h = -lsmr(lsmr_op, r_aug, atol=lsmr_tol, btol=lsmr_tol)[0]
|
||||
|
||||
p = d * p_h
|
||||
|
||||
p_dot_g = np.dot(p, g)
|
||||
if p_dot_g > 0:
|
||||
termination_status = -1
|
||||
|
||||
theta = 1 - min(0.005, g_norm)
|
||||
step = select_step(x, A_h, g_h, diag_h, p, p_h, d, lb, ub, theta)
|
||||
cost_change = -evaluate_quadratic(A, g, step)
|
||||
|
||||
# Perhaps almost never executed; the idea is that `p` is a descent
|
||||
# direction, thus we must find an acceptable cost decrease using simple
|
||||
# "backtracking", otherwise the algorithm's logic would break.
|
||||
if cost_change < 0:
|
||||
x, step, cost_change = backtracking(
|
||||
A, g, x, p, theta, p_dot_g, lb, ub)
|
||||
else:
|
||||
x = make_strictly_feasible(x + step, lb, ub, rstep=0)
|
||||
|
||||
step_norm = norm(step)
|
||||
r = A.dot(x) - b
|
||||
g = compute_grad(A, r)
|
||||
|
||||
if cost_change < tol * cost:
|
||||
termination_status = 2
|
||||
|
||||
cost = 0.5 * np.dot(r, r)
|
||||
|
||||
if termination_status is None:
|
||||
termination_status = 0
|
||||
|
||||
active_mask = find_active_constraints(x, lb, ub, rtol=tol)
|
||||
|
||||
return OptimizeResult(
|
||||
x=x, fun=r, cost=cost, optimality=g_norm, active_mask=active_mask,
|
||||
nit=iteration + 1, status=termination_status,
|
||||
initial_cost=initial_cost)
|
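The reflected and steepest-descent steps above are chosen by minimizing a one-dimensional quadratic over a closed interval (via `build_quadratic_1d` and `minimize_quadratic_1d` from `.common`). The following stand-alone sketch, with an invented function name and simplified interface, shows the idea for a quadratic given directly by its coefficients:

import numpy as np

def minimize_quadratic_on_interval(a, b, lb, ub, c=0.0):
    # Sketch: minimize a*t**2 + b*t + c over [lb, ub] by comparing the
    # endpoints with the unconstrained vertex when the parabola opens
    # upward and the vertex lies inside the interval.
    candidates = [lb, ub]
    if a > 0:
        vertex = -0.5 * b / a
        if lb < vertex < ub:
            candidates.append(vertex)
    values = [a * t**2 + b * t + c for t in candidates]
    i = int(np.argmin(values))
    return candidates[i], values[i]

# The vertex t = 1.5 lies outside [0, 1], so the minimum is at the endpoint t = 1.
print(minimize_quadratic_on_interval(1.0, -3.0, 0.0, 1.0))   # (1.0, -2.0)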
837
venv/Lib/site-packages/scipy/optimize/_minimize.py
Normal file
|
@ -0,0 +1,837 @@
|
|||
"""
|
||||
Unified interfaces to minimization algorithms.
|
||||
|
||||
Functions
|
||||
---------
|
||||
- minimize : minimization of a function of several variables.
|
||||
- minimize_scalar : minimization of a function of one variable.
|
||||
"""
|
||||
|
||||
__all__ = ['minimize', 'minimize_scalar']
|
||||
|
||||
|
||||
from warnings import warn
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
# unconstrained minimization
|
||||
from .optimize import (_minimize_neldermead, _minimize_powell, _minimize_cg,
|
||||
_minimize_bfgs, _minimize_newtoncg,
|
||||
_minimize_scalar_brent, _minimize_scalar_bounded,
|
||||
_minimize_scalar_golden, MemoizeJac)
|
||||
from ._trustregion_dogleg import _minimize_dogleg
|
||||
from ._trustregion_ncg import _minimize_trust_ncg
|
||||
from ._trustregion_krylov import _minimize_trust_krylov
|
||||
from ._trustregion_exact import _minimize_trustregion_exact
|
||||
from ._trustregion_constr import _minimize_trustregion_constr
|
||||
|
||||
# constrained minimization
|
||||
from .lbfgsb import _minimize_lbfgsb
|
||||
from .tnc import _minimize_tnc
|
||||
from .cobyla import _minimize_cobyla
|
||||
from .slsqp import _minimize_slsqp
|
||||
from ._constraints import (old_bound_to_new, new_bounds_to_old,
|
||||
old_constraint_to_new, new_constraint_to_old,
|
||||
NonlinearConstraint, LinearConstraint, Bounds)
|
||||
from ._differentiable_functions import FD_METHODS
|
||||
|
||||
MINIMIZE_METHODS = ['nelder-mead', 'powell', 'cg', 'bfgs', 'newton-cg',
|
||||
'l-bfgs-b', 'tnc', 'cobyla', 'slsqp', 'trust-constr',
|
||||
'dogleg', 'trust-ncg', 'trust-exact', 'trust-krylov']
|
||||
|
||||
|
||||
def minimize(fun, x0, args=(), method=None, jac=None, hess=None,
|
||||
hessp=None, bounds=None, constraints=(), tol=None,
|
||||
callback=None, options=None):
|
||||
"""Minimization of scalar function of one or more variables.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fun : callable
|
||||
The objective function to be minimized.
|
||||
|
||||
``fun(x, *args) -> float``
|
||||
|
||||
where ``x`` is an 1-D array with shape (n,) and ``args``
|
||||
is a tuple of the fixed parameters needed to completely
|
||||
specify the function.
|
||||
x0 : ndarray, shape (n,)
|
||||
Initial guess. Array of real elements of size (n,),
|
||||
where 'n' is the number of independent variables.
|
||||
args : tuple, optional
|
||||
Extra arguments passed to the objective function and its
|
||||
derivatives (`fun`, `jac` and `hess` functions).
|
||||
method : str or callable, optional
|
||||
Type of solver. Should be one of
|
||||
|
||||
- 'Nelder-Mead' :ref:`(see here) <optimize.minimize-neldermead>`
|
||||
- 'Powell' :ref:`(see here) <optimize.minimize-powell>`
|
||||
- 'CG' :ref:`(see here) <optimize.minimize-cg>`
|
||||
- 'BFGS' :ref:`(see here) <optimize.minimize-bfgs>`
|
||||
- 'Newton-CG' :ref:`(see here) <optimize.minimize-newtoncg>`
|
||||
- 'L-BFGS-B' :ref:`(see here) <optimize.minimize-lbfgsb>`
|
||||
- 'TNC' :ref:`(see here) <optimize.minimize-tnc>`
|
||||
- 'COBYLA' :ref:`(see here) <optimize.minimize-cobyla>`
|
||||
- 'SLSQP' :ref:`(see here) <optimize.minimize-slsqp>`
|
||||
- 'trust-constr':ref:`(see here) <optimize.minimize-trustconstr>`
|
||||
- 'dogleg' :ref:`(see here) <optimize.minimize-dogleg>`
|
||||
- 'trust-ncg' :ref:`(see here) <optimize.minimize-trustncg>`
|
||||
- 'trust-exact' :ref:`(see here) <optimize.minimize-trustexact>`
|
||||
- 'trust-krylov' :ref:`(see here) <optimize.minimize-trustkrylov>`
|
||||
- custom - a callable object (added in version 0.14.0),
|
||||
see below for description.
|
||||
|
||||
If not given, chosen to be one of ``BFGS``, ``L-BFGS-B``, ``SLSQP``,
|
||||
depending on whether the problem has constraints or bounds.
|
||||
jac : {callable, '2-point', '3-point', 'cs', bool}, optional
|
||||
Method for computing the gradient vector. Only for CG, BFGS,
|
||||
Newton-CG, L-BFGS-B, TNC, SLSQP, dogleg, trust-ncg, trust-krylov,
|
||||
trust-exact and trust-constr.
|
||||
If it is a callable, it should be a function that returns the gradient
|
||||
vector:
|
||||
|
||||
``jac(x, *args) -> array_like, shape (n,)``
|
||||
|
||||
where ``x`` is an array with shape (n,) and ``args`` is a tuple with
|
||||
the fixed parameters. If `jac` is a Boolean and is True, `fun` is
|
||||
assumed to return the objective and gradient as an ``(f, g)`` tuple.
|
||||
Methods 'Newton-CG', 'trust-ncg', 'dogleg', 'trust-exact', and
|
||||
'trust-krylov' require that either a callable be supplied, or that
|
||||
`fun` return the objective and gradient.
|
||||
If None or False, the gradient will be estimated using 2-point finite
|
||||
difference estimation with an absolute step size.
|
||||
Alternatively, the keywords {'2-point', '3-point', 'cs'} can be used
|
||||
to select a finite difference scheme for numerical estimation of the
|
||||
gradient with a relative step size. These finite difference schemes
|
||||
obey any specified `bounds`.
|
||||
hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy}, optional
|
||||
Method for computing the Hessian matrix. Only for Newton-CG, dogleg,
|
||||
trust-ncg, trust-krylov, trust-exact and trust-constr. If it is
|
||||
callable, it should return the Hessian matrix:
|
||||
|
||||
``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)``
|
||||
|
||||
where x is a (n,) ndarray and `args` is a tuple with the fixed
|
||||
parameters. LinearOperator and sparse matrix returns are
|
||||
allowed only for 'trust-constr' method. Alternatively, the keywords
|
||||
{'2-point', '3-point', 'cs'} select a finite difference scheme
|
||||
for numerical estimation. Or, objects implementing
|
||||
`HessianUpdateStrategy` interface can be used to approximate
|
||||
the Hessian. Available quasi-Newton methods implementing
|
||||
this interface are:
|
||||
|
||||
- `BFGS`;
|
||||
- `SR1`.
|
||||
|
||||
Whenever the gradient is estimated via finite-differences,
|
||||
the Hessian cannot be estimated with options
|
||||
{'2-point', '3-point', 'cs'} and needs to be
|
||||
estimated using one of the quasi-Newton strategies.
|
||||
Finite-difference options {'2-point', '3-point', 'cs'} and
|
||||
`HessianUpdateStrategy` are available only for 'trust-constr' method.
|
||||
hessp : callable, optional
|
||||
Hessian of objective function times an arbitrary vector p. Only for
|
||||
Newton-CG, trust-ncg, trust-krylov, trust-constr.
|
||||
Only one of `hessp` or `hess` needs to be given. If `hess` is
|
||||
provided, then `hessp` will be ignored. `hessp` must compute the
|
||||
Hessian times an arbitrary vector:
|
||||
|
||||
``hessp(x, p, *args) -> ndarray shape (n,)``
|
||||
|
||||
where x is a (n,) ndarray, p is an arbitrary vector with
|
||||
dimension (n,) and `args` is a tuple with the fixed
|
||||
parameters.
|
||||
bounds : sequence or `Bounds`, optional
|
||||
Bounds on variables for L-BFGS-B, TNC, SLSQP, Powell, and
|
||||
trust-constr methods. There are two ways to specify the bounds:
|
||||
|
||||
1. Instance of `Bounds` class.
|
||||
2. Sequence of ``(min, max)`` pairs for each element in `x`. None
|
||||
is used to specify no bound.
|
||||
|
||||
constraints : {Constraint, dict} or List of {Constraint, dict}, optional
|
||||
Constraints definition (only for COBYLA, SLSQP and trust-constr).
|
||||
Constraints for 'trust-constr' are defined as a single object or a
|
||||
list of objects specifying constraints to the optimization problem.
|
||||
Available constraints are:
|
||||
|
||||
- `LinearConstraint`
|
||||
- `NonlinearConstraint`
|
||||
|
||||
Constraints for COBYLA, SLSQP are defined as a list of dictionaries.
|
||||
Each dictionary with fields:
|
||||
|
||||
type : str
|
||||
Constraint type: 'eq' for equality, 'ineq' for inequality.
|
||||
fun : callable
|
||||
The function defining the constraint.
|
||||
jac : callable, optional
|
||||
The Jacobian of `fun` (only for SLSQP).
|
||||
args : sequence, optional
|
||||
Extra arguments to be passed to the function and Jacobian.
|
||||
|
||||
Equality constraint means that the constraint function result is to
|
||||
be zero whereas inequality means that it is to be non-negative.
|
||||
Note that COBYLA only supports inequality constraints.
|
||||
tol : float, optional
|
||||
Tolerance for termination. For detailed control, use solver-specific
|
||||
options.
|
||||
options : dict, optional
|
||||
A dictionary of solver options. All methods accept the following
|
||||
generic options:
|
||||
|
||||
maxiter : int
|
||||
Maximum number of iterations to perform. Depending on the
|
||||
method each iteration may use several function evaluations.
|
||||
disp : bool
|
||||
Set to True to print convergence messages.
|
||||
|
||||
For method-specific options, see :func:`show_options()`.
|
||||
callback : callable, optional
|
||||
Called after each iteration. For 'trust-constr' it is a callable with
|
||||
the signature:
|
||||
|
||||
``callback(xk, OptimizeResult state) -> bool``
|
||||
|
||||
where ``xk`` is the current parameter vector and ``state``
|
||||
is an `OptimizeResult` object, with the same fields
|
||||
as the ones from the return. If callback returns True
|
||||
the algorithm execution is terminated.
|
||||
For all the other methods, the signature is:
|
||||
|
||||
``callback(xk)``
|
||||
|
||||
where ``xk`` is the current parameter vector.
|
||||
|
||||
Returns
|
||||
-------
|
||||
res : OptimizeResult
|
||||
The optimization result represented as a ``OptimizeResult`` object.
|
||||
Important attributes are: ``x`` the solution array, ``success`` a
|
||||
Boolean flag indicating if the optimizer exited successfully and
|
||||
``message`` which describes the cause of the termination. See
|
||||
`OptimizeResult` for a description of other attributes.
|
||||
|
||||
See also
|
||||
--------
|
||||
minimize_scalar : Interface to minimization algorithms for scalar
|
||||
univariate functions
|
||||
show_options : Additional options accepted by the solvers
|
||||
|
||||
Notes
|
||||
-----
|
||||
This section describes the available solvers that can be selected by the
|
||||
'method' parameter. The default method is *BFGS*.
|
||||
|
||||
**Unconstrained minimization**
|
||||
|
||||
Method :ref:`Nelder-Mead <optimize.minimize-neldermead>` uses the
|
||||
Simplex algorithm [1]_, [2]_. This algorithm is robust in many
|
||||
applications. However, if numerical computation of derivatives can be
|
||||
trusted, other algorithms using the first and/or second derivatives
|
||||
information might be preferred for their better performance in
|
||||
general.
|
||||
|
||||
Method :ref:`CG <optimize.minimize-cg>` uses a nonlinear conjugate
|
||||
gradient algorithm by Polak and Ribiere, a variant of the
|
||||
Fletcher-Reeves method described in [5]_ pp.120-122. Only the
|
||||
first derivatives are used.
|
||||
|
||||
Method :ref:`BFGS <optimize.minimize-bfgs>` uses the quasi-Newton
|
||||
method of Broyden, Fletcher, Goldfarb, and Shanno (BFGS) [5]_
|
||||
pp. 136. It uses the first derivatives only. BFGS has proven good
|
||||
performance even for non-smooth optimizations. This method also
|
||||
returns an approximation of the Hessian inverse, stored as
|
||||
`hess_inv` in the OptimizeResult object.
|
||||
|
||||
Method :ref:`Newton-CG <optimize.minimize-newtoncg>` uses a
|
||||
Newton-CG algorithm [5]_ pp. 168 (also known as the truncated
|
||||
Newton method). It uses a CG method to compute the search
|
||||
direction. See also *TNC* method for a box-constrained
|
||||
minimization with a similar algorithm. Suitable for large-scale
|
||||
problems.
|
||||
|
||||
Method :ref:`dogleg <optimize.minimize-dogleg>` uses the dog-leg
|
||||
trust-region algorithm [5]_ for unconstrained minimization. This
|
||||
algorithm requires the gradient and Hessian; furthermore the
|
||||
Hessian is required to be positive definite.
|
||||
|
||||
Method :ref:`trust-ncg <optimize.minimize-trustncg>` uses the
|
||||
Newton conjugate gradient trust-region algorithm [5]_ for
|
||||
unconstrained minimization. This algorithm requires the gradient
|
||||
and either the Hessian or a function that computes the product of
|
||||
the Hessian with a given vector. Suitable for large-scale problems.
|
||||
|
||||
Method :ref:`trust-krylov <optimize.minimize-trustkrylov>` uses
|
||||
the Newton GLTR trust-region algorithm [14]_, [15]_ for unconstrained
|
||||
minimization. This algorithm requires the gradient
|
||||
and either the Hessian or a function that computes the product of
|
||||
the Hessian with a given vector. Suitable for large-scale problems.
|
||||
On indefinite problems it usually requires fewer iterations than the
|
||||
`trust-ncg` method and is recommended for medium and large-scale problems.
|
||||
|
||||
Method :ref:`trust-exact <optimize.minimize-trustexact>`
|
||||
is a trust-region method for unconstrained minimization in which
|
||||
quadratic subproblems are solved almost exactly [13]_. This
|
||||
algorithm requires the gradient and the Hessian (which is
|
||||
*not* required to be positive definite). In many
|
||||
situations, it is the Newton method that converges in the fewest iterations
|
||||
and is the most recommended for small and medium-size problems.
|
||||
|
||||
**Bound-Constrained minimization**
|
||||
|
||||
Method :ref:`L-BFGS-B <optimize.minimize-lbfgsb>` uses the L-BFGS-B
|
||||
algorithm [6]_, [7]_ for bound constrained minimization.
|
||||
|
||||
Method :ref:`Powell <optimize.minimize-powell>` is a modification
|
||||
of Powell's method [3]_, [4]_ which is a conjugate direction
|
||||
method. It performs sequential one-dimensional minimizations along
|
||||
each vector of the directions set (`direc` field in `options` and
|
||||
`info`), which is updated at each iteration of the main
|
||||
minimization loop. The function need not be differentiable, and no
|
||||
derivatives are taken. If bounds are not provided, then an
|
||||
unbounded line search will be used. If bounds are provided and
|
||||
the initial guess is within the bounds, then every function
|
||||
evaluation throughout the minimization procedure will be within
|
||||
the bounds. If bounds are provided, the initial guess is outside
|
||||
the bounds, and `direc` is full rank (default has full rank), then
|
||||
some function evaluations during the first iteration may be
|
||||
outside the bounds, but every function evaluation after the first
|
||||
iteration will be within the bounds. If `direc` is not full rank,
|
||||
then some parameters may not be optimized and the solution is not
|
||||
guaranteed to be within the bounds.
|
||||
|
||||
Method :ref:`TNC <optimize.minimize-tnc>` uses a truncated Newton
|
||||
algorithm [5]_, [8]_ to minimize a function with variables subject
|
||||
to bounds. This algorithm uses gradient information; it is also
|
||||
called Newton Conjugate-Gradient. It differs from the *Newton-CG*
|
||||
method described above as it wraps a C implementation and allows
|
||||
each variable to be given upper and lower bounds.
|
||||
|
||||
**Constrained Minimization**
|
||||
|
||||
Method :ref:`COBYLA <optimize.minimize-cobyla>` uses the
|
||||
Constrained Optimization BY Linear Approximation (COBYLA) method
|
||||
[9]_, [10]_, [11]_. The algorithm is based on linear
|
||||
approximations to the objective function and each constraint. The
|
||||
method wraps a FORTRAN implementation of the algorithm. The
|
||||
constraints functions 'fun' may return either a single number
|
||||
or an array or list of numbers.
|
||||
|
||||
Method :ref:`SLSQP <optimize.minimize-slsqp>` uses Sequential
|
||||
Least SQuares Programming to minimize a function of several
|
||||
variables with any combination of bounds, equality and inequality
|
||||
constraints. The method wraps the SLSQP Optimization subroutine
|
||||
originally implemented by Dieter Kraft [12]_. Note that the
|
||||
wrapper handles infinite values in bounds by converting them into
|
||||
large floating values.
|
||||
|
||||
Method :ref:`trust-constr <optimize.minimize-trustconstr>` is a
|
||||
trust-region algorithm for constrained optimization. It switches
|
||||
between two implementations depending on the problem definition.
|
||||
It is the most versatile constrained minimization algorithm
|
||||
implemented in SciPy and the most appropriate for large-scale problems.
|
||||
For equality constrained problems it is an implementation of Byrd-Omojokun
|
||||
Trust-Region SQP method described in [17]_ and in [5]_, p. 549. When
|
||||
inequality constraints are imposed as well, it switches to the trust-region
|
||||
interior point method described in [16]_. This interior point algorithm,
|
||||
in turn, solves inequality constraints by introducing slack variables
|
||||
and solving a sequence of equality-constrained barrier problems
|
||||
for progressively smaller values of the barrier parameter.
|
||||
The previously described equality constrained SQP method is
|
||||
used to solve the subproblems with increasing levels of accuracy
|
||||
as the iterate gets closer to a solution.
|
||||
|
||||
**Finite-Difference Options**
|
||||
|
||||
For Method :ref:`trust-constr <optimize.minimize-trustconstr>`
|
||||
the gradient and the Hessian may be approximated using
|
||||
three finite-difference schemes: {'2-point', '3-point', 'cs'}.
|
||||
The scheme 'cs' is, potentially, the most accurate but it
|
||||
requires the function to correctly handle complex inputs and to
|
||||
be differentiable in the complex plane. The scheme '3-point' is more
|
||||
accurate than '2-point' but requires twice as many operations.
|
||||
|
||||
**Custom minimizers**
|
||||
|
||||
It may be useful to pass a custom minimization method, for example
|
||||
when using a frontend to this method such as `scipy.optimize.basinhopping`
|
||||
or a different library. You can simply pass a callable as the ``method``
|
||||
parameter.
|
||||
|
||||
The callable is called as ``method(fun, x0, args, **kwargs, **options)``
|
||||
where ``kwargs`` corresponds to any other parameters passed to `minimize`
|
||||
(such as `callback`, `hess`, etc.), except the `options` dict, which has
|
||||
its contents also passed as `method` parameters pair by pair. Also, if
|
||||
`jac` has been passed as a bool type, `jac` and `fun` are mangled so that
|
||||
`fun` returns just the function values and `jac` is converted to a function
|
||||
returning the Jacobian. The method shall return an `OptimizeResult`
|
||||
object.
|
||||
|
||||
The provided `method` callable must be able to accept (and possibly ignore)
|
||||
arbitrary parameters; the set of parameters accepted by `minimize` may
|
||||
expand in future versions and then these parameters will be passed to
|
||||
the method. You can find an example in the scipy.optimize tutorial.
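
As a minimal sketch (the name ``custmin`` and its naive coordinate-wise
search are illustrative only, not an existing SciPy method), a custom
minimizer could look like::

    import numpy as np
    from scipy.optimize import OptimizeResult, minimize, rosen

    def custmin(fun, x0, args=(), maxiter=100, stepsize=0.1,
                callback=None, **options):
        # Try a fixed step along each coordinate in both directions and
        # keep any step that lowers the objective; stop when nothing helps.
        x = np.asarray(x0, dtype=float).copy()
        fx = fun(x, *args)
        nfev = 1
        for _ in range(maxiter):
            improved = False
            for i in range(x.size):
                for step in (stepsize, -stepsize):
                    xt = x.copy()
                    xt[i] += step
                    ft = fun(xt, *args)
                    nfev += 1
                    if ft < fx:
                        x, fx, improved = xt, ft, True
            if callback is not None:
                callback(x)
            if not improved:
                break
        return OptimizeResult(x=x, fun=fx, nfev=nfev, success=True)

    res = minimize(rosen, [1.3, 0.7], method=custmin,
                   options={'stepsize': 0.05})

Unknown keyword arguments passed by `minimize` (for example ``jac`` or
``bounds``) are simply collected in ``**options`` and ignored here.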
|
||||
|
||||
.. versionadded:: 0.11.0
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Nelder, J A, and R Mead. 1965. A Simplex Method for Function
|
||||
Minimization. The Computer Journal 7: 308-13.
|
||||
.. [2] Wright M H. 1996. Direct search methods: Once scorned, now
|
||||
respectable, in Numerical Analysis 1995: Proceedings of the 1995
|
||||
Dundee Biennial Conference in Numerical Analysis (Eds. D F
|
||||
Griffiths and G A Watson). Addison Wesley Longman, Harlow, UK.
|
||||
191-208.
|
||||
.. [3] Powell, M J D. 1964. An efficient method for finding the minimum of
|
||||
a function of several variables without calculating derivatives. The
|
||||
Computer Journal 7: 155-162.
|
||||
.. [4] Press W, S A Teukolsky, W T Vetterling and B P Flannery.
|
||||
Numerical Recipes (any edition), Cambridge University Press.
|
||||
.. [5] Nocedal, J, and S J Wright. 2006. Numerical Optimization.
|
||||
Springer New York.
|
||||
.. [6] Byrd, R H and P Lu and J. Nocedal. 1995. A Limited Memory
|
||||
Algorithm for Bound Constrained Optimization. SIAM Journal on
|
||||
Scientific and Statistical Computing 16 (5): 1190-1208.
|
||||
.. [7] Zhu, C and R H Byrd and J Nocedal. 1997. L-BFGS-B: Algorithm
|
||||
778: L-BFGS-B, FORTRAN routines for large scale bound constrained
|
||||
optimization. ACM Transactions on Mathematical Software 23 (4):
|
||||
550-560.
|
||||
.. [8] Nash, S G. Newton-Type Minimization Via the Lanczos Method.
|
||||
1984. SIAM Journal of Numerical Analysis 21: 770-778.
|
||||
.. [9] Powell, M J D. A direct search optimization method that models
|
||||
the objective and constraint functions by linear interpolation.
|
||||
1994. Advances in Optimization and Numerical Analysis, eds. S. Gomez
|
||||
and J-P Hennart, Kluwer Academic (Dordrecht), 51-67.
|
||||
.. [10] Powell M J D. Direct search algorithms for optimization
|
||||
calculations. 1998. Acta Numerica 7: 287-336.
|
||||
.. [11] Powell M J D. A view of algorithms for optimization without
|
||||
derivatives. 2007. Cambridge University Technical Report DAMTP
|
||||
2007/NA03
|
||||
.. [12] Kraft, D. A software package for sequential quadratic
|
||||
programming. 1988. Tech. Rep. DFVLR-FB 88-28, DLR German Aerospace
|
||||
Center -- Institute for Flight Mechanics, Koln, Germany.
|
||||
.. [13] Conn, A. R., Gould, N. I., and Toint, P. L.
|
||||
Trust region methods. 2000. SIAM. pp. 169-200.
|
||||
.. [14] F. Lenders, C. Kirches, A. Potschka: "trlib: A vector-free
|
||||
implementation of the GLTR method for iterative solution of
|
||||
the trust region problem", https://arxiv.org/abs/1611.04718
|
||||
.. [15] N. Gould, S. Lucidi, M. Roma, P. Toint: "Solving the
|
||||
Trust-Region Subproblem using the Lanczos Method",
|
||||
SIAM J. Optim., 9(2), 504--525, (1999).
|
||||
.. [16] Byrd, Richard H., Mary E. Hribar, and Jorge Nocedal. 1999.
|
||||
An interior point algorithm for large-scale nonlinear programming.
|
||||
SIAM Journal on Optimization 9.4: 877-900.
|
||||
.. [17] Lalee, Marucha, Jorge Nocedal, and Todd Plantega. 1998. On the
|
||||
implementation of an algorithm for large-scale equality constrained
|
||||
optimization. SIAM Journal on Optimization 8.3: 682-706.
|
||||
|
||||
Examples
|
||||
--------
|
||||
Let us consider the problem of minimizing the Rosenbrock function. This
|
||||
function (and its respective derivatives) is implemented in `rosen`
|
||||
(resp. `rosen_der`, `rosen_hess`) in `scipy.optimize`.
|
||||
|
||||
>>> from scipy.optimize import minimize, rosen, rosen_der
|
||||
|
||||
A simple application of the *Nelder-Mead* method is:
|
||||
|
||||
>>> x0 = [1.3, 0.7, 0.8, 1.9, 1.2]
|
||||
>>> res = minimize(rosen, x0, method='Nelder-Mead', tol=1e-6)
|
||||
>>> res.x
|
||||
array([ 1., 1., 1., 1., 1.])
|
||||
|
||||
Now using the *BFGS* algorithm, using the first derivative and a few
|
||||
options:
|
||||
|
||||
>>> res = minimize(rosen, x0, method='BFGS', jac=rosen_der,
|
||||
... options={'gtol': 1e-6, 'disp': True})
|
||||
Optimization terminated successfully.
|
||||
Current function value: 0.000000
|
||||
Iterations: 26
|
||||
Function evaluations: 31
|
||||
Gradient evaluations: 31
|
||||
>>> res.x
|
||||
array([ 1., 1., 1., 1., 1.])
|
||||
>>> print(res.message)
|
||||
Optimization terminated successfully.
|
||||
>>> res.hess_inv
|
||||
array([[ 0.00749589, 0.01255155, 0.02396251, 0.04750988, 0.09495377], # may vary
|
||||
[ 0.01255155, 0.02510441, 0.04794055, 0.09502834, 0.18996269],
|
||||
[ 0.02396251, 0.04794055, 0.09631614, 0.19092151, 0.38165151],
|
||||
[ 0.04750988, 0.09502834, 0.19092151, 0.38341252, 0.7664427 ],
|
||||
[ 0.09495377, 0.18996269, 0.38165151, 0.7664427, 1.53713523]])
|
||||
|
||||
|
||||
Next, consider a minimization problem with several constraints (namely
|
||||
Example 16.4 from [5]_). The objective function is:
|
||||
|
||||
>>> fun = lambda x: (x[0] - 1)**2 + (x[1] - 2.5)**2
|
||||
|
||||
There are three constraints defined as:
|
||||
|
||||
>>> cons = ({'type': 'ineq', 'fun': lambda x: x[0] - 2 * x[1] + 2},
|
||||
... {'type': 'ineq', 'fun': lambda x: -x[0] - 2 * x[1] + 6},
|
||||
... {'type': 'ineq', 'fun': lambda x: -x[0] + 2 * x[1] + 2})
|
||||
|
||||
And variables must be positive, hence the following bounds:
|
||||
|
||||
>>> bnds = ((0, None), (0, None))
|
||||
|
||||
The optimization problem is solved using the SLSQP method as:
|
||||
|
||||
>>> res = minimize(fun, (2, 0), method='SLSQP', bounds=bnds,
|
||||
... constraints=cons)
|
||||
|
||||
It should converge to the theoretical solution (1.4, 1.7).
|
||||
|
||||
"""
|
||||
x0 = np.asarray(x0)
|
||||
if x0.dtype.kind in np.typecodes["AllInteger"]:
|
||||
x0 = np.asarray(x0, dtype=float)
|
||||
|
||||
if not isinstance(args, tuple):
|
||||
args = (args,)
|
||||
|
||||
if method is None:
|
||||
# Select automatically
|
||||
if constraints:
|
||||
method = 'SLSQP'
|
||||
elif bounds is not None:
|
||||
method = 'L-BFGS-B'
|
||||
else:
|
||||
method = 'BFGS'
|
||||
|
||||
if callable(method):
|
||||
meth = "_custom"
|
||||
else:
|
||||
meth = method.lower()
|
||||
|
||||
if options is None:
|
||||
options = {}
|
||||
# check if optional parameters are supported by the selected method
|
||||
# - jac
|
||||
if meth in ('nelder-mead', 'powell', 'cobyla') and bool(jac):
|
||||
warn('Method %s does not use gradient information (jac).' % method,
|
||||
RuntimeWarning)
|
||||
# - hess
|
||||
if meth not in ('newton-cg', 'dogleg', 'trust-ncg', 'trust-constr',
|
||||
'trust-krylov', 'trust-exact', '_custom') and hess is not None:
|
||||
warn('Method %s does not use Hessian information (hess).' % method,
|
||||
RuntimeWarning)
|
||||
# - hessp
|
||||
if meth not in ('newton-cg', 'dogleg', 'trust-ncg', 'trust-constr',
|
||||
'trust-krylov', '_custom') \
|
||||
and hessp is not None:
|
||||
warn('Method %s does not use Hessian-vector product '
|
||||
'information (hessp).' % method, RuntimeWarning)
|
||||
# - constraints or bounds
|
||||
if (meth in ('nelder-mead', 'cg', 'bfgs', 'newton-cg', 'dogleg',
|
||||
'trust-ncg') and (bounds is not None or np.any(constraints))):
|
||||
warn('Method %s cannot handle constraints nor bounds.' % method,
|
||||
RuntimeWarning)
|
||||
if meth in ('l-bfgs-b', 'tnc', 'powell') and np.any(constraints):
|
||||
warn('Method %s cannot handle constraints.' % method,
|
||||
RuntimeWarning)
|
||||
if meth == 'cobyla' and bounds is not None:
|
||||
warn('Method %s cannot handle bounds.' % method,
|
||||
RuntimeWarning)
|
||||
# - callback
|
||||
if (meth in ('cobyla',) and callback is not None):
|
||||
warn('Method %s does not support callback.' % method, RuntimeWarning)
|
||||
# - return_all
|
||||
if (meth in ('l-bfgs-b', 'tnc', 'cobyla', 'slsqp') and
|
||||
options.get('return_all', False)):
|
||||
warn('Method %s does not support the return_all option.' % method,
|
||||
RuntimeWarning)
|
||||
|
||||
# check gradient vector
|
||||
if callable(jac):
|
||||
pass
|
||||
elif jac is True:
|
||||
# fun returns func and grad
|
||||
fun = MemoizeJac(fun)
|
||||
jac = fun.derivative
|
||||
elif (jac in FD_METHODS and
|
||||
meth in ['trust-constr', 'bfgs', 'cg', 'l-bfgs-b', 'tnc']):
|
||||
# finite differences
|
||||
pass
|
||||
elif meth in ['trust-constr']:
|
||||
# default jac calculation for this method
|
||||
jac = '2-point'
|
||||
elif jac is None or bool(jac) is False:
|
||||
# this will cause e.g. LBFGS to use forward difference, absolute step
|
||||
jac = None
|
||||
else:
|
||||
# default if jac option is not understood
|
||||
jac = None
|
||||
|
||||
# set default tolerances
|
||||
if tol is not None:
|
||||
options = dict(options)
|
||||
if meth == 'nelder-mead':
|
||||
options.setdefault('xatol', tol)
|
||||
options.setdefault('fatol', tol)
|
||||
if meth in ('newton-cg', 'powell', 'tnc'):
|
||||
options.setdefault('xtol', tol)
|
||||
if meth in ('powell', 'l-bfgs-b', 'tnc', 'slsqp'):
|
||||
options.setdefault('ftol', tol)
|
||||
if meth in ('bfgs', 'cg', 'l-bfgs-b', 'tnc', 'dogleg',
|
||||
'trust-ncg', 'trust-exact', 'trust-krylov'):
|
||||
options.setdefault('gtol', tol)
|
||||
if meth in ('cobyla', '_custom'):
|
||||
options.setdefault('tol', tol)
|
||||
if meth == 'trust-constr':
|
||||
options.setdefault('xtol', tol)
|
||||
options.setdefault('gtol', tol)
|
||||
options.setdefault('barrier_tol', tol)
|
||||
|
||||
if meth == '_custom':
|
||||
# custom method called before bounds and constraints are 'standardised'
|
||||
# custom method should be able to accept whatever bounds/constraints
|
||||
# are provided to it.
|
||||
return method(fun, x0, args=args, jac=jac, hess=hess, hessp=hessp,
|
||||
bounds=bounds, constraints=constraints,
|
||||
callback=callback, **options)
|
||||
|
||||
if bounds is not None:
|
||||
bounds = standardize_bounds(bounds, x0, meth)
|
||||
|
||||
if constraints is not None:
|
||||
constraints = standardize_constraints(constraints, x0, meth)
|
||||
|
||||
if meth == 'nelder-mead':
|
||||
return _minimize_neldermead(fun, x0, args, callback, **options)
|
||||
elif meth == 'powell':
|
||||
return _minimize_powell(fun, x0, args, callback, bounds, **options)
|
||||
elif meth == 'cg':
|
||||
return _minimize_cg(fun, x0, args, jac, callback, **options)
|
||||
elif meth == 'bfgs':
|
||||
return _minimize_bfgs(fun, x0, args, jac, callback, **options)
|
||||
elif meth == 'newton-cg':
|
||||
return _minimize_newtoncg(fun, x0, args, jac, hess, hessp, callback,
|
||||
**options)
|
||||
elif meth == 'l-bfgs-b':
|
||||
return _minimize_lbfgsb(fun, x0, args, jac, bounds,
|
||||
callback=callback, **options)
|
||||
elif meth == 'tnc':
|
||||
return _minimize_tnc(fun, x0, args, jac, bounds, callback=callback,
|
||||
**options)
|
||||
elif meth == 'cobyla':
|
||||
return _minimize_cobyla(fun, x0, args, constraints, **options)
|
||||
elif meth == 'slsqp':
|
||||
return _minimize_slsqp(fun, x0, args, jac, bounds,
|
||||
constraints, callback=callback, **options)
|
||||
elif meth == 'trust-constr':
|
||||
return _minimize_trustregion_constr(fun, x0, args, jac, hess, hessp,
|
||||
bounds, constraints,
|
||||
callback=callback, **options)
|
||||
elif meth == 'dogleg':
|
||||
return _minimize_dogleg(fun, x0, args, jac, hess,
|
||||
callback=callback, **options)
|
||||
elif meth == 'trust-ncg':
|
||||
return _minimize_trust_ncg(fun, x0, args, jac, hess, hessp,
|
||||
callback=callback, **options)
|
||||
elif meth == 'trust-krylov':
|
||||
return _minimize_trust_krylov(fun, x0, args, jac, hess, hessp,
|
||||
callback=callback, **options)
|
||||
elif meth == 'trust-exact':
|
||||
return _minimize_trustregion_exact(fun, x0, args, jac, hess,
|
||||
callback=callback, **options)
|
||||
else:
|
||||
raise ValueError('Unknown solver %s' % method)
|
||||
|
||||
|
||||
def minimize_scalar(fun, bracket=None, bounds=None, args=(),
|
||||
method='brent', tol=None, options=None):
|
||||
"""Minimization of scalar function of one variable.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fun : callable
|
||||
Objective function.
|
||||
Scalar function, must return a scalar.
|
||||
bracket : sequence, optional
|
||||
For methods 'brent' and 'golden', `bracket` defines the bracketing
|
||||
interval and can either have three items ``(a, b, c)`` so that
|
||||
``a < b < c`` and ``fun(b) < fun(a), fun(c)`` or two items ``a`` and
|
||||
``c`` which are assumed to be a starting interval for a downhill
|
||||
bracket search (see `bracket`); it doesn't always mean that the
|
||||
obtained solution will satisfy ``a <= x <= c``.
|
||||
bounds : sequence, optional
|
||||
For method 'bounded', `bounds` is mandatory and must have two items
|
||||
corresponding to the optimization bounds.
|
||||
args : tuple, optional
|
||||
Extra arguments passed to the objective function.
|
||||
method : str or callable, optional
|
||||
Type of solver. Should be one of:
|
||||
|
||||
- 'Brent' :ref:`(see here) <optimize.minimize_scalar-brent>`
|
||||
- 'Bounded' :ref:`(see here) <optimize.minimize_scalar-bounded>`
|
||||
- 'Golden' :ref:`(see here) <optimize.minimize_scalar-golden>`
|
||||
- custom - a callable object (added in version 0.14.0), see below
|
||||
|
||||
tol : float, optional
|
||||
Tolerance for termination. For detailed control, use solver-specific
|
||||
options.
|
||||
options : dict, optional
|
||||
A dictionary of solver options.
|
||||
|
||||
maxiter : int
|
||||
Maximum number of iterations to perform.
|
||||
disp : bool
|
||||
Set to True to print convergence messages.
|
||||
|
||||
See :func:`show_options()` for solver-specific options.
|
||||
|
||||
Returns
|
||||
-------
|
||||
res : OptimizeResult
|
||||
The optimization result represented as an ``OptimizeResult`` object.
|
||||
Important attributes are: ``x`` the solution array, ``success`` a
|
||||
Boolean flag indicating if the optimizer exited successfully and
|
||||
``message`` which describes the cause of the termination. See
|
||||
`OptimizeResult` for a description of other attributes.
|
||||
|
||||
See also
|
||||
--------
|
||||
minimize : Interface to minimization algorithms for scalar multivariate
|
||||
functions
|
||||
show_options : Additional options accepted by the solvers
|
||||
|
||||
Notes
|
||||
-----
|
||||
This section describes the available solvers that can be selected by the
|
||||
'method' parameter. The default method is *Brent*.
|
||||
|
||||
Method :ref:`Brent <optimize.minimize_scalar-brent>` uses Brent's
|
||||
algorithm to find a local minimum. The algorithm uses inverse
|
||||
parabolic interpolation when possible to speed up convergence of
|
||||
the golden section method.
|
||||
|
||||
Method :ref:`Golden <optimize.minimize_scalar-golden>` uses the
|
||||
golden section search technique. It uses an analog of the bisection
|
||||
method to decrease the bracketed interval. It is usually
|
||||
preferable to use the *Brent* method.
|
||||
|
||||
Method :ref:`Bounded <optimize.minimize_scalar-bounded>` can
|
||||
perform bounded minimization. It uses the Brent method to find a
|
||||
local minimum in the interval x1 < xopt < x2.
|
||||
|
||||
**Custom minimizers**
|
||||
|
||||
It may be useful to pass a custom minimization method, for example
|
||||
when using some library frontend to minimize_scalar. You can simply
|
||||
pass a callable as the ``method`` parameter.
|
||||
|
||||
The callable is called as ``method(fun, args, **kwargs, **options)``
|
||||
where ``kwargs`` corresponds to any other parameters passed to `minimize_scalar`
|
||||
(such as `bracket`, `tol`, etc.), except the `options` dict, which has
|
||||
its contents also passed as `method` parameters pair by pair. The method
|
||||
shall return an `OptimizeResult` object.
|
||||
|
||||
The provided `method` callable must be able to accept (and possibly ignore)
|
||||
arbitrary parameters; the set of parameters accepted by `minimize_scalar` may
|
||||
expand in future versions and then these parameters will be passed to
|
||||
the method. You can find an example in the scipy.optimize tutorial.
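
As a small sketch (``grid_min`` and its brute-force grid scan are
illustrative only, not an existing SciPy method), a custom scalar
minimizer could look like::

    import numpy as np
    from scipy.optimize import OptimizeResult, minimize_scalar

    def grid_min(fun, args=(), bracket=None, bounds=None,
                 npoints=1001, **options):
        # Evaluate the objective on a fixed grid spanning the bracket
        # (or a default interval) and return the best grid point.
        lo, hi = (bracket[0], bracket[-1]) if bracket else (-10.0, 10.0)
        xs = np.linspace(lo, hi, npoints)
        fs = np.array([fun(x, *args) for x in xs])
        i = int(np.argmin(fs))
        return OptimizeResult(x=xs[i], fun=fs[i], nfev=npoints,
                              success=True)

    res = minimize_scalar(lambda x: (x - 2.0)**2, bracket=(0, 4),
                          method=grid_min, options={'npoints': 401})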
|
||||
|
||||
.. versionadded:: 0.11.0
|
||||
|
||||
Examples
|
||||
--------
|
||||
Consider the problem of minimizing the following function.
|
||||
|
||||
>>> def f(x):
|
||||
... return (x - 2) * x * (x + 2)**2
|
||||
|
||||
Using the *Brent* method, we find the local minimum as:
|
||||
|
||||
>>> from scipy.optimize import minimize_scalar
|
||||
>>> res = minimize_scalar(f)
|
||||
>>> res.x
|
||||
1.28077640403
|
||||
|
||||
Using the *Bounded* method, we find a local minimum with specified
|
||||
bounds as:
|
||||
|
||||
>>> res = minimize_scalar(f, bounds=(-3, -1), method='bounded')
|
||||
>>> res.x
|
||||
-2.0000002026
|
||||
|
||||
"""
|
||||
if not isinstance(args, tuple):
|
||||
args = (args,)
|
||||
|
||||
if callable(method):
|
||||
meth = "_custom"
|
||||
else:
|
||||
meth = method.lower()
|
||||
if options is None:
|
||||
options = {}
|
||||
|
||||
if tol is not None:
|
||||
options = dict(options)
|
||||
if meth == 'bounded' and 'xatol' not in options:
|
||||
warn("Method 'bounded' does not support relative tolerance in x; "
|
||||
"defaulting to absolute tolerance.", RuntimeWarning)
|
||||
options['xatol'] = tol
|
||||
elif meth == '_custom':
|
||||
options.setdefault('tol', tol)
|
||||
else:
|
||||
options.setdefault('xtol', tol)
|
||||
|
||||
if meth == '_custom':
|
||||
return method(fun, args=args, bracket=bracket, bounds=bounds, **options)
|
||||
elif meth == 'brent':
|
||||
return _minimize_scalar_brent(fun, bracket, args, **options)
|
||||
elif meth == 'bounded':
|
||||
if bounds is None:
|
||||
raise ValueError('The `bounds` parameter is mandatory for '
|
||||
'method `bounded`.')
|
||||
# replace boolean "disp" option, if specified, by an integer value, as
|
||||
# expected by _minimize_scalar_bounded()
|
||||
disp = options.get('disp')
|
||||
if isinstance(disp, bool):
|
||||
options['disp'] = 2 * int(disp)
|
||||
return _minimize_scalar_bounded(fun, bounds, args, **options)
|
||||
elif meth == 'golden':
|
||||
return _minimize_scalar_golden(fun, bracket, args, **options)
|
||||
else:
|
||||
raise ValueError('Unknown solver %s' % method)
|
||||
|
||||
|
||||
def standardize_bounds(bounds, x0, meth):
|
||||
"""Converts bounds to the form required by the solver."""
|
||||
if meth in {'trust-constr', 'powell'}:
|
||||
if not isinstance(bounds, Bounds):
|
||||
lb, ub = old_bound_to_new(bounds)
|
||||
bounds = Bounds(lb, ub)
|
||||
elif meth in ('l-bfgs-b', 'tnc', 'slsqp'):
|
||||
if isinstance(bounds, Bounds):
|
||||
bounds = new_bounds_to_old(bounds.lb, bounds.ub, x0.shape[0])
|
||||
return bounds
|
||||
|
||||
|
||||
def standardize_constraints(constraints, x0, meth):
|
||||
"""Converts constraints to the form required by the solver."""
|
||||
all_constraint_types = (NonlinearConstraint, LinearConstraint, dict)
|
||||
new_constraint_types = all_constraint_types[:-1]
|
||||
if isinstance(constraints, all_constraint_types):
|
||||
constraints = [constraints]
|
||||
constraints = list(constraints) # ensure it's a mutable sequence
|
||||
|
||||
if meth == 'trust-constr':
|
||||
for i, con in enumerate(constraints):
|
||||
if not isinstance(con, new_constraint_types):
|
||||
constraints[i] = old_constraint_to_new(i, con)
|
||||
else:
|
||||
# iterate over copy, changing original
|
||||
for i, con in enumerate(list(constraints)):
|
||||
if isinstance(con, new_constraint_types):
|
||||
old_constraints = new_constraint_to_old(con, x0)
|
||||
constraints[i] = old_constraints[0]
|
||||
constraints.extend(old_constraints[1:]) # appends 1 if present
|
||||
|
||||
return constraints
|
BIN
venv/Lib/site-packages/scipy/optimize/_minpack.cp36-win32.pyd
Normal file
Binary file not shown.
84
venv/Lib/site-packages/scipy/optimize/_nnls.py
Normal file
|
@ -0,0 +1,84 @@
|
|||
from . import __nnls
|
||||
from numpy import asarray_chkfinite, zeros, double
|
||||
|
||||
__all__ = ['nnls']
|
||||
|
||||
|
||||
def nnls(A, b, maxiter=None):
|
||||
"""
|
||||
Solve ``argmin_x || Ax - b ||_2`` for ``x>=0``. This is a wrapper
|
||||
for a FORTRAN non-negative least squares solver.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
A : ndarray
|
||||
Matrix ``A`` as shown above.
|
||||
b : ndarray
|
||||
Right-hand side vector.
|
||||
maxiter: int, optional
|
||||
Maximum number of iterations, optional.
|
||||
Default is ``3 * A.shape[1]``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
x : ndarray
|
||||
Solution vector.
|
||||
rnorm : float
|
||||
The residual, ``|| Ax-b ||_2``.
|
||||
|
||||
See Also
|
||||
--------
|
||||
lsq_linear : Linear least squares with bounds on the variables
|
||||
|
||||
Notes
|
||||
-----
|
||||
The FORTRAN code was published in the book below. The algorithm
|
||||
is an active set method. It solves the KKT (Karush-Kuhn-Tucker)
|
||||
conditions for the non-negative least squares problem.
|
||||
|
||||
References
|
||||
----------
|
||||
Lawson C., Hanson R.J., (1987) Solving Least Squares Problems, SIAM
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> from scipy.optimize import nnls
|
||||
>>> import numpy as np
|
||||
>>> A = np.array([[1, 0], [1, 0], [0, 1]])
|
||||
>>> b = np.array([2, 1, 1])
|
||||
>>> nnls(A, b)
|
||||
(array([1.5, 1. ]), 0.7071067811865475)
|
||||
|
||||
>>> b = np.array([-1, -1, -1])
|
||||
>>> nnls(A, b)
|
||||
(array([0., 0.]), 1.7320508075688772)
|
||||
|
||||
"""
|
||||
|
||||
A, b = map(asarray_chkfinite, (A, b))
|
||||
|
||||
if len(A.shape) != 2:
|
||||
raise ValueError("Expected a two-dimensional array (matrix)" +
|
||||
", but the shape of A is %s" % (A.shape, ))
|
||||
if len(b.shape) != 1:
|
||||
raise ValueError("Expected a one-dimensional array (vector" +
|
||||
", but the shape of b is %s" % (b.shape, ))
|
||||
|
||||
m, n = A.shape
|
||||
|
||||
if m != b.shape[0]:
|
||||
raise ValueError(
|
||||
"Incompatible dimensions. The first dimension of " +
|
||||
"A is %s, while the shape of b is %s" % (m, (b.shape[0], )))
|
||||
|
||||
maxiter = -1 if maxiter is None else int(maxiter)
|
||||
|
||||
w = zeros((n,), dtype=double)
|
||||
zz = zeros((m,), dtype=double)
|
||||
index = zeros((n,), dtype=int)
|
||||
|
||||
x, rnorm, mode = __nnls.nnls(A, m, n, b, w, zz, index, maxiter)
|
||||
if mode != 1:
|
||||
raise RuntimeError("too many iterations")
|
||||
|
||||
return x, rnorm
|
682
venv/Lib/site-packages/scipy/optimize/_numdiff.py
Normal file
|
@ -0,0 +1,682 @@
|
|||
"""Routines for numerical differentiation."""
|
||||
|
||||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
|
||||
from scipy.sparse.linalg import LinearOperator
|
||||
from ..sparse import issparse, csc_matrix, csr_matrix, coo_matrix, find
|
||||
from ._group_columns import group_dense, group_sparse
|
||||
|
||||
EPS = np.finfo(np.float64).eps
|
||||
|
||||
|
||||
def _adjust_scheme_to_bounds(x0, h, num_steps, scheme, lb, ub):
|
||||
"""Adjust final difference scheme to the presence of bounds.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x0 : ndarray, shape (n,)
|
||||
Point at which we wish to estimate derivative.
|
||||
h : ndarray, shape (n,)
|
||||
Desired absolute finite difference steps.
|
||||
num_steps : int
|
||||
Number of `h` steps in one direction required to implement finite
|
||||
difference scheme. For example, 2 means that we need to evaluate
|
||||
f(x0 + 2 * h) or f(x0 - 2 * h)
|
||||
scheme : {'1-sided', '2-sided'}
|
||||
Whether steps in one or both directions are required. In other
|
||||
words '1-sided' applies to forward and backward schemes, '2-sided'
|
||||
applies to center schemes.
|
||||
lb : ndarray, shape (n,)
|
||||
Lower bounds on independent variables.
|
||||
ub : ndarray, shape (n,)
|
||||
Upper bounds on independent variables.
|
||||
|
||||
Returns
|
||||
-------
|
||||
h_adjusted : ndarray, shape (n,)
|
||||
Adjusted absolute step sizes. Step size decreases only if a sign flip
|
||||
or switching to a one-sided scheme doesn't allow a full step to be taken.
|
||||
use_one_sided : ndarray of bool, shape (n,)
|
||||
Whether to switch to one-sided scheme. Informative only for
|
||||
``scheme='2-sided'``.
|
||||
"""
|
||||
if scheme == '1-sided':
|
||||
use_one_sided = np.ones_like(h, dtype=bool)
|
||||
elif scheme == '2-sided':
|
||||
h = np.abs(h)
|
||||
use_one_sided = np.zeros_like(h, dtype=bool)
|
||||
else:
|
||||
raise ValueError("`scheme` must be '1-sided' or '2-sided'.")
|
||||
|
||||
if np.all((lb == -np.inf) & (ub == np.inf)):
|
||||
return h, use_one_sided
|
||||
|
||||
h_total = h * num_steps
|
||||
h_adjusted = h.copy()
|
||||
|
||||
lower_dist = x0 - lb
|
||||
upper_dist = ub - x0
|
||||
|
||||
if scheme == '1-sided':
|
||||
x = x0 + h_total
|
||||
violated = (x < lb) | (x > ub)
|
||||
fitting = np.abs(h_total) <= np.maximum(lower_dist, upper_dist)
|
||||
h_adjusted[violated & fitting] *= -1
|
||||
|
||||
forward = (upper_dist >= lower_dist) & ~fitting
|
||||
h_adjusted[forward] = upper_dist[forward] / num_steps
|
||||
backward = (upper_dist < lower_dist) & ~fitting
|
||||
h_adjusted[backward] = -lower_dist[backward] / num_steps
|
||||
elif scheme == '2-sided':
|
||||
central = (lower_dist >= h_total) & (upper_dist >= h_total)
|
||||
|
||||
forward = (upper_dist >= lower_dist) & ~central
|
||||
h_adjusted[forward] = np.minimum(
|
||||
h[forward], 0.5 * upper_dist[forward] / num_steps)
|
||||
use_one_sided[forward] = True
|
||||
|
||||
backward = (upper_dist < lower_dist) & ~central
|
||||
h_adjusted[backward] = -np.minimum(
|
||||
h[backward], 0.5 * lower_dist[backward] / num_steps)
|
||||
use_one_sided[backward] = True
|
||||
|
||||
min_dist = np.minimum(upper_dist, lower_dist) / num_steps
|
||||
adjusted_central = (~central & (np.abs(h_adjusted) <= min_dist))
|
||||
h_adjusted[adjusted_central] = min_dist[adjusted_central]
|
||||
use_one_sided[adjusted_central] = False
|
||||
|
||||
return h_adjusted, use_one_sided
|
||||
|
||||
|
||||
relative_step = {"2-point": EPS**0.5,
|
||||
"3-point": EPS**(1/3),
|
||||
"cs": EPS**0.5}
|
||||
|
||||
|
||||
def _compute_absolute_step(rel_step, x0, method):
|
||||
"""
|
||||
Computes an absolute step from a relative step for finite difference
|
||||
calculation.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
rel_step: None or array-like
|
||||
Relative step for the finite difference calculation
|
||||
x0 : np.ndarray
|
||||
Parameter vector
|
||||
method : {'2-point', '3-point', 'cs'}
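
Examples
--------
A quick illustration with arbitrary example values; the results follow
directly from ``rel_step * sign(x0) * max(1, abs(x0))``:

>>> import numpy as np
>>> from scipy.optimize._numdiff import _compute_absolute_step
>>> _compute_absolute_step(1e-2, np.array([0.5, -4.0]), '2-point')
array([ 0.01, -0.04])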
|
||||
"""
|
||||
if rel_step is None:
|
||||
rel_step = relative_step[method]
|
||||
sign_x0 = (x0 >= 0).astype(float) * 2 - 1
|
||||
return rel_step * sign_x0 * np.maximum(1.0, np.abs(x0))
|
||||
|
||||
|
||||
def _prepare_bounds(bounds, x0):
|
||||
"""
|
||||
Prepares new-style bounds from a two-tuple specifying the lower and upper
|
||||
limits for values in x0. If a value is not bound then the lower/upper bound
|
||||
will be expected to be -np.inf/np.inf.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> _prepare_bounds([(0, 1, 2), (1, 2, np.inf)], [0.5, 1.5, 2.5])
|
||||
(array([0., 1., 2.]), array([ 1., 2., inf]))
|
||||
"""
|
||||
lb, ub = [np.asarray(b, dtype=float) for b in bounds]
|
||||
if lb.ndim == 0:
|
||||
lb = np.resize(lb, x0.shape)
|
||||
|
||||
if ub.ndim == 0:
|
||||
ub = np.resize(ub, x0.shape)
|
||||
|
||||
return lb, ub
|
||||
|
||||
|
||||
def group_columns(A, order=0):
|
||||
"""Group columns of a 2-D matrix for sparse finite differencing [1]_.
|
||||
|
||||
Two columns are in the same group if in each row at least one of them
|
||||
has a zero. A greedy sequential algorithm is used to construct groups.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
A : array_like or sparse matrix, shape (m, n)
|
||||
Matrix of which to group columns.
|
||||
order : int, iterable of int with shape (n,) or None
|
||||
Permutation array which defines the order of columns enumeration.
|
||||
If int or None, a random permutation is used with `order` used as
|
||||
a random seed. Default is 0, that is use a random permutation but
|
||||
guarantee repeatability.
|
||||
|
||||
Returns
|
||||
-------
|
||||
groups : ndarray of int, shape (n,)
|
||||
Contains values from 0 to n_groups-1, where n_groups is the number
|
||||
of found groups. Each value ``groups[i]`` is an index of a group to
|
||||
which the ith column is assigned. The procedure is helpful only if
|
||||
n_groups is significantly less than n.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
|
||||
sparse Jacobian matrices", Journal of the Institute of Mathematics
|
||||
and its Applications, 13 (1974), pp. 117-120.
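
Examples
--------
A small sketch with an arbitrary 3x3 pattern: columns 0 and 2 never have
nonzeros in the same row, so they can share a group, while column 1
conflicts with both of them.

>>> import numpy as np
>>> from scipy.optimize._numdiff import group_columns
>>> A = np.array([[1, 0, 0],
...               [0, 1, 1],
...               [1, 1, 0]])
>>> groups = group_columns(A)
>>> int(np.max(groups)) + 1  # number of column groups needed
2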
|
||||
"""
|
||||
if issparse(A):
|
||||
A = csc_matrix(A)
|
||||
else:
|
||||
A = np.atleast_2d(A)
|
||||
A = (A != 0).astype(np.int32)
|
||||
|
||||
if A.ndim != 2:
|
||||
raise ValueError("`A` must be 2-dimensional.")
|
||||
|
||||
m, n = A.shape
|
||||
|
||||
if order is None or np.isscalar(order):
|
||||
rng = np.random.RandomState(order)
|
||||
order = rng.permutation(n)
|
||||
else:
|
||||
order = np.asarray(order)
|
||||
if order.shape != (n,):
|
||||
raise ValueError("`order` has incorrect shape.")
|
||||
|
||||
A = A[:, order]
|
||||
|
||||
if issparse(A):
|
||||
groups = group_sparse(m, n, A.indices, A.indptr)
|
||||
else:
|
||||
groups = group_dense(m, n, A)
|
||||
|
||||
groups[order] = groups.copy()
|
||||
|
||||
return groups
|
||||
|
||||
|
||||
def approx_derivative(fun, x0, method='3-point', rel_step=None, abs_step=None,
|
||||
f0=None, bounds=(-np.inf, np.inf), sparsity=None,
|
||||
as_linear_operator=False, args=(), kwargs={}):
|
||||
"""Compute finite difference approximation of the derivatives of a
|
||||
vector-valued function.
|
||||
|
||||
If a function maps from R^n to R^m, its derivatives form an m-by-n matrix
|
||||
called the Jacobian, where an element (i, j) is a partial derivative of
|
||||
f[i] with respect to x[j].
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fun : callable
|
||||
Function of which to estimate the derivatives. The argument x
|
||||
passed to this function is ndarray of shape (n,) (never a scalar
|
||||
even if n=1). It must return 1-D array_like of shape (m,) or a scalar.
|
||||
x0 : array_like of shape (n,) or float
|
||||
Point at which to estimate the derivatives. Float will be converted
|
||||
to a 1-D array.
|
||||
method : {'3-point', '2-point', 'cs'}, optional
|
||||
Finite difference method to use:
|
||||
- '2-point' - use the first order accuracy forward or backward
|
||||
difference.
|
||||
- '3-point' - use central difference in interior points and the
|
||||
second order accuracy forward or backward difference
|
||||
near the boundary.
|
||||
- 'cs' - use a complex-step finite difference scheme. This assumes
|
||||
that the user function is real-valued and can be
|
||||
analytically continued to the complex plane. Otherwise,
|
||||
produces bogus results.
|
||||
rel_step : None or array_like, optional
|
||||
Relative step size to use. The absolute step size is computed as
|
||||
``h = rel_step * sign(x0) * max(1, abs(x0))``, possibly adjusted to
|
||||
fit into the bounds. For ``method='3-point'`` the sign of `h` is
|
||||
ignored. If None (default) then step is selected automatically,
|
||||
see Notes.
|
||||
abs_step : array_like, optional
|
||||
Absolute step size to use, possibly adjusted to fit into the bounds.
|
||||
For ``method='3-point'`` the sign of `abs_step` is ignored. By default
|
||||
relative steps are used, only if ``abs_step is not None`` are absolute
|
||||
steps used.
|
||||
f0 : None or array_like, optional
|
||||
If not None it is assumed to be equal to ``fun(x0)``, in this case
|
||||
the ``fun(x0)`` is not called. Default is None.
|
||||
bounds : tuple of array_like, optional
|
||||
Lower and upper bounds on independent variables. Defaults to no bounds.
|
||||
Each bound must match the size of `x0` or be a scalar, in the latter
|
||||
case the bound will be the same for all variables. Use it to limit the
|
||||
range of function evaluation. Bounds checking is not implemented
|
||||
when `as_linear_operator` is True.
|
||||
sparsity : {None, array_like, sparse matrix, 2-tuple}, optional
|
||||
Defines a sparsity structure of the Jacobian matrix. If the Jacobian
|
||||
matrix is known to have only few non-zero elements in each row, then
|
||||
it's possible to estimate its several columns by a single function
|
||||
evaluation [3]_. To perform such economic computations two ingredients
|
||||
are required:
|
||||
|
||||
* structure : array_like or sparse matrix of shape (m, n). A zero
|
||||
element means that a corresponding element of the Jacobian
|
||||
identically equals to zero.
|
||||
* groups : array_like of shape (n,). A column grouping for a given
|
||||
sparsity structure, use `group_columns` to obtain it.
|
||||
|
||||
A single array or a sparse matrix is interpreted as a sparsity
|
||||
structure, and groups are computed inside the function. A tuple is
|
||||
interpreted as (structure, groups). If None (default), a standard
|
||||
dense differencing will be used.
|
||||
|
||||
Note that sparse differencing makes sense only for large Jacobian
|
||||
matrices where each row contains few non-zero elements.
|
||||
as_linear_operator : bool, optional
|
||||
When True the function returns a `scipy.sparse.linalg.LinearOperator`.
|
||||
Otherwise it returns a dense array or a sparse matrix depending on
|
||||
`sparsity`. The linear operator provides an efficient way of computing
|
||||
``J.dot(p)`` for any vector ``p`` of shape (n,), but does not allow
|
||||
direct access to individual elements of the matrix. By default
|
||||
`as_linear_operator` is False.
|
||||
args, kwargs : tuple and dict, optional
|
||||
Additional arguments passed to `fun`. Both empty by default.
|
||||
The calling signature is ``fun(x, *args, **kwargs)``.
|
||||
|
||||
Returns
|
||||
-------
|
||||
J : {ndarray, sparse matrix, LinearOperator}
|
||||
Finite difference approximation of the Jacobian matrix.
|
||||
If `as_linear_operator` is True returns a LinearOperator
|
||||
with shape (m, n). Otherwise it returns a dense array or sparse
|
||||
matrix depending on how `sparsity` is defined. If `sparsity`
|
||||
is None then a ndarray with shape (m, n) is returned. If
|
||||
`sparsity` is not None returns a csr_matrix with shape (m, n).
|
||||
For sparse matrices and linear operators it is always returned as
|
||||
a 2-D structure, for ndarrays, if m=1 it is returned
|
||||
as a 1-D gradient array with shape (n,).
|
||||
|
||||
See Also
|
||||
--------
|
||||
check_derivative : Check correctness of a function computing derivatives.
|
||||
|
||||
Notes
|
||||
-----
|
||||
If `rel_step` is not provided, it is assigned to ``EPS**(1/s)``, where EPS is
|
||||
machine epsilon for float64 numbers, s=2 for '2-point' method and s=3 for
|
||||
'3-point' method. Such relative step approximately minimizes a sum of
|
||||
truncation and round-off errors, see [1]_. Relative steps are used by
|
||||
default. However, absolute steps are used when ``abs_step is not None``.
|
||||
If any of the absolute steps produces an indistinguishable difference from
|
||||
the original `x0`, ``(x0 + abs_step) - x0 == 0``, then a relative step is
|
||||
substituted for that particular entry.
|
||||
|
||||
A finite difference scheme for '3-point' method is selected automatically.
|
||||
The well-known central difference scheme is used for points sufficiently
|
||||
far from the boundary, and 3-point forward or backward scheme is used for
|
||||
points near the boundary. Both schemes have the second-order accuracy in
|
||||
terms of Taylor expansion. Refer to [2]_ for the formulas of 3-point
|
||||
forward and backward difference schemes.
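
As a concrete reference (a sketch of the schemes applied element-wise in
`_dense_difference` below), the one-sided 3-point approximation for a step
``h`` is ``f'(x0) ~ (-3*f(x0) + 4*f(x0 + h) - f(x0 + 2*h)) / (2*h)`` and
the central scheme is ``f'(x0) ~ (f(x0 + h) - f(x0 - h)) / (2*h)``; both
are second order in ``h``.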
|
||||
|
||||
For dense differencing when m=1 Jacobian is returned with a shape (n,),
|
||||
on the other hand when n=1 Jacobian is returned with a shape (m, 1).
|
||||
Our motivation is the following: a) It handles a case of gradient
|
||||
computation (m=1) in a conventional way. b) It clearly separates these two
|
||||
different cases. c) In all cases np.atleast_2d can be called to get a 2-D
|
||||
Jacobian with correct dimensions.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] W. H. Press et. al. "Numerical Recipes. The Art of Scientific
|
||||
Computing. 3rd edition", sec. 5.7.
|
||||
|
||||
.. [2] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
|
||||
sparse Jacobian matrices", Journal of the Institute of Mathematics
|
||||
and its Applications, 13 (1974), pp. 117-120.
|
||||
|
||||
.. [3] B. Fornberg, "Generation of Finite Difference Formulas on
|
||||
Arbitrarily Spaced Grids", Mathematics of Computation 51, 1988.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> from scipy.optimize import approx_derivative
|
||||
>>>
|
||||
>>> def f(x, c1, c2):
|
||||
... return np.array([x[0] * np.sin(c1 * x[1]),
|
||||
... x[0] * np.cos(c2 * x[1])])
|
||||
...
|
||||
>>> x0 = np.array([1.0, 0.5 * np.pi])
|
||||
>>> approx_derivative(f, x0, args=(1, 2))
|
||||
array([[ 1., 0.],
|
||||
[-1., 0.]])
|
||||
|
||||
Bounds can be used to limit the region of function evaluation.
|
||||
In the example below we compute left and right derivative at point 1.0.
|
||||
|
||||
>>> def g(x):
|
||||
... return x**2 if x >= 1 else x
|
||||
...
|
||||
>>> x0 = 1.0
|
||||
>>> approx_derivative(g, x0, bounds=(-np.inf, 1.0))
|
||||
array([ 1.])
|
||||
>>> approx_derivative(g, x0, bounds=(1.0, np.inf))
|
||||
array([ 2.])
|
||||
"""
|
||||
if method not in ['2-point', '3-point', 'cs']:
|
||||
raise ValueError("Unknown method '%s'. " % method)
|
||||
|
||||
x0 = np.atleast_1d(x0)
|
||||
if x0.ndim > 1:
|
||||
raise ValueError("`x0` must have at most 1 dimension.")
|
||||
|
||||
lb, ub = _prepare_bounds(bounds, x0)
|
||||
|
||||
if lb.shape != x0.shape or ub.shape != x0.shape:
|
||||
raise ValueError("Inconsistent shapes between bounds and `x0`.")
|
||||
|
||||
if as_linear_operator and not (np.all(np.isinf(lb))
|
||||
and np.all(np.isinf(ub))):
|
||||
raise ValueError("Bounds not supported when "
|
||||
"`as_linear_operator` is True.")
|
||||
|
||||
def fun_wrapped(x):
|
||||
f = np.atleast_1d(fun(x, *args, **kwargs))
|
||||
if f.ndim > 1:
|
||||
raise RuntimeError("`fun` return value has "
|
||||
"more than 1 dimension.")
|
||||
return f
|
||||
|
||||
if f0 is None:
|
||||
f0 = fun_wrapped(x0)
|
||||
else:
|
||||
f0 = np.atleast_1d(f0)
|
||||
if f0.ndim > 1:
|
||||
raise ValueError("`f0` passed has more than 1 dimension.")
|
||||
|
||||
if np.any((x0 < lb) | (x0 > ub)):
|
||||
raise ValueError("`x0` violates bound constraints.")
|
||||
|
||||
if as_linear_operator:
|
||||
if rel_step is None:
|
||||
rel_step = relative_step[method]
|
||||
|
||||
return _linear_operator_difference(fun_wrapped, x0,
|
||||
f0, rel_step, method)
|
||||
else:
|
||||
# by default we use rel_step
|
||||
if abs_step is None:
|
||||
h = _compute_absolute_step(rel_step, x0, method)
|
||||
else:
|
||||
# user specifies an absolute step
|
||||
sign_x0 = (x0 >= 0).astype(float) * 2 - 1
|
||||
h = abs_step
|
||||
|
||||
# cannot have a zero step. This might happen if x0 is very large
|
||||
# or small. In that case, fall back to a relative step.
|
||||
dx = ((x0 + h) - x0)
|
||||
h = np.where(dx == 0,
|
||||
relative_step[method] * sign_x0 *
|
||||
np.maximum(1.0, np.abs(x0)),
|
||||
h)
|
||||
|
||||
if method == '2-point':
|
||||
h, use_one_sided = _adjust_scheme_to_bounds(
|
||||
x0, h, 1, '1-sided', lb, ub)
|
||||
elif method == '3-point':
|
||||
h, use_one_sided = _adjust_scheme_to_bounds(
|
||||
x0, h, 1, '2-sided', lb, ub)
|
||||
elif method == 'cs':
|
||||
use_one_sided = False
|
||||
|
||||
if sparsity is None:
|
||||
return _dense_difference(fun_wrapped, x0, f0, h,
|
||||
use_one_sided, method)
|
||||
else:
|
||||
if not issparse(sparsity) and len(sparsity) == 2:
|
||||
structure, groups = sparsity
|
||||
else:
|
||||
structure = sparsity
|
||||
groups = group_columns(sparsity)
|
||||
|
||||
if issparse(structure):
|
||||
structure = csc_matrix(structure)
|
||||
else:
|
||||
structure = np.atleast_2d(structure)
|
||||
|
||||
groups = np.atleast_1d(groups)
|
||||
return _sparse_difference(fun_wrapped, x0, f0, h,
|
||||
use_one_sided, structure,
|
||||
groups, method)
|
||||
|
||||
|
||||
def _linear_operator_difference(fun, x0, f0, h, method):
|
||||
m = f0.size
|
||||
n = x0.size
|
||||
|
||||
if method == '2-point':
|
||||
def matvec(p):
|
||||
if np.array_equal(p, np.zeros_like(p)):
|
||||
return np.zeros(m)
|
||||
dx = h / norm(p)
|
||||
x = x0 + dx*p
|
||||
df = fun(x) - f0
|
||||
return df / dx
|
||||
|
||||
elif method == '3-point':
|
||||
def matvec(p):
|
||||
if np.array_equal(p, np.zeros_like(p)):
|
||||
return np.zeros(m)
|
||||
dx = 2*h / norm(p)
|
||||
x1 = x0 - (dx/2)*p
|
||||
x2 = x0 + (dx/2)*p
|
||||
f1 = fun(x1)
|
||||
f2 = fun(x2)
|
||||
df = f2 - f1
|
||||
return df / dx
|
||||
|
||||
elif method == 'cs':
|
||||
def matvec(p):
|
||||
if np.array_equal(p, np.zeros_like(p)):
|
||||
return np.zeros(m)
|
||||
dx = h / norm(p)
|
||||
x = x0 + dx*p*1.j
|
||||
f1 = fun(x)
|
||||
df = f1.imag
|
||||
return df / dx
|
||||
|
||||
else:
|
||||
raise RuntimeError("Never be here.")
|
||||
|
||||
return LinearOperator((m, n), matvec)
|
||||
|
||||
|
||||
def _dense_difference(fun, x0, f0, h, use_one_sided, method):
|
||||
m = f0.size
|
||||
n = x0.size
|
||||
J_transposed = np.empty((n, m))
|
||||
h_vecs = np.diag(h)
|
||||
|
||||
for i in range(h.size):
|
||||
if method == '2-point':
|
||||
x = x0 + h_vecs[i]
|
||||
dx = x[i] - x0[i] # Recompute dx as exactly representable number.
|
||||
df = fun(x) - f0
|
||||
elif method == '3-point' and use_one_sided[i]:
|
||||
x1 = x0 + h_vecs[i]
|
||||
x2 = x0 + 2 * h_vecs[i]
|
||||
dx = x2[i] - x0[i]
|
||||
f1 = fun(x1)
|
||||
f2 = fun(x2)
|
||||
df = -3.0 * f0 + 4 * f1 - f2
|
||||
elif method == '3-point' and not use_one_sided[i]:
|
||||
x1 = x0 - h_vecs[i]
|
||||
x2 = x0 + h_vecs[i]
|
||||
dx = x2[i] - x1[i]
|
||||
f1 = fun(x1)
|
||||
f2 = fun(x2)
|
||||
df = f2 - f1
|
||||
elif method == 'cs':
|
||||
f1 = fun(x0 + h_vecs[i]*1.j)
|
||||
df = f1.imag
|
||||
dx = h_vecs[i, i]
|
||||
else:
|
||||
raise RuntimeError("Never be here.")
|
||||
|
||||
J_transposed[i] = df / dx
|
||||
|
||||
if m == 1:
|
||||
J_transposed = np.ravel(J_transposed)
|
||||
|
||||
return J_transposed.T
|
||||
|
||||
|
||||
def _sparse_difference(fun, x0, f0, h, use_one_sided,
|
||||
structure, groups, method):
|
||||
m = f0.size
|
||||
n = x0.size
|
||||
row_indices = []
|
||||
col_indices = []
|
||||
fractions = []
|
||||
|
||||
n_groups = np.max(groups) + 1
|
||||
for group in range(n_groups):
|
||||
# Perturb variables which are in the same group simultaneously.
|
||||
e = np.equal(group, groups)
|
||||
h_vec = h * e
|
||||
if method == '2-point':
|
||||
x = x0 + h_vec
|
||||
dx = x - x0
|
||||
df = fun(x) - f0
|
||||
# The result is written to columns which correspond to perturbed
|
||||
# variables.
|
||||
cols, = np.nonzero(e)
|
||||
# Find all non-zero elements in selected columns of Jacobian.
|
||||
i, j, _ = find(structure[:, cols])
|
||||
# Restore column indices in the full array.
|
||||
j = cols[j]
|
||||
elif method == '3-point':
|
||||
# Here we do conceptually the same but separate one-sided
|
||||
# and two-sided schemes.
|
||||
x1 = x0.copy()
|
||||
x2 = x0.copy()
|
||||
|
||||
mask_1 = use_one_sided & e
|
||||
x1[mask_1] += h_vec[mask_1]
|
||||
x2[mask_1] += 2 * h_vec[mask_1]
|
||||
|
||||
mask_2 = ~use_one_sided & e
|
||||
x1[mask_2] -= h_vec[mask_2]
|
||||
x2[mask_2] += h_vec[mask_2]
|
||||
|
||||
dx = np.zeros(n)
|
||||
dx[mask_1] = x2[mask_1] - x0[mask_1]
|
||||
dx[mask_2] = x2[mask_2] - x1[mask_2]
|
||||
|
||||
f1 = fun(x1)
|
||||
f2 = fun(x2)
|
||||
|
||||
cols, = np.nonzero(e)
|
||||
i, j, _ = find(structure[:, cols])
|
||||
j = cols[j]
|
||||
|
||||
mask = use_one_sided[j]
|
||||
df = np.empty(m)
|
||||
|
||||
rows = i[mask]
|
||||
df[rows] = -3 * f0[rows] + 4 * f1[rows] - f2[rows]
|
||||
|
||||
rows = i[~mask]
|
||||
df[rows] = f2[rows] - f1[rows]
|
||||
elif method == 'cs':
|
||||
f1 = fun(x0 + h_vec*1.j)
|
||||
df = f1.imag
|
||||
dx = h_vec
|
||||
cols, = np.nonzero(e)
|
||||
i, j, _ = find(structure[:, cols])
|
||||
j = cols[j]
|
||||
else:
|
||||
raise ValueError("Never be here.")
|
||||
|
||||
# All that's left is to compute the fraction. We store i, j and
|
||||
# fractions as separate arrays and later construct coo_matrix.
|
||||
row_indices.append(i)
|
||||
col_indices.append(j)
|
||||
fractions.append(df[i] / dx[j])
|
||||
|
||||
row_indices = np.hstack(row_indices)
|
||||
col_indices = np.hstack(col_indices)
|
||||
fractions = np.hstack(fractions)
|
||||
J = coo_matrix((fractions, (row_indices, col_indices)), shape=(m, n))
|
||||
return csr_matrix(J)
|
||||
|
||||
|
||||
def check_derivative(fun, jac, x0, bounds=(-np.inf, np.inf), args=(),
|
||||
kwargs={}):
|
||||
"""Check correctness of a function computing derivatives (Jacobian or
|
||||
gradient) by comparison with a finite difference approximation.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fun : callable
|
||||
Function of which to estimate the derivatives. The argument x
|
||||
passed to this function is ndarray of shape (n,) (never a scalar
|
||||
even if n=1). It must return 1-D array_like of shape (m,) or a scalar.
|
||||
jac : callable
|
||||
Function which computes Jacobian matrix of `fun`. It must work with
|
||||
argument x the same way as `fun`. The return value must be array_like
|
||||
or sparse matrix with an appropriate shape.
|
||||
x0 : array_like of shape (n,) or float
|
||||
Point at which to estimate the derivatives. Float will be converted
|
||||
to 1-D array.
|
||||
bounds : 2-tuple of array_like, optional
|
||||
Lower and upper bounds on independent variables. Defaults to no bounds.
|
||||
Each bound must match the size of `x0` or be a scalar, in the latter
|
||||
case the bound will be the same for all variables. Use it to limit the
|
||||
range of function evaluation.
|
||||
args, kwargs : tuple and dict, optional
|
||||
Additional arguments passed to `fun` and `jac`. Both empty by default.
|
||||
The calling signature is ``fun(x, *args, **kwargs)`` and the same
|
||||
for `jac`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
accuracy : float
|
||||
The maximum among all relative errors for elements with absolute values
|
||||
higher than 1 and absolute errors for elements with absolute values
|
||||
less than or equal to 1. If `accuracy` is on the order of 1e-6 or lower,
|
||||
then it is likely that your `jac` implementation is correct.
|
||||
|
||||
See Also
|
||||
--------
|
||||
approx_derivative : Compute finite difference approximation of derivative.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> from scipy.optimize import check_derivative
|
||||
>>>
|
||||
>>>
|
||||
>>> def f(x, c1, c2):
|
||||
... return np.array([x[0] * np.sin(c1 * x[1]),
|
||||
... x[0] * np.cos(c2 * x[1])])
|
||||
...
|
||||
>>> def jac(x, c1, c2):
|
||||
... return np.array([
|
||||
... [np.sin(c1 * x[1]), c1 * x[0] * np.cos(c1 * x[1])],
|
||||
... [np.cos(c2 * x[1]), -c2 * x[0] * np.sin(c2 * x[1])]
|
||||
... ])
|
||||
...
|
||||
>>>
|
||||
>>> x0 = np.array([1.0, 0.5 * np.pi])
|
||||
>>> check_derivative(f, jac, x0, args=(1, 2))
|
||||
2.4492935982947064e-16
|
||||
"""
|
||||
J_to_test = jac(x0, *args, **kwargs)
|
||||
if issparse(J_to_test):
|
||||
J_diff = approx_derivative(fun, x0, bounds=bounds, sparsity=J_to_test,
|
||||
args=args, kwargs=kwargs)
|
||||
J_to_test = csr_matrix(J_to_test)
|
||||
abs_err = J_to_test - J_diff
|
||||
i, j, abs_err_data = find(abs_err)
|
||||
J_diff_data = np.asarray(J_diff[i, j]).ravel()
|
||||
return np.max(np.abs(abs_err_data) /
|
||||
np.maximum(1, np.abs(J_diff_data)))
|
||||
else:
|
||||
J_diff = approx_derivative(fun, x0, bounds=bounds,
|
||||
args=args, kwargs=kwargs)
|
||||
abs_err = np.abs(J_to_test - J_diff)
|
||||
return np.max(abs_err / np.maximum(1, np.abs(J_diff)))
|
450
venv/Lib/site-packages/scipy/optimize/_remove_redundancy.py
Normal file
|
@ -0,0 +1,450 @@
|
|||
"""
|
||||
Routines for removing redundant (linearly dependent) equations from linear
|
||||
programming equality constraints.
|
||||
"""
|
||||
# Author: Matt Haberland
|
||||
|
||||
import numpy as np
|
||||
from scipy.linalg import svd
|
||||
import scipy
|
||||
from scipy.linalg.blas import dtrsm
|
||||
|
||||
|
||||
def _row_count(A):
|
||||
"""
|
||||
Counts the number of nonzeros in each row of input array A.
|
||||
Nonzeros are defined as any element with absolute value greater than
|
||||
tol = 1e-13. This value should probably be an input to the function.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
A : 2-D array
|
||||
An array representing a matrix
|
||||
|
||||
Returns
|
||||
-------
|
||||
rowcount : 1-D array
|
||||
Number of nonzeros in each row of A
|
||||
|
||||
"""
|
||||
tol = 1e-13
|
||||
return np.array((abs(A) > tol).sum(axis=1)).flatten()
|
||||
|
||||
|
||||
def _get_densest(A, eligibleRows):
|
||||
"""
|
||||
Returns the index of the densest row of A. Ignores rows that are not
|
||||
eligible for consideration.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
A : 2-D array
|
||||
An array representing a matrix
|
||||
eligibleRows : 1-D logical array
|
||||
Values indicate whether the corresponding row of A is eligible
|
||||
to be considered
|
||||
|
||||
Returns
|
||||
-------
|
||||
i_densest : int
|
||||
Index of the densest row in A eligible for consideration
|
||||
|
||||
"""
|
||||
rowCounts = _row_count(A)
|
||||
return np.argmax(rowCounts * eligibleRows)
|
||||
|
||||
|
||||
def _remove_zero_rows(A, b):
|
||||
"""
|
||||
Eliminates trivial equations from system of equations defined by Ax = b
|
||||
and identifies trivial infeasibilities
|
||||
|
||||
Parameters
|
||||
----------
|
||||
A : 2-D array
|
||||
An array representing the left-hand side of a system of equations
|
||||
b : 1-D array
|
||||
An array representing the right-hand side of a system of equations
|
||||
|
||||
Returns
|
||||
-------
|
||||
A : 2-D array
|
||||
An array representing the left-hand side of a system of equations
|
||||
b : 1-D array
|
||||
An array representing the right-hand side of a system of equations
|
||||
status: int
|
||||
An integer indicating the status of the removal operation
|
||||
0: No infeasibility identified
|
||||
2: Trivially infeasible
|
||||
message : str
|
||||
A string descriptor of the exit status of the optimization.
|
||||
|
||||
"""
|
||||
status = 0
|
||||
message = ""
|
||||
i_zero = _row_count(A) == 0
|
||||
A = A[np.logical_not(i_zero), :]
|
||||
if not(np.allclose(b[i_zero], 0)):
|
||||
status = 2
|
||||
message = "There is a zero row in A_eq with a nonzero corresponding " \
|
||||
"entry in b_eq. The problem is infeasible."
|
||||
b = b[np.logical_not(i_zero)]
|
||||
return A, b, status, message
|
||||
|
||||
|
||||
def bg_update_dense(plu, perm_r, v, j):
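# Update the dense LU factors in place after basis column j is replaced by
# vector v (the "Bartels-Golub update idea" mentioned in the notes further
# below); `plu` is the (LU, piv) pair as produced by scipy.linalg.lu_factor.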
|
||||
LU, p = plu
|
||||
|
||||
vperm = v[perm_r]
|
||||
u = dtrsm(1, LU, vperm, lower=1, diag=1)
|
||||
LU[:j+1, j] = u[:j+1]
|
||||
l = u[j+1:]
|
||||
piv = LU[j, j]
|
||||
LU[j+1:, j] += (l/piv)
|
||||
return LU, p
|
||||
|
||||
|
||||
def _remove_redundancy_dense(A, rhs, true_rank=None):
|
||||
"""
|
||||
Eliminates redundant equations from system of equations defined by Ax = b
|
||||
and identifies infeasibilities.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
A : 2-D array
|
||||
An array representing the left-hand side of a system of equations
|
||||
rhs : 1-D array
|
||||
An array representing the right-hand side of a system of equations
|
||||
|
||||
Returns
|
||||
-------
|
||||
A : 2-D array
|
||||
A matrix representing the left-hand side of a system of equations
|
||||
rhs : 1-D array
|
||||
An array representing the right-hand side of a system of equations
|
||||
status: int
|
||||
An integer indicating the status of the system
|
||||
0: No infeasibility identified
|
||||
2: Trivially infeasible
|
||||
message : str
|
||||
A string descriptor of the exit status of the optimization.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [2] Andersen, Erling D. "Finding all linearly dependent rows in
|
||||
large-scale linear programming." Optimization Methods and Software
|
||||
6.3 (1995): 219-227.
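
Examples
--------
A tiny sketch (a hypothetical system whose second equation duplicates the
first, so exactly one row should be removed):

>>> import numpy as np
>>> from scipy.optimize._remove_redundancy import _remove_redundancy_dense
>>> A = np.array([[1., 1., 0.],
...               [1., 1., 0.]])
>>> b = np.array([2., 2.])
>>> A_r, b_r, status, msg = _remove_redundancy_dense(A, b)
>>> A_r.shape
(1, 3)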
|
||||
|
||||
"""
|
||||
tolapiv = 1e-8
|
||||
tolprimal = 1e-8
|
||||
status = 0
|
||||
message = ""
|
||||
inconsistent = ("There is a linear combination of rows of A_eq that "
|
||||
"results in zero, suggesting a redundant constraint. "
|
||||
"However the same linear combination of b_eq is "
|
||||
"nonzero, suggesting that the constraints conflict "
|
||||
"and the problem is infeasible.")
|
||||
A, rhs, status, message = _remove_zero_rows(A, rhs)
|
||||
|
||||
if status != 0:
|
||||
return A, rhs, status, message
|
||||
|
||||
m, n = A.shape
|
||||
|
||||
v = list(range(m)) # Artificial column indices.
|
||||
b = list(v) # Basis column indices.
|
||||
# This is better as a list than a set because column order of basis matrix
|
||||
# needs to be consistent.
|
||||
d = [] # Indices of dependent rows
|
||||
perm_r = None
|
||||
|
||||
A_orig = A
|
||||
A = np.zeros((m, m + n), order='F')
|
||||
np.fill_diagonal(A, 1)
|
||||
A[:, m:] = A_orig
|
||||
e = np.zeros(m)
|
||||
|
||||
js_candidates = np.arange(m, m+n, dtype=int) # candidate columns for basis
|
||||
# manual masking was faster than masked array
|
||||
js_mask = np.ones(js_candidates.shape, dtype=bool)
|
||||
|
||||
# Implements basic algorithm from [2]
|
||||
# Uses some of the suggested improvements (removing zero rows and
|
||||
# Bartels-Golub update idea).
|
||||
# Removing column singletons would be easy, but it is not as important
|
||||
# because the procedure is performed only on the equality constraint
|
||||
# matrix from the original problem - not on the canonical form matrix,
|
||||
# which would have many more column singletons due to slack variables
|
||||
# from the inequality constraints.
|
||||
# The thoughts on "crashing" the initial basis are only really useful if
|
||||
# the matrix is sparse.
|
||||
|
||||
lu = np.eye(m, order='F'), np.arange(m) # initial LU is trivial
|
||||
perm_r = lu[1]
|
||||
for i in v:
|
||||
|
||||
e[i] = 1
|
||||
if i > 0:
|
||||
e[i-1] = 0
|
||||
|
||||
try: # fails for i==0 and any time it gets ill-conditioned
|
||||
j = b[i-1]
|
||||
lu = bg_update_dense(lu, perm_r, A[:, j], i-1)
|
||||
except Exception:
|
||||
lu = scipy.linalg.lu_factor(A[:, b])
|
||||
LU, p = lu
|
||||
perm_r = list(range(m))
|
||||
for i1, i2 in enumerate(p):
|
||||
perm_r[i1], perm_r[i2] = perm_r[i2], perm_r[i1]
|
||||
|
||||
pi = scipy.linalg.lu_solve(lu, e, trans=1)
|
||||
|
||||
js = js_candidates[js_mask]
|
||||
batch = 50
|
||||
|
||||
# This is a tiny bit faster than looping over columns individually,
|
||||
# like for j in js: if abs(A[:,j].transpose().dot(pi)) > tolapiv:
|
||||
for j_index in range(0, len(js), batch):
|
||||
j_indices = js[j_index: min(j_index+batch, len(js))]
|
||||
|
||||
c = abs(A[:, j_indices].transpose().dot(pi))
|
||||
if (c > tolapiv).any():
|
||||
j = js[j_index + np.argmax(c)] # very independent column
|
||||
b[i] = j
|
||||
js_mask[j-m] = False
|
||||
break
|
||||
else:
|
||||
bibar = pi.T.dot(rhs.reshape(-1, 1))
|
||||
bnorm = np.linalg.norm(rhs)
|
||||
if abs(bibar)/(1+bnorm) > tolprimal: # inconsistent
|
||||
status = 2
|
||||
message = inconsistent
|
||||
return A_orig, rhs, status, message
|
||||
else: # dependent
|
||||
d.append(i)
|
||||
if true_rank is not None and len(d) == m - true_rank:
|
||||
break # found all redundancies
|
||||
|
||||
keep = set(range(m))
|
||||
keep = list(keep - set(d))
|
||||
return A_orig[keep, :], rhs[keep], status, message
|
||||
|
||||
|
||||
def _remove_redundancy_sparse(A, rhs):
|
||||
"""
|
||||
Eliminates redundant equations from system of equations defined by Ax = b
|
||||
and identifies infeasibilities.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
A : 2-D sparse matrix
|
||||
A matrix representing the left-hand side of a system of equations
|
||||
rhs : 1-D array
|
||||
An array representing the right-hand side of a system of equations
|
||||
|
||||
Returns
|
||||
-------
|
||||
A : 2-D sparse matrix
|
||||
A matrix representing the left-hand side of a system of equations
|
||||
rhs : 1-D array
|
||||
An array representing the right-hand side of a system of equations
|
||||
status: int
|
||||
An integer indicating the status of the system
|
||||
0: No infeasibility identified
|
||||
2: Trivially infeasible
|
||||
message : str
|
||||
A string descriptor of the exit status of the optimization.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [2] Andersen, Erling D. "Finding all linearly dependent rows in
|
||||
large-scale linear programming." Optimization Methods and Software
|
||||
6.3 (1995): 219-227.
|
||||
|
||||
"""
|
||||
|
||||
tolapiv = 1e-8
|
||||
tolprimal = 1e-8
|
||||
status = 0
|
||||
message = ""
|
||||
inconsistent = ("There is a linear combination of rows of A_eq that "
|
||||
"results in zero, suggesting a redundant constraint. "
|
||||
"However the same linear combination of b_eq is "
|
||||
"nonzero, suggesting that the constraints conflict "
|
||||
"and the problem is infeasible.")
|
||||
A, rhs, status, message = _remove_zero_rows(A, rhs)
|
||||
|
||||
if status != 0:
|
||||
return A, rhs, status, message
|
||||
|
||||
m, n = A.shape
|
||||
|
||||
v = list(range(m)) # Artificial column indices.
|
||||
b = list(v) # Basis column indices.
|
||||
# This is better as a list than a set because column order of basis matrix
|
||||
# needs to be consistent.
|
||||
k = set(range(m, m+n)) # Structural column indices.
|
||||
d = [] # Indices of dependent rows
|
||||
|
||||
A_orig = A
|
||||
A = scipy.sparse.hstack((scipy.sparse.eye(m), A)).tocsc()
|
||||
e = np.zeros(m)
|
||||
|
||||
# Implements basic algorithm from [2]
|
||||
# Uses only one of the suggested improvements (removing zero rows).
|
||||
# Removing column singletons would be easy, but it is not as important
|
||||
# because the procedure is performed only on the equality constraint
|
||||
# matrix from the original problem - not on the canonical form matrix,
|
||||
# which would have many more column singletons due to slack variables
|
||||
# from the inequality constraints.
|
||||
# The thoughts on "crashing" the initial basis sound useful, but the
|
||||
# description of the procedure seems to assume a lot of familiarity with
|
||||
# the subject; it is not very explicit. I already went through enough
|
||||
# trouble getting the basic algorithm working, so I was not interested in
|
||||
# trying to decipher this, too. (Overall, the paper is fraught with
|
||||
# mistakes and ambiguities - which is strange, because the rest of
|
||||
# Andersen's papers are quite good.)
|
||||
# I tried and tried and tried to improve performance using the
|
||||
# Bartels-Golub update. It works, but it's only practical if the LU
|
||||
# factorization can be specialized as described, and that is not possible
|
||||
# until the SciPy SuperLU interface permits control over column
|
||||
# permutation - see issue #7700.
|
||||
|
||||
for i in v:
|
||||
B = A[:, b]
|
||||
|
||||
e[i] = 1
|
||||
if i > 0:
|
||||
e[i-1] = 0
|
||||
|
||||
pi = scipy.sparse.linalg.spsolve(B.transpose(), e).reshape(-1, 1)
|
||||
|
||||
js = list(k-set(b)) # not efficient, but this is not the time sink...
|
||||
|
||||
# Due to overhead, it tends to be faster (for problems tested) to
|
||||
# compute the full matrix-vector product rather than individual
|
||||
# vector-vector products (with the chance of terminating as soon
|
||||
# as any are nonzero). For very large matrices, it might be worth
|
||||
# it to compute, say, 100 or 1000 at a time and stop when a nonzero
|
||||
# is found.
|
||||
|
||||
c = (np.abs(A[:, js].transpose().dot(pi)) > tolapiv).nonzero()[0]
|
||||
if len(c) > 0: # independent
|
||||
j = js[c[0]]
|
||||
# in a previous commit, the previous line was changed to choose
|
||||
# index j corresponding with the maximum dot product.
|
||||
# While this avoided issues with almost
|
||||
# singular matrices, it slowed the routine in most NETLIB tests.
|
||||
# I think this is because these columns were denser than the
|
||||
# first column with nonzero dot product (c[0]).
|
||||
# It would be nice to have a heuristic that balances sparsity with
|
||||
# high dot product, but I don't think it's worth the time to
|
||||
# develop one right now. Bartels-Golub update is a much higher
|
||||
# priority.
|
||||
b[i] = j # replace artificial column
|
||||
else:
|
||||
bibar = pi.T.dot(rhs.reshape(-1, 1))
|
||||
bnorm = np.linalg.norm(rhs)
|
||||
if abs(bibar)/(1 + bnorm) > tolprimal:
|
||||
status = 2
|
||||
message = inconsistent
|
||||
return A_orig, rhs, status, message
|
||||
else: # dependent
|
||||
d.append(i)
|
||||
|
||||
keep = set(range(m))
|
||||
keep = list(keep - set(d))
|
||||
return A_orig[keep, :], rhs[keep], status, message
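

# Illustrative sketch, not part of the SciPy source: one "pricing" step of the
# sparse routine above. Starting from the artificial (identity) basis, solve
# B^T pi = e_i and look for a structural column with a nonzero dot product;
# such a column can replace artificial column i. The helper name is
# hypothetical.
def _demo_sparse_pricing_step():
    import numpy as np
    import scipy.sparse
    import scipy.sparse.linalg
    A = scipy.sparse.csc_matrix(np.array([[1., 0.],
                                          [1., 0.]]))   # row 1 repeats row 0
    m = A.shape[0]
    AI = scipy.sparse.hstack((scipy.sparse.eye(m), A)).tocsc()
    B = AI[:, [0, 1]]                                    # artificial basis
    e = np.zeros(m)
    e[0] = 1.0
    pi = scipy.sparse.linalg.spsolve(B.transpose(), e).reshape(-1, 1)
    # structural column 0 has a nonzero product with pi, so it enters the basis
    return np.abs(np.asarray(AI[:, [2, 3]].transpose().dot(pi))).ravel()  # [1., 0.]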
|
||||
|
||||
|
||||
def _remove_redundancy(A, b):
|
||||
"""
|
||||
Eliminates redundant equations from system of equations defined by Ax = b
|
||||
and identifies infeasibilities.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
A : 2-D array
|
||||
An array representing the left-hand side of a system of equations
|
||||
b : 1-D array
|
||||
An array representing the right-hand side of a system of equations
|
||||
|
||||
Returns
|
||||
-------
|
||||
A : 2-D array
|
||||
An array representing the left-hand side of a system of equations
|
||||
b : 1-D array
|
||||
An array representing the right-hand side of a system of equations
|
||||
status: int
|
||||
An integer indicating the status of the system
|
||||
0: No infeasibility identified
|
||||
2: Trivially infeasible
|
||||
message : str
|
||||
A string descriptor of the exit status of the optimization.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [2] Andersen, Erling D. "Finding all linearly dependent rows in
|
||||
large-scale linear programming." Optimization Methods and Software
|
||||
6.3 (1995): 219-227.
|
||||
|
||||
"""
|
||||
|
||||
A, b, status, message = _remove_zero_rows(A, b)
|
||||
|
||||
if status != 0:
|
||||
return A, b, status, message
|
||||
|
||||
U, s, Vh = svd(A)
|
||||
eps = np.finfo(float).eps
|
||||
tol = s.max() * max(A.shape) * eps
|
||||
|
||||
m, n = A.shape
|
||||
s_min = s[-1] if m <= n else 0
|
||||
|
||||
# this algorithm is faster than that of [2] when the nullspace is small
|
||||
# but it could probably be improved by randomized algorithms and with
|
||||
# a sparse implementation.
|
||||
# it relies on repeated singular value decomposition to find linearly
|
||||
# dependent rows (as identified by columns of U that correspond with zero
|
||||
# singular values). Unfortunately, only one row can be removed per
|
||||
# decomposition (I tried otherwise; doing so can cause problems.)
|
||||
# It would be nice if we could do truncated SVD like sp.sparse.linalg.svds
|
||||
# but that function is unreliable at finding singular values near zero.
|
||||
# Finding max eigenvalue L of A A^T, then largest eigenvalue (and
|
||||
# associated eigenvector) of -A A^T + L I (I is identity) via power
|
||||
# iteration would also work in theory, but is only efficient if the
|
||||
# smallest nonzero eigenvalue of A A^T is close to the largest nonzero
|
||||
# eigenvalue.
|
||||
|
||||
while abs(s_min) < tol:
|
||||
v = U[:, -1] # TODO: return these so user can eliminate from problem?
|
||||
# rows need to be represented in significant amount
|
||||
eligibleRows = np.abs(v) > tol * 10e6
|
||||
if not np.any(eligibleRows) or np.any(np.abs(v.dot(A)) > tol):
|
||||
status = 4
|
||||
message = ("Due to numerical issues, redundant equality "
|
||||
"constraints could not be removed automatically. "
|
||||
"Try providing your constraint matrices as sparse "
|
||||
"matrices to activate sparse presolve, try turning "
|
||||
"off redundancy removal, or try turning off presolve "
|
||||
"altogether.")
|
||||
break
|
||||
if np.any(np.abs(v.dot(b)) > tol * 100): # factor of 100 to fix 10038 and 10349
|
||||
status = 2
|
||||
message = ("There is a linear combination of rows of A_eq that "
|
||||
"results in zero, suggesting a redundant constraint. "
|
||||
"However the same linear combination of b_eq is "
|
||||
"nonzero, suggesting that the constraints conflict "
|
||||
"and the problem is infeasible.")
|
||||
break
|
||||
|
||||
i_remove = _get_densest(A, eligibleRows)
|
||||
A = np.delete(A, i_remove, axis=0)
|
||||
b = np.delete(b, i_remove)
|
||||
U, s, Vh = svd(A)
|
||||
m, n = A.shape
|
||||
s_min = s[-1] if m <= n else 0
|
||||
|
||||
return A, b, status, message
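

# Illustrative sketch, not part of the SciPy source: the SVD-based test used by
# the loop above, applied once to a tiny square matrix with one dependent row.
# The helper name is hypothetical.
def _demo_svd_redundancy_test():
    import numpy as np
    from numpy.linalg import svd
    A = np.array([[1., 2., 3.],
                  [4., 5., 6.],
                  [5., 7., 9.]])             # row 2 = row 0 + row 1
    b = np.array([1., 2., 3.])               # consistent: b[2] = b[0] + b[1]
    U, s, Vh = svd(A)
    eps = np.finfo(float).eps
    tol = s.max() * max(A.shape) * eps
    v = U[:, -1]                             # left singular vector for s[-1]
    # s[-1] is (numerically) zero, v.dot(A) ~ 0 and v.dot(b) ~ 0, so a row
    # flagged by a large |v| entry can be dropped as redundant.
    return s[-1] < tol, np.max(np.abs(v.dot(A))), np.max(np.abs(v.dot(b)))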
|
651
venv/Lib/site-packages/scipy/optimize/_root.py
Normal file
@ -0,0 +1,651 @@
"""
|
||||
Unified interfaces to root finding algorithms.
|
||||
|
||||
Functions
|
||||
---------
|
||||
- root : find a root of a vector function.
|
||||
"""
|
||||
__all__ = ['root']
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
from warnings import warn
|
||||
|
||||
from .optimize import MemoizeJac, OptimizeResult, _check_unknown_options
|
||||
from .minpack import _root_hybr, leastsq
|
||||
from ._spectral import _root_df_sane
|
||||
from . import nonlin
|
||||
|
||||
|
||||
def root(fun, x0, args=(), method='hybr', jac=None, tol=None, callback=None,
|
||||
options=None):
|
||||
"""
|
||||
Find a root of a vector function.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fun : callable
|
||||
A vector function to find a root of.
|
||||
x0 : ndarray
|
||||
Initial guess.
|
||||
args : tuple, optional
|
||||
Extra arguments passed to the objective function and its Jacobian.
|
||||
method : str, optional
|
||||
Type of solver. Should be one of
|
||||
|
||||
- 'hybr' :ref:`(see here) <optimize.root-hybr>`
|
||||
- 'lm' :ref:`(see here) <optimize.root-lm>`
|
||||
- 'broyden1' :ref:`(see here) <optimize.root-broyden1>`
|
||||
- 'broyden2' :ref:`(see here) <optimize.root-broyden2>`
|
||||
- 'anderson' :ref:`(see here) <optimize.root-anderson>`
|
||||
- 'linearmixing' :ref:`(see here) <optimize.root-linearmixing>`
|
||||
- 'diagbroyden' :ref:`(see here) <optimize.root-diagbroyden>`
|
||||
- 'excitingmixing' :ref:`(see here) <optimize.root-excitingmixing>`
|
||||
- 'krylov' :ref:`(see here) <optimize.root-krylov>`
|
||||
- 'df-sane' :ref:`(see here) <optimize.root-dfsane>`
|
||||
|
||||
jac : bool or callable, optional
|
||||
If `jac` is a Boolean and is True, `fun` is assumed to return the
|
||||
value of the Jacobian along with the objective function. If False, the
|
||||
Jacobian will be estimated numerically.
|
||||
`jac` can also be a callable returning the Jacobian of `fun`. In
|
||||
this case, it must accept the same arguments as `fun`.
|
||||
tol : float, optional
|
||||
Tolerance for termination. For detailed control, use solver-specific
|
||||
options.
|
||||
callback : function, optional
|
||||
Optional callback function. It is called on every iteration as
|
||||
``callback(x, f)`` where `x` is the current solution and `f`
|
||||
the corresponding residual. For all methods but 'hybr' and 'lm'.
|
||||
options : dict, optional
|
||||
A dictionary of solver options. E.g., `xtol` or `maxiter`, see
|
||||
:obj:`show_options()` for details.
|
||||
|
||||
Returns
|
||||
-------
|
||||
sol : OptimizeResult
|
||||
The solution represented as an ``OptimizeResult`` object.
|
||||
Important attributes are: ``x`` the solution array, ``success`` a
|
||||
Boolean flag indicating if the algorithm exited successfully and
|
||||
``message`` which describes the cause of the termination. See
|
||||
`OptimizeResult` for a description of other attributes.
|
||||
|
||||
See also
|
||||
--------
|
||||
show_options : Additional options accepted by the solvers
|
||||
|
||||
Notes
|
||||
-----
|
||||
This section describes the available solvers that can be selected by the
|
||||
'method' parameter. The default method is *hybr*.
|
||||
|
||||
Method *hybr* uses a modification of the Powell hybrid method as
|
||||
implemented in MINPACK [1]_.
|
||||
|
||||
Method *lm* solves the system of nonlinear equations in a least squares
|
||||
sense using a modification of the Levenberg-Marquardt algorithm as
|
||||
implemented in MINPACK [1]_.
|
||||
|
||||
Method *df-sane* is a derivative-free spectral method. [3]_
|
||||
|
||||
Methods *broyden1*, *broyden2*, *anderson*, *linearmixing*,
|
||||
*diagbroyden*, *excitingmixing*, *krylov* are inexact Newton methods,
|
||||
with backtracking or full line searches [2]_. Each method corresponds
|
||||
to a particular Jacobian approximation. See `nonlin` for details.
|
||||
|
||||
- Method *broyden1* uses Broyden's first Jacobian approximation, it is
|
||||
known as Broyden's good method.
|
||||
- Method *broyden2* uses Broyden's second Jacobian approximation, it
|
||||
is known as Broyden's bad method.
|
||||
- Method *anderson* uses (extended) Anderson mixing.
|
||||
- Method *krylov* uses Krylov approximation for inverse Jacobian. It
|
||||
is suitable for large-scale problems.
|
||||
- Method *diagbroyden* uses diagonal Broyden Jacobian approximation.
|
||||
- Method *linearmixing* uses a scalar Jacobian approximation.
|
||||
- Method *excitingmixing* uses a tuned diagonal Jacobian
|
||||
approximation.
|
||||
|
||||
.. warning::
|
||||
|
||||
The algorithms implemented for methods *diagbroyden*,
|
||||
*linearmixing* and *excitingmixing* may be useful for specific
|
||||
problems, but whether they will work may depend strongly on the
|
||||
problem.
|
||||
|
||||
.. versionadded:: 0.11.0
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] More, Jorge J., Burton S. Garbow, and Kenneth E. Hillstrom.
|
||||
1980. User Guide for MINPACK-1.
|
||||
.. [2] C. T. Kelley. 1995. Iterative Methods for Linear and Nonlinear
|
||||
Equations. Society for Industrial and Applied Mathematics.
|
||||
<https://archive.siam.org/books/kelley/fr16/>
|
||||
.. [3] W. La Cruz, J.M. Martinez, M. Raydan. Math. Comp. 75, 1429 (2006).
|
||||
|
||||
Examples
|
||||
--------
|
||||
The following functions define a system of nonlinear equations and its
|
||||
Jacobian.
|
||||
|
||||
>>> def fun(x):
|
||||
... return [x[0] + 0.5 * (x[0] - x[1])**3 - 1.0,
|
||||
... 0.5 * (x[1] - x[0])**3 + x[1]]
|
||||
|
||||
>>> def jac(x):
|
||||
... return np.array([[1 + 1.5 * (x[0] - x[1])**2,
|
||||
... -1.5 * (x[0] - x[1])**2],
|
||||
... [-1.5 * (x[1] - x[0])**2,
|
||||
... 1 + 1.5 * (x[1] - x[0])**2]])
|
||||
|
||||
A solution can be obtained as follows.
|
||||
|
||||
>>> from scipy import optimize
|
||||
>>> sol = optimize.root(fun, [0, 0], jac=jac, method='hybr')
|
||||
>>> sol.x
|
||||
array([ 0.8411639, 0.1588361])
|
||||
|
||||
"""
|
||||
if not isinstance(args, tuple):
|
||||
args = (args,)
|
||||
|
||||
meth = method.lower()
|
||||
if options is None:
|
||||
options = {}
|
||||
|
||||
if callback is not None and meth in ('hybr', 'lm'):
|
||||
warn('Method %s does not accept callback.' % method,
|
||||
RuntimeWarning)
|
||||
|
||||
# fun also returns the Jacobian
|
||||
if not callable(jac) and meth in ('hybr', 'lm'):
|
||||
if bool(jac):
|
||||
fun = MemoizeJac(fun)
|
||||
jac = fun.derivative
|
||||
else:
|
||||
jac = None
|
||||
|
||||
# set default tolerances
|
||||
if tol is not None:
|
||||
options = dict(options)
|
||||
if meth in ('hybr', 'lm'):
|
||||
options.setdefault('xtol', tol)
|
||||
elif meth in ('df-sane',):
|
||||
options.setdefault('ftol', tol)
|
||||
elif meth in ('broyden1', 'broyden2', 'anderson', 'linearmixing',
|
||||
'diagbroyden', 'excitingmixing', 'krylov'):
|
||||
options.setdefault('xtol', tol)
|
||||
options.setdefault('xatol', np.inf)
|
||||
options.setdefault('ftol', np.inf)
|
||||
options.setdefault('fatol', np.inf)
|
||||
|
||||
if meth == 'hybr':
|
||||
sol = _root_hybr(fun, x0, args=args, jac=jac, **options)
|
||||
elif meth == 'lm':
|
||||
sol = _root_leastsq(fun, x0, args=args, jac=jac, **options)
|
||||
elif meth == 'df-sane':
|
||||
_warn_jac_unused(jac, method)
|
||||
sol = _root_df_sane(fun, x0, args=args, callback=callback,
|
||||
**options)
|
||||
elif meth in ('broyden1', 'broyden2', 'anderson', 'linearmixing',
|
||||
'diagbroyden', 'excitingmixing', 'krylov'):
|
||||
_warn_jac_unused(jac, method)
|
||||
sol = _root_nonlin_solve(fun, x0, args=args, jac=jac,
|
||||
_method=meth, _callback=callback,
|
||||
**options)
|
||||
else:
|
||||
raise ValueError('Unknown solver %s' % method)
|
||||
|
||||
return sol
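

# Illustrative sketch, not part of the SciPy source: routing method-specific
# settings through ``options`` as dispatched above. The helper name is
# hypothetical; the test system is the one from the docstring example, whose
# root lies near [0.8411639, 0.1588361].
def _demo_root_with_options():
    from scipy import optimize

    def fun(x):
        return [x[0] + 0.5 * (x[0] - x[1])**3 - 1.0,
                0.5 * (x[1] - x[0])**3 + x[1]]

    return optimize.root(fun, [0.0, 0.0], method='broyden1',
                         options={'fatol': 1e-10,
                                  'jac_options': {'reduction_method': 'simple'}})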
|
||||
|
||||
|
||||
def _warn_jac_unused(jac, method):
|
||||
if jac is not None:
|
||||
warn('Method %s does not use the jacobian (jac).' % (method,),
|
||||
RuntimeWarning)
|
||||
|
||||
|
||||
def _root_leastsq(fun, x0, args=(), jac=None,
|
||||
col_deriv=0, xtol=1.49012e-08, ftol=1.49012e-08,
|
||||
gtol=0.0, maxiter=0, eps=0.0, factor=100, diag=None,
|
||||
**unknown_options):
|
||||
"""
|
||||
Solve for least squares with Levenberg-Marquardt
|
||||
|
||||
Options
|
||||
-------
|
||||
col_deriv : bool
|
||||
non-zero to specify that the Jacobian function computes derivatives
|
||||
down the columns (faster, because there is no transpose operation).
|
||||
ftol : float
|
||||
Relative error desired in the sum of squares.
|
||||
xtol : float
|
||||
Relative error desired in the approximate solution.
|
||||
gtol : float
|
||||
Orthogonality desired between the function vector and the columns
|
||||
of the Jacobian.
|
||||
maxiter : int
|
||||
The maximum number of calls to the function. If zero, then
|
||||
100*(N+1) is the maximum where N is the number of elements in x0.
|
||||
epsfcn : float
|
||||
A suitable step length for the forward-difference approximation of
|
||||
the Jacobian (for Dfun=None). If epsfcn is less than the machine
|
||||
precision, it is assumed that the relative errors in the functions
|
||||
are of the order of the machine precision.
|
||||
factor : float
|
||||
A parameter determining the initial step bound
|
||||
(``factor * || diag * x||``). Should be in interval ``(0.1, 100)``.
|
||||
diag : sequence
|
||||
N positive entries that serve as scale factors for the variables.
|
||||
"""
|
||||
|
||||
_check_unknown_options(unknown_options)
|
||||
x, cov_x, info, msg, ier = leastsq(fun, x0, args=args, Dfun=jac,
|
||||
full_output=True,
|
||||
col_deriv=col_deriv, xtol=xtol,
|
||||
ftol=ftol, gtol=gtol,
|
||||
maxfev=maxiter, epsfcn=eps,
|
||||
factor=factor, diag=diag)
|
||||
sol = OptimizeResult(x=x, message=msg, status=ier,
|
||||
success=ier in (1, 2, 3, 4), cov_x=cov_x,
|
||||
fun=info.pop('fvec'))
|
||||
sol.update(info)
|
||||
return sol
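

# Illustrative sketch, not part of the SciPy source: the 'lm' path above wraps
# `leastsq`; with ``jac=True`` a single callable may return both the residuals
# and the Jacobian (handled by MemoizeJac in `root`). Names are hypothetical.
def _demo_root_lm_combined_jac():
    import numpy as np
    from scipy import optimize

    def fun_and_jac(x):
        f = [x[0] + 2.0 * x[1] - 3.0, x[0] - x[1]]
        J = np.array([[1.0, 2.0], [1.0, -1.0]])
        return f, J

    sol = optimize.root(fun_and_jac, [0.0, 0.0], jac=True, method='lm')
    return sol.x                     # linear system with exact root [1., 1.]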
|
||||
|
||||
|
||||
def _root_nonlin_solve(fun, x0, args=(), jac=None,
|
||||
_callback=None, _method=None,
|
||||
nit=None, disp=False, maxiter=None,
|
||||
ftol=None, fatol=None, xtol=None, xatol=None,
|
||||
tol_norm=None, line_search='armijo', jac_options=None,
|
||||
**unknown_options):
|
||||
_check_unknown_options(unknown_options)
|
||||
|
||||
f_tol = fatol
|
||||
f_rtol = ftol
|
||||
x_tol = xatol
|
||||
x_rtol = xtol
|
||||
verbose = disp
|
||||
if jac_options is None:
|
||||
jac_options = dict()
|
||||
|
||||
jacobian = {'broyden1': nonlin.BroydenFirst,
|
||||
'broyden2': nonlin.BroydenSecond,
|
||||
'anderson': nonlin.Anderson,
|
||||
'linearmixing': nonlin.LinearMixing,
|
||||
'diagbroyden': nonlin.DiagBroyden,
|
||||
'excitingmixing': nonlin.ExcitingMixing,
|
||||
'krylov': nonlin.KrylovJacobian
|
||||
}[_method]
|
||||
|
||||
if args:
|
||||
if jac:
|
||||
def f(x):
|
||||
return fun(x, *args)[0]
|
||||
else:
|
||||
def f(x):
|
||||
return fun(x, *args)
|
||||
else:
|
||||
f = fun
|
||||
|
||||
x, info = nonlin.nonlin_solve(f, x0, jacobian=jacobian(**jac_options),
|
||||
iter=nit, verbose=verbose,
|
||||
maxiter=maxiter, f_tol=f_tol,
|
||||
f_rtol=f_rtol, x_tol=x_tol,
|
||||
x_rtol=x_rtol, tol_norm=tol_norm,
|
||||
line_search=line_search,
|
||||
callback=_callback, full_output=True,
|
||||
raise_exception=False)
|
||||
sol = OptimizeResult(x=x)
|
||||
sol.update(info)
|
||||
return sol
|
||||
|
||||
def _root_broyden1_doc():
|
||||
"""
|
||||
Options
|
||||
-------
|
||||
nit : int, optional
|
||||
Number of iterations to make. If omitted (default), make as many
|
||||
as required to meet tolerances.
|
||||
disp : bool, optional
|
||||
Print status to stdout on every iteration.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations to make. If more are needed to
|
||||
meet convergence, `NoConvergence` is raised.
|
||||
ftol : float, optional
|
||||
Relative tolerance for the residual. If omitted, not used.
|
||||
fatol : float, optional
|
||||
Absolute tolerance (in max-norm) for the residual.
|
||||
If omitted, default is 6e-6.
|
||||
xtol : float, optional
|
||||
Relative minimum step size. If omitted, not used.
|
||||
xatol : float, optional
|
||||
Absolute minimum step size, as determined from the Jacobian
|
||||
approximation. If the step size is smaller than this, optimization
|
||||
is terminated as successful. If omitted, not used.
|
||||
tol_norm : function(vector) -> scalar, optional
|
||||
Norm to use in convergence check. Default is the maximum norm.
|
||||
line_search : {None, 'armijo' (default), 'wolfe'}, optional
|
||||
Which type of a line search to use to determine the step size in
|
||||
the direction given by the Jacobian approximation. Defaults to
|
||||
'armijo'.
|
||||
jac_options : dict, optional
|
||||
Options for the respective Jacobian approximation.
|
||||
alpha : float, optional
|
||||
Initial guess for the Jacobian is (-1/alpha).
|
||||
reduction_method : str or tuple, optional
|
||||
Method used in ensuring that the rank of the Broyden
|
||||
matrix stays low. Can either be a string giving the
|
||||
name of the method, or a tuple of the form ``(method,
|
||||
param1, param2, ...)`` that gives the name of the
|
||||
method and values for additional parameters.
|
||||
|
||||
Methods available:
|
||||
|
||||
- ``restart``
|
||||
Drop all matrix columns. Has no
|
||||
extra parameters.
|
||||
- ``simple``
|
||||
Drop oldest matrix column. Has no
|
||||
extra parameters.
|
||||
- ``svd``
|
||||
Keep only the most significant SVD
|
||||
components.
|
||||
|
||||
Extra parameters:
|
||||
|
||||
- ``to_retain``
|
||||
Number of SVD components to
|
||||
retain when rank reduction is done.
|
||||
Default is ``max_rank - 2``.
|
||||
max_rank : int, optional
|
||||
Maximum rank for the Broyden matrix.
|
||||
Default is infinity (i.e., no rank reduction).
|
||||
"""
|
||||
pass
|
||||
|
||||
def _root_broyden2_doc():
|
||||
"""
|
||||
Options
|
||||
-------
|
||||
nit : int, optional
|
||||
Number of iterations to make. If omitted (default), make as many
|
||||
as required to meet tolerances.
|
||||
disp : bool, optional
|
||||
Print status to stdout on every iteration.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations to make. If more are needed to
|
||||
meet convergence, `NoConvergence` is raised.
|
||||
ftol : float, optional
|
||||
Relative tolerance for the residual. If omitted, not used.
|
||||
fatol : float, optional
|
||||
Absolute tolerance (in max-norm) for the residual.
|
||||
If omitted, default is 6e-6.
|
||||
xtol : float, optional
|
||||
Relative minimum step size. If omitted, not used.
|
||||
xatol : float, optional
|
||||
Absolute minimum step size, as determined from the Jacobian
|
||||
approximation. If the step size is smaller than this, optimization
|
||||
is terminated as successful. If omitted, not used.
|
||||
tol_norm : function(vector) -> scalar, optional
|
||||
Norm to use in convergence check. Default is the maximum norm.
|
||||
line_search : {None, 'armijo' (default), 'wolfe'}, optional
|
||||
Which type of a line search to use to determine the step size in
|
||||
the direction given by the Jacobian approximation. Defaults to
|
||||
'armijo'.
|
||||
jac_options : dict, optional
|
||||
Options for the respective Jacobian approximation.
|
||||
|
||||
alpha : float, optional
|
||||
Initial guess for the Jacobian is (-1/alpha).
|
||||
reduction_method : str or tuple, optional
|
||||
Method used in ensuring that the rank of the Broyden
|
||||
matrix stays low. Can either be a string giving the
|
||||
name of the method, or a tuple of the form ``(method,
|
||||
param1, param2, ...)`` that gives the name of the
|
||||
method and values for additional parameters.
|
||||
|
||||
Methods available:
|
||||
|
||||
- ``restart``
|
||||
Drop all matrix columns. Has no
|
||||
extra parameters.
|
||||
- ``simple``
|
||||
Drop oldest matrix column. Has no
|
||||
extra parameters.
|
||||
- ``svd``
|
||||
Keep only the most significant SVD
|
||||
components.
|
||||
|
||||
Extra parameters:
|
||||
|
||||
- ``to_retain``
|
||||
Number of SVD components to
|
||||
retain when rank reduction is done.
|
||||
Default is ``max_rank - 2``.
|
||||
max_rank : int, optional
|
||||
Maximum rank for the Broyden matrix.
|
||||
Default is infinity (i.e., no rank reduction).
|
||||
"""
|
||||
pass
|
||||
|
||||
def _root_anderson_doc():
|
||||
"""
|
||||
Options
|
||||
-------
|
||||
nit : int, optional
|
||||
Number of iterations to make. If omitted (default), make as many
|
||||
as required to meet tolerances.
|
||||
disp : bool, optional
|
||||
Print status to stdout on every iteration.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations to make. If more are needed to
|
||||
meet convergence, `NoConvergence` is raised.
|
||||
ftol : float, optional
|
||||
Relative tolerance for the residual. If omitted, not used.
|
||||
fatol : float, optional
|
||||
Absolute tolerance (in max-norm) for the residual.
|
||||
If omitted, default is 6e-6.
|
||||
xtol : float, optional
|
||||
Relative minimum step size. If omitted, not used.
|
||||
xatol : float, optional
|
||||
Absolute minimum step size, as determined from the Jacobian
|
||||
approximation. If the step size is smaller than this, optimization
|
||||
is terminated as successful. If omitted, not used.
|
||||
tol_norm : function(vector) -> scalar, optional
|
||||
Norm to use in convergence check. Default is the maximum norm.
|
||||
line_search : {None, 'armijo' (default), 'wolfe'}, optional
|
||||
Which type of a line search to use to determine the step size in
|
||||
the direction given by the Jacobian approximation. Defaults to
|
||||
'armijo'.
|
||||
jac_options : dict, optional
|
||||
Options for the respective Jacobian approximation.
|
||||
|
||||
alpha : float, optional
|
||||
Initial guess for the Jacobian is (-1/alpha).
|
||||
M : float, optional
|
||||
Number of previous vectors to retain. Defaults to 5.
|
||||
w0 : float, optional
|
||||
Regularization parameter for numerical stability.
|
||||
Compared to unity, good values of the order of 0.01.
|
||||
"""
|
||||
pass
|
||||
|
||||
def _root_linearmixing_doc():
|
||||
"""
|
||||
Options
|
||||
-------
|
||||
nit : int, optional
|
||||
Number of iterations to make. If omitted (default), make as many
|
||||
as required to meet tolerances.
|
||||
disp : bool, optional
|
||||
Print status to stdout on every iteration.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations to make. If more are needed to
|
||||
meet convergence, ``NoConvergence`` is raised.
|
||||
ftol : float, optional
|
||||
Relative tolerance for the residual. If omitted, not used.
|
||||
fatol : float, optional
|
||||
Absolute tolerance (in max-norm) for the residual.
|
||||
If omitted, default is 6e-6.
|
||||
xtol : float, optional
|
||||
Relative minimum step size. If omitted, not used.
|
||||
xatol : float, optional
|
||||
Absolute minimum step size, as determined from the Jacobian
|
||||
approximation. If the step size is smaller than this, optimization
|
||||
is terminated as successful. If omitted, not used.
|
||||
tol_norm : function(vector) -> scalar, optional
|
||||
Norm to use in convergence check. Default is the maximum norm.
|
||||
line_search : {None, 'armijo' (default), 'wolfe'}, optional
|
||||
Which type of a line search to use to determine the step size in
|
||||
the direction given by the Jacobian approximation. Defaults to
|
||||
'armijo'.
|
||||
jac_options : dict, optional
|
||||
Options for the respective Jacobian approximation.
|
||||
|
||||
alpha : float, optional
|
||||
Initial guess for the Jacobian is (-1/alpha).
|
||||
"""
|
||||
pass
|
||||
|
||||
def _root_diagbroyden_doc():
|
||||
"""
|
||||
Options
|
||||
-------
|
||||
nit : int, optional
|
||||
Number of iterations to make. If omitted (default), make as many
|
||||
as required to meet tolerances.
|
||||
disp : bool, optional
|
||||
Print status to stdout on every iteration.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations to make. If more are needed to
|
||||
meet convergence, `NoConvergence` is raised.
|
||||
ftol : float, optional
|
||||
Relative tolerance for the residual. If omitted, not used.
|
||||
fatol : float, optional
|
||||
Absolute tolerance (in max-norm) for the residual.
|
||||
If omitted, default is 6e-6.
|
||||
xtol : float, optional
|
||||
Relative minimum step size. If omitted, not used.
|
||||
xatol : float, optional
|
||||
Absolute minimum step size, as determined from the Jacobian
|
||||
approximation. If the step size is smaller than this, optimization
|
||||
is terminated as successful. If omitted, not used.
|
||||
tol_norm : function(vector) -> scalar, optional
|
||||
Norm to use in convergence check. Default is the maximum norm.
|
||||
line_search : {None, 'armijo' (default), 'wolfe'}, optional
|
||||
Which type of a line search to use to determine the step size in
|
||||
the direction given by the Jacobian approximation. Defaults to
|
||||
'armijo'.
|
||||
jac_options : dict, optional
|
||||
Options for the respective Jacobian approximation.
|
||||
|
||||
alpha : float, optional
|
||||
Initial guess for the Jacobian is (-1/alpha).
|
||||
"""
|
||||
pass
|
||||
|
||||
def _root_excitingmixing_doc():
|
||||
"""
|
||||
Options
|
||||
-------
|
||||
nit : int, optional
|
||||
Number of iterations to make. If omitted (default), make as many
|
||||
as required to meet tolerances.
|
||||
disp : bool, optional
|
||||
Print status to stdout on every iteration.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations to make. If more are needed to
|
||||
meet convergence, `NoConvergence` is raised.
|
||||
ftol : float, optional
|
||||
Relative tolerance for the residual. If omitted, not used.
|
||||
fatol : float, optional
|
||||
Absolute tolerance (in max-norm) for the residual.
|
||||
If omitted, default is 6e-6.
|
||||
xtol : float, optional
|
||||
Relative minimum step size. If omitted, not used.
|
||||
xatol : float, optional
|
||||
Absolute minimum step size, as determined from the Jacobian
|
||||
approximation. If the step size is smaller than this, optimization
|
||||
is terminated as successful. If omitted, not used.
|
||||
tol_norm : function(vector) -> scalar, optional
|
||||
Norm to use in convergence check. Default is the maximum norm.
|
||||
line_search : {None, 'armijo' (default), 'wolfe'}, optional
|
||||
Which type of a line search to use to determine the step size in
|
||||
the direction given by the Jacobian approximation. Defaults to
|
||||
'armijo'.
|
||||
jac_options : dict, optional
|
||||
Options for the respective Jacobian approximation.
|
||||
|
||||
alpha : float, optional
|
||||
Initial Jacobian approximation is (-1/alpha).
|
||||
alphamax : float, optional
|
||||
The entries of the diagonal Jacobian are kept in the range
|
||||
``[alpha, alphamax]``.
|
||||
"""
|
||||
pass
|
||||
|
||||
def _root_krylov_doc():
|
||||
"""
|
||||
Options
|
||||
-------
|
||||
nit : int, optional
|
||||
Number of iterations to make. If omitted (default), make as many
|
||||
as required to meet tolerances.
|
||||
disp : bool, optional
|
||||
Print status to stdout on every iteration.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations to make. If more are needed to
|
||||
meet convergence, `NoConvergence` is raised.
|
||||
ftol : float, optional
|
||||
Relative tolerance for the residual. If omitted, not used.
|
||||
fatol : float, optional
|
||||
Absolute tolerance (in max-norm) for the residual.
|
||||
If omitted, default is 6e-6.
|
||||
xtol : float, optional
|
||||
Relative minimum step size. If omitted, not used.
|
||||
xatol : float, optional
|
||||
Absolute minimum step size, as determined from the Jacobian
|
||||
approximation. If the step size is smaller than this, optimization
|
||||
is terminated as successful. If omitted, not used.
|
||||
tol_norm : function(vector) -> scalar, optional
|
||||
Norm to use in convergence check. Default is the maximum norm.
|
||||
line_search : {None, 'armijo' (default), 'wolfe'}, optional
|
||||
Which type of a line search to use to determine the step size in
|
||||
the direction given by the Jacobian approximation. Defaults to
|
||||
'armijo'.
|
||||
jac_options : dict, optional
|
||||
Options for the respective Jacobian approximation.
|
||||
|
||||
rdiff : float, optional
|
||||
Relative step size to use in numerical differentiation.
|
||||
method : {'lgmres', 'gmres', 'bicgstab', 'cgs', 'minres'} or function
|
||||
Krylov method to use to approximate the Jacobian.
|
||||
Can be a string, or a function implementing the same
|
||||
interface as the iterative solvers in
|
||||
`scipy.sparse.linalg`.
|
||||
|
||||
The default is `scipy.sparse.linalg.lgmres`.
|
||||
inner_M : LinearOperator or InverseJacobian
|
||||
Preconditioner for the inner Krylov iteration.
|
||||
Note that you can also use inverse Jacobians as (adaptive)
|
||||
preconditioners. For example,
|
||||
|
||||
>>> jac = BroydenFirst()
|
||||
>>> kjac = KrylovJacobian(inner_M=jac.inverse)
|
||||
|
||||
If the preconditioner has a method named 'update', it will
|
||||
be called as ``update(x, f)`` after each nonlinear step,
|
||||
with ``x`` giving the current point, and ``f`` the current
|
||||
function value.
|
||||
inner_tol, inner_maxiter, ...
|
||||
Parameters to pass on to the "inner" Krylov solver.
|
||||
See `scipy.sparse.linalg.gmres` for details.
|
||||
outer_k : int, optional
|
||||
Size of the subspace kept across LGMRES nonlinear
|
||||
iterations.
|
||||
|
||||
See `scipy.sparse.linalg.lgmres` for details.
|
||||
"""
|
||||
pass
|
458
venv/Lib/site-packages/scipy/optimize/_root_scalar.py
Normal file
@ -0,0 +1,458 @@
"""
|
||||
Unified interfaces to root finding algorithms for real or complex
|
||||
scalar functions.
|
||||
|
||||
Functions
|
||||
---------
|
||||
- root : find a root of a scalar function.
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
from . import zeros as optzeros
|
||||
|
||||
__all__ = ['root_scalar']
|
||||
|
||||
|
||||
class MemoizeDer(object):
|
||||
"""Decorator that caches the value and derivative(s) of function each
|
||||
time it is called.
|
||||
|
||||
This is a simplistic memoizer that calls and caches a single value
|
||||
of `f(x, *args)`.
|
||||
It assumes that `args` does not change between invocations.
|
||||
It supports the use case of a root-finder where `args` is fixed,
|
||||
`x` changes, and only rarely, if at all, does x assume the same value
|
||||
more than once."""
|
||||
def __init__(self, fun):
|
||||
self.fun = fun
|
||||
self.vals = None
|
||||
self.x = None
|
||||
self.n_calls = 0
|
||||
|
||||
def __call__(self, x, *args):
|
||||
r"""Calculate f or use cached value if available"""
|
||||
# Derivative may be requested before the function itself, always check
|
||||
if self.vals is None or x != self.x:
|
||||
fg = self.fun(x, *args)
|
||||
self.x = x
|
||||
self.n_calls += 1
|
||||
self.vals = fg[:]
|
||||
return self.vals[0]
|
||||
|
||||
def fprime(self, x, *args):
|
||||
r"""Calculate f' or use a cached value if available"""
|
||||
if self.vals is None or x != self.x:
|
||||
self(x, *args)
|
||||
return self.vals[1]
|
||||
|
||||
def fprime2(self, x, *args):
|
||||
r"""Calculate f'' or use a cached value if available"""
|
||||
if self.vals is None or x != self.x:
|
||||
self(x, *args)
|
||||
return self.vals[2]
|
||||
|
||||
def ncalls(self):
|
||||
return self.n_calls
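

# Illustrative sketch, not part of the SciPy source: how MemoizeDer above lets
# a combined (f, f', f'') callable be evaluated once per point even when the
# derivatives are requested separately. The helper name is hypothetical.
def _demo_memoize_der():
    def f_p_pp(x):
        return x**3 - 1, 3 * x**2, 6 * x

    g = MemoizeDer(f_p_pp)
    val = g(2.0)             # evaluates the tuple once and caches it
    der = g.fprime(2.0)      # same x: served from the cache, no new call
    return val, der, g.n_calls   # (7.0, 12.0, 1)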
|
||||
|
||||
|
||||
def root_scalar(f, args=(), method=None, bracket=None,
|
||||
fprime=None, fprime2=None,
|
||||
x0=None, x1=None,
|
||||
xtol=None, rtol=None, maxiter=None,
|
||||
options=None):
|
||||
"""
|
||||
Find a root of a scalar function.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
f : callable
|
||||
A function to find a root of.
|
||||
args : tuple, optional
|
||||
Extra arguments passed to the objective function and its derivative(s).
|
||||
method : str, optional
|
||||
Type of solver. Should be one of
|
||||
|
||||
- 'bisect' :ref:`(see here) <optimize.root_scalar-bisect>`
|
||||
- 'brentq' :ref:`(see here) <optimize.root_scalar-brentq>`
|
||||
- 'brenth' :ref:`(see here) <optimize.root_scalar-brenth>`
|
||||
- 'ridder' :ref:`(see here) <optimize.root_scalar-ridder>`
|
||||
- 'toms748' :ref:`(see here) <optimize.root_scalar-toms748>`
|
||||
- 'newton' :ref:`(see here) <optimize.root_scalar-newton>`
|
||||
- 'secant' :ref:`(see here) <optimize.root_scalar-secant>`
|
||||
- 'halley' :ref:`(see here) <optimize.root_scalar-halley>`
|
||||
|
||||
bracket: A sequence of 2 floats, optional
|
||||
An interval bracketing a root. `f(x, *args)` must have different
|
||||
signs at the two endpoints.
|
||||
x0 : float, optional
|
||||
Initial guess.
|
||||
x1 : float, optional
|
||||
A second guess.
|
||||
fprime : bool or callable, optional
|
||||
If `fprime` is a boolean and is True, `f` is assumed to return the
|
||||
value of the objective function and of the derivative.
|
||||
`fprime` can also be a callable returning the derivative of `f`. In
|
||||
this case, it must accept the same arguments as `f`.
|
||||
fprime2 : bool or callable, optional
|
||||
If `fprime2` is a boolean and is True, `f` is assumed to return the
|
||||
value of the objective function and of the
|
||||
first and second derivatives.
|
||||
`fprime2` can also be a callable returning the second derivative of `f`.
|
||||
In this case, it must accept the same arguments as `f`.
|
||||
xtol : float, optional
|
||||
Tolerance (absolute) for termination.
|
||||
rtol : float, optional
|
||||
Tolerance (relative) for termination.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations.
|
||||
options : dict, optional
|
||||
A dictionary of solver options. E.g., ``k``, see
|
||||
:obj:`show_options()` for details.
|
||||
|
||||
Returns
|
||||
-------
|
||||
sol : RootResults
|
||||
The solution represented as a ``RootResults`` object.
|
||||
Important attributes are: ``root`` the solution, ``converged`` a
|
||||
boolean flag indicating if the algorithm exited successfully and
|
||||
``flag`` which describes the cause of the termination. See
|
||||
`RootResults` for a description of other attributes.
|
||||
|
||||
See also
|
||||
--------
|
||||
show_options : Additional options accepted by the solvers
|
||||
root : Find a root of a vector function.
|
||||
|
||||
Notes
|
||||
-----
|
||||
This section describes the available solvers that can be selected by the
|
||||
'method' parameter.
|
||||
|
||||
The default is to use the best method available for the situation
|
||||
presented.
|
||||
If a bracket is provided, it may use one of the bracketing methods.
|
||||
If a derivative and an initial value are specified, it may
|
||||
select one of the derivative-based methods.
|
||||
If no method is judged applicable, it will raise an Exception.
|
||||
|
||||
|
||||
Examples
|
||||
--------
|
||||
|
||||
Find the root of a simple cubic
|
||||
|
||||
>>> from scipy import optimize
|
||||
>>> def f(x):
|
||||
... return (x**3 - 1) # only one real root at x = 1
|
||||
|
||||
>>> def fprime(x):
|
||||
... return 3*x**2
|
||||
|
||||
The `brentq` method takes as input a bracket
|
||||
|
||||
>>> sol = optimize.root_scalar(f, bracket=[0, 3], method='brentq')
|
||||
>>> sol.root, sol.iterations, sol.function_calls
|
||||
(1.0, 10, 11)
|
||||
|
||||
The `newton` method takes as input a single point and uses the derivative(s)
|
||||
|
||||
>>> sol = optimize.root_scalar(f, x0=0.2, fprime=fprime, method='newton')
|
||||
>>> sol.root, sol.iterations, sol.function_calls
|
||||
(1.0, 11, 22)
|
||||
|
||||
The function can provide the value and derivative(s) in a single call.
|
||||
|
||||
>>> def f_p_pp(x):
|
||||
... return (x**3 - 1), 3*x**2, 6*x
|
||||
|
||||
>>> sol = optimize.root_scalar(f_p_pp, x0=0.2, fprime=True, method='newton')
|
||||
>>> sol.root, sol.iterations, sol.function_calls
|
||||
(1.0, 11, 11)
|
||||
|
||||
>>> sol = optimize.root_scalar(f_p_pp, x0=0.2, fprime=True, fprime2=True, method='halley')
|
||||
>>> sol.root, sol.iterations, sol.function_calls
|
||||
(1.0, 7, 8)
|
||||
|
||||
|
||||
"""
|
||||
if not isinstance(args, tuple):
|
||||
args = (args,)
|
||||
|
||||
if options is None:
|
||||
options = {}
|
||||
|
||||
# fun also returns the derivative(s)
|
||||
is_memoized = False
|
||||
if fprime2 is not None and not callable(fprime2):
|
||||
if bool(fprime2):
|
||||
f = MemoizeDer(f)
|
||||
is_memoized = True
|
||||
fprime2 = f.fprime2
|
||||
fprime = f.fprime
|
||||
else:
|
||||
fprime2 = None
|
||||
if fprime is not None and not callable(fprime):
|
||||
if bool(fprime):
|
||||
f = MemoizeDer(f)
|
||||
is_memoized = True
|
||||
fprime = f.fprime
|
||||
else:
|
||||
fprime = None
|
||||
|
||||
# respect solver-specific default tolerances - only pass in if actually set
|
||||
kwargs = {}
|
||||
for k in ['xtol', 'rtol', 'maxiter']:
|
||||
v = locals().get(k)
|
||||
if v is not None:
|
||||
kwargs[k] = v
|
||||
|
||||
# Set any solver-specific options
|
||||
if options:
|
||||
kwargs.update(options)
|
||||
# Always request full_output from the underlying method as _root_scalar
|
||||
# always returns a RootResults object
|
||||
kwargs.update(full_output=True, disp=False)
|
||||
|
||||
# Pick a method if not specified.
|
||||
# Use the "best" method available for the situation.
|
||||
if not method:
|
||||
if bracket:
|
||||
method = 'brentq'
|
||||
elif x0 is not None:
|
||||
if fprime:
|
||||
if fprime2:
|
||||
method = 'halley'
|
||||
else:
|
||||
method = 'newton'
|
||||
else:
|
||||
method = 'secant'
|
||||
if not method:
|
||||
raise ValueError('Unable to select a solver as neither bracket '
|
||||
'nor starting point provided.')
|
||||
|
||||
meth = method.lower()
|
||||
map2underlying = {'halley': 'newton', 'secant': 'newton'}
|
||||
|
||||
try:
|
||||
methodc = getattr(optzeros, map2underlying.get(meth, meth))
|
||||
except AttributeError:
|
||||
raise ValueError('Unknown solver %s' % meth)
|
||||
|
||||
if meth in ['bisect', 'ridder', 'brentq', 'brenth', 'toms748']:
|
||||
if not isinstance(bracket, (list, tuple, np.ndarray)):
|
||||
raise ValueError('Bracket needed for %s' % method)
|
||||
|
||||
a, b = bracket[:2]
|
||||
r, sol = methodc(f, a, b, args=args, **kwargs)
|
||||
elif meth in ['secant']:
|
||||
if x0 is None:
|
||||
raise ValueError('x0 must not be None for %s' % method)
|
||||
if x1 is None:
|
||||
raise ValueError('x1 must not be None for %s' % method)
|
||||
if 'xtol' in kwargs:
|
||||
kwargs['tol'] = kwargs.pop('xtol')
|
||||
r, sol = methodc(f, x0, args=args, fprime=None, fprime2=None,
|
||||
x1=x1, **kwargs)
|
||||
elif meth in ['newton']:
|
||||
if x0 is None:
|
||||
raise ValueError('x0 must not be None for %s' % method)
|
||||
if not fprime:
|
||||
raise ValueError('fprime must be specified for %s' % method)
|
||||
if 'xtol' in kwargs:
|
||||
kwargs['tol'] = kwargs.pop('xtol')
|
||||
r, sol = methodc(f, x0, args=args, fprime=fprime, fprime2=None,
|
||||
**kwargs)
|
||||
elif meth in ['halley']:
|
||||
if x0 is None:
|
||||
raise ValueError('x0 must not be None for %s' % method)
|
||||
if not fprime:
|
||||
raise ValueError('fprime must be specified for %s' % method)
|
||||
if not fprime2:
|
||||
raise ValueError('fprime2 must be specified for %s' % method)
|
||||
if 'xtol' in kwargs:
|
||||
kwargs['tol'] = kwargs.pop('xtol')
|
||||
r, sol = methodc(f, x0, args=args, fprime=fprime, fprime2=fprime2, **kwargs)
|
||||
else:
|
||||
raise ValueError('Unknown solver %s' % method)
|
||||
|
||||
if is_memoized:
|
||||
# Replace the function_calls count with the memoized count.
|
||||
# Avoids double and triple-counting.
|
||||
n_calls = f.n_calls
|
||||
sol.function_calls = n_calls
|
||||
|
||||
return sol
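

# Illustrative sketch, not part of the SciPy source: the default-method
# selection implemented above -- a bracket selects 'brentq', while x0 plus a
# derivative selects 'newton'. The helper name is hypothetical.
def _demo_root_scalar_defaults():
    from scipy import optimize

    def f(x):
        return x**2 - 2.0

    sol_bracket = optimize.root_scalar(f, bracket=[0, 2])                 # brentq
    sol_newton = optimize.root_scalar(f, x0=1.0, fprime=lambda x: 2 * x)  # newton
    return sol_bracket.root, sol_newton.root      # both close to 1.41421356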
|
||||
|
||||
|
||||
def _root_scalar_brentq_doc():
|
||||
r"""
|
||||
Options
|
||||
-------
|
||||
args : tuple, optional
|
||||
Extra arguments passed to the objective function.
|
||||
xtol : float, optional
|
||||
Tolerance (absolute) for termination.
|
||||
rtol : float, optional
|
||||
Tolerance (relative) for termination.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations.
|
||||
options: dict, optional
|
||||
Specifies any method-specific options not covered above
|
||||
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def _root_scalar_brenth_doc():
|
||||
r"""
|
||||
Options
|
||||
-------
|
||||
args : tuple, optional
|
||||
Extra arguments passed to the objective function.
|
||||
xtol : float, optional
|
||||
Tolerance (absolute) for termination.
|
||||
rtol : float, optional
|
||||
Tolerance (relative) for termination.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations.
|
||||
options: dict, optional
|
||||
Specifies any method-specific options not covered above.
|
||||
|
||||
"""
|
||||
pass
|
||||
|
||||
def _root_scalar_toms748_doc():
|
||||
r"""
|
||||
Options
|
||||
-------
|
||||
args : tuple, optional
|
||||
Extra arguments passed to the objective function.
|
||||
xtol : float, optional
|
||||
Tolerance (absolute) for termination.
|
||||
rtol : float, optional
|
||||
Tolerance (relative) for termination.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations.
|
||||
options: dict, optional
|
||||
Specifies any method-specific options not covered above.
|
||||
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def _root_scalar_secant_doc():
|
||||
r"""
|
||||
Options
|
||||
-------
|
||||
args : tuple, optional
|
||||
Extra arguments passed to the objective function.
|
||||
xtol : float, optional
|
||||
Tolerance (absolute) for termination.
|
||||
rtol : float, optional
|
||||
Tolerance (relative) for termination.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations.
|
||||
x0 : float, required
|
||||
Initial guess.
|
||||
x1 : float, required
|
||||
A second guess.
|
||||
options: dict, optional
|
||||
Specifies any method-specific options not covered above.
|
||||
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def _root_scalar_newton_doc():
|
||||
r"""
|
||||
Options
|
||||
-------
|
||||
args : tuple, optional
|
||||
Extra arguments passed to the objective function and its derivative.
|
||||
xtol : float, optional
|
||||
Tolerance (absolute) for termination.
|
||||
rtol : float, optional
|
||||
Tolerance (relative) for termination.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations.
|
||||
x0 : float, required
|
||||
Initial guess.
|
||||
fprime : bool or callable, optional
|
||||
If `fprime` is a boolean and is True, `f` is assumed to return the
|
||||
value of derivative along with the objective function.
|
||||
`fprime` can also be a callable returning the derivative of `f`. In
|
||||
this case, it must accept the same arguments as `f`.
|
||||
options: dict, optional
|
||||
Specifies any method-specific options not covered above.
|
||||
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def _root_scalar_halley_doc():
|
||||
r"""
|
||||
Options
|
||||
-------
|
||||
args : tuple, optional
|
||||
Extra arguments passed to the objective function and its derivatives.
|
||||
xtol : float, optional
|
||||
Tolerance (absolute) for termination.
|
||||
rtol : float, optional
|
||||
Tolerance (relative) for termination.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations.
|
||||
x0 : float, required
|
||||
Initial guess.
|
||||
fprime : bool or callable, required
|
||||
If `fprime` is a boolean and is True, `f` is assumed to return the
|
||||
value of derivative along with the objective function.
|
||||
`fprime` can also be a callable returning the derivative of `f`. In
|
||||
this case, it must accept the same arguments as `f`.
|
||||
fprime2 : bool or callable, required
|
||||
If `fprime2` is a boolean and is True, `f` is assumed to return the
|
||||
value of 1st and 2nd derivatives along with the objective function.
|
||||
`fprime2` can also be a callable returning the 2nd derivative of `f`.
|
||||
In this case, it must accept the same arguments as `f`.
|
||||
options: dict, optional
|
||||
Specifies any method-specific options not covered above.
|
||||
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def _root_scalar_ridder_doc():
|
||||
r"""
|
||||
Options
|
||||
-------
|
||||
args : tuple, optional
|
||||
Extra arguments passed to the objective function.
|
||||
xtol : float, optional
|
||||
Tolerance (absolute) for termination.
|
||||
rtol : float, optional
|
||||
Tolerance (relative) for termination.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations.
|
||||
options: dict, optional
|
||||
Specifies any method-specific options not covered above.
|
||||
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
def _root_scalar_bisect_doc():
|
||||
r"""
|
||||
Options
|
||||
-------
|
||||
args : tuple, optional
|
||||
Extra arguments passed to the objective function.
|
||||
xtol : float, optional
|
||||
Tolerance (absolute) for termination.
|
||||
rtol : float, optional
|
||||
Tolerance (relative) for termination.
|
||||
maxiter : int, optional
|
||||
Maximum number of iterations.
|
||||
options: dict, optional
|
||||
Specifies any method-specific options not covered above.
|
||||
|
||||
"""
|
||||
pass
|
1669
venv/Lib/site-packages/scipy/optimize/_shgo.py
Normal file
File diff suppressed because it is too large
372
venv/Lib/site-packages/scipy/optimize/_shgo_lib/sobol_seq.py
Normal file
@ -0,0 +1,372 @@
"""
|
||||
Licensing:
|
||||
This code is distributed under the MIT license.
|
||||
|
||||
|
||||
Authors:
|
||||
Original FORTRAN77 version of i4_sobol by Bennett Fox.
|
||||
MATLAB version by John Burkardt.
|
||||
PYTHON version by Corrado Chisari
|
||||
|
||||
Original Python version of is_prime by Corrado Chisari
|
||||
|
||||
Original MATLAB versions of other functions by John Burkardt.
|
||||
PYTHON versions by Corrado Chisari
|
||||
|
||||
Original code is available from
|
||||
http://people.sc.fsu.edu/~jburkardt/py_src/sobol/sobol.html
|
||||
|
||||
Modifications:
|
||||
Wrapped into Python class [30.10.2017]
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
__all__ = ['Sobol']
|
||||
|
||||
|
||||
class Sobol:
|
||||
def __init__(self):
|
||||
# Init class variables
|
||||
self.atmost = None
|
||||
self.dim_max = None
|
||||
self.dim_num_save = None
|
||||
self.initialized = None
|
||||
self.lastq = None
|
||||
self.log_max = None
|
||||
self.maxcol = None
|
||||
self.poly = None
|
||||
self.recipd = None
|
||||
self.seed_save = None
|
||||
self.v = None
|
||||
|
||||
def i4_sobol_generate(self, dim_num, n, skip=1):
|
||||
"""
|
||||
i4_sobol_generate generates a Sobol dataset.
|
||||
|
||||
Parameters:
|
||||
Input, integer dim_num, the spatial dimension.
|
||||
Input, integer N, the number of points to generate.
|
||||
Input, integer SKIP, the number of initial points to skip.
|
||||
|
||||
Output, real R(M,N), the points.
|
||||
"""
|
||||
r = np.full((n, dim_num), np.nan)
|
||||
for j in range(n):
|
||||
seed = j + skip
|
||||
r[j, 0:dim_num], next_seed = self.i4_sobol(dim_num, seed)
|
||||
|
||||
return r
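
    def _demo_generate(self):
        # Illustrative sketch, not part of the original module: draw four 2-D
        # points of the Sobol sequence; coordinates lie in [0, 1) and the
        # sequence is deterministic. The method name is hypothetical.
        r = self.i4_sobol_generate(2, 4, skip=1)
        return r.shape, bool(((r >= 0) & (r < 1)).all())   # ((4, 2), True)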
|
||||
|
||||
def i4_bit_hi1(self, n):
|
||||
"""
|
||||
i4_bit_hi1 returns the position of the high 1 bit base 2 in an integer.
|
||||
|
||||
Example:
|
||||
+------+-------------+-----
|
||||
| N | Binary | BIT
|
||||
+------+-------------+-----
|
||||
| 0 | 0 | 0
|
||||
| 1 | 1 | 1
|
||||
| 2 | 10 | 2
|
||||
| 3 | 11 | 2
|
||||
| 4 | 100 | 3
|
||||
| 5 | 101 | 3
|
||||
| 6 | 110 | 3
|
||||
| 7 | 111 | 3
|
||||
| 8 | 1000 | 4
|
||||
| 9 | 1001 | 4
|
||||
| 10 | 1010 | 4
|
||||
| 11 | 1011 | 4
|
||||
| 12 | 1100 | 4
|
||||
| 13 | 1101 | 4
|
||||
| 14 | 1110 | 4
|
||||
| 15 | 1111 | 4
|
||||
| 16 | 10000 | 5
|
||||
| 17 | 10001 | 5
|
||||
| 1023 | 1111111111 | 10
|
||||
| 1024 | 10000000000 | 11
|
||||
| 1025 | 10000000001 | 11
|
||||
|
||||
Parameters:
|
||||
Input, integer N, the integer to be measured.
|
||||
N should be nonnegative. If N is nonpositive,
|
||||
the value will always be 0.
|
||||
|
||||
Output, integer BIT, the number of bits base 2.
|
||||
"""
|
||||
i = np.floor(n)
|
||||
bit = 0
|
||||
while i > 0:
|
||||
bit += 1
|
||||
i //= 2
|
||||
return bit
|
||||
|
||||
def i4_bit_lo0(self, n):
|
||||
"""
|
||||
I4_BIT_LO0 returns the position of the low 0 bit base 2 in an integer.
|
||||
|
||||
Example:
|
||||
+------+------------+----
|
||||
| N | Binary | BIT
|
||||
+------+------------+----
|
||||
| 0 | 0 | 1
|
||||
| 1 | 1 | 2
|
||||
| 2 | 10 | 1
|
||||
| 3 | 11 | 3
|
||||
| 4 | 100 | 1
|
||||
| 5 | 101 | 2
|
||||
| 6 | 110 | 1
|
||||
| 7 | 111 | 4
|
||||
| 8 | 1000 | 1
|
||||
| 9 | 1001 | 2
|
||||
| 10 | 1010 | 1
|
||||
| 11 | 1011 | 3
|
||||
| 12 | 1100 | 1
|
||||
| 13 | 1101 | 2
|
||||
| 14 | 1110 | 1
|
||||
| 15 | 1111 | 5
|
||||
| 16 | 10000 | 1
|
||||
| 17 | 10001 | 2
|
||||
| 1023 | 1111111111 | 1
|
||||
| 1024 | 0000000000 | 1
|
||||
| 1025 | 0000000001 | 1
|
||||
|
||||
Parameters:
|
||||
Input, integer N, the integer to be measured.
|
||||
N should be nonnegative.
|
||||
|
||||
Output, integer BIT, the position of the low 1 bit.
|
||||
"""
|
||||
bit = 1
|
||||
i = np.floor(n)
|
||||
while i != 2 * (i // 2):
|
||||
bit += 1
|
||||
i //= 2
|
||||
return bit
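
    def _demo_bit_helpers(self):
        # Illustrative sketch, not part of the original module: the two bit
        # helpers above, checked against the tables in their docstrings.
        # The method name is hypothetical.
        assert self.i4_bit_hi1(12) == 4   # 12 = 1100b, highest 1 bit is bit 4
        assert self.i4_bit_lo0(12) == 1   # 12 = 1100b, lowest 0 bit is bit 1
        return True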
|
||||
|
||||
def i4_sobol(self, dim_num, seed):
|
||||
"""
|
||||
i4_sobol generates a new quasirandom Sobol vector with each call.
|
||||
|
||||
Discussion:
|
||||
The routine adapts the ideas of Antonov and Saleev.
|
||||
|
||||
Reference:
|
||||
Antonov, Saleev,
|
||||
USSR Computational Mathematics and Mathematical Physics,
|
||||
Volume 19, 1980, pages 252 - 256.
|
||||
|
||||
Paul Bratley, Bennett Fox,
|
||||
Algorithm 659:
|
||||
Implementing Sobol's Quasirandom Sequence Generator,
|
||||
ACM Transactions on Mathematical Software,
|
||||
Volume 14, Number 1, pp. 88-100, 1988.
|
||||
|
||||
Bennett Fox,
|
||||
Algorithm 647:
|
||||
Implementation and Relative Efficiency of Quasirandom
|
||||
Sequence Generators,
|
||||
ACM Transactions on Mathematical Software,
|
||||
Volume 12, Number 4, pp. 362-376, 1986.
|
||||
|
||||
Ilya Sobol,
|
||||
USSR Computational Mathematics and Mathematical Physics,
|
||||
Volume 16, pp. 236-242, 1977.
|
||||
|
||||
Ilya Sobol, Levitan,
|
||||
The Production of Points Uniformly Distributed in a Multidimensional
|
||||
Cube (in Russian),
|
||||
Preprint IPM Akad. Nauk SSSR,
|
||||
Number 40, Moscow 1976.
|
||||
|
||||
Parameters:
|
||||
Input, integer DIM_NUM, the number of spatial dimensions.
|
||||
DIM_NUM must satisfy 1 <= DIM_NUM <= 40.
|
||||
|
||||
Input/output, integer SEED, the "seed" for the sequence.
|
||||
This is essentially the index in the sequence of the quasirandom
|
||||
value to be generated. On output, SEED has been set to the
|
||||
appropriate next value, usually simply SEED+1.
|
||||
If SEED is less than 0 on input, it is treated as though it were 0.
|
||||
An input value of 0 requests the first (0th) element of the sequence.
|
||||
|
||||
Output, real QUASI(DIM_NUM), the next quasirandom vector.
|
||||
"""
|
||||
|
||||
# if 'self.initialized' not in list(globals().keys()):
|
||||
if self.initialized is None:
|
||||
self.initialized = 0
|
||||
self.dim_num_save = -1
|
||||
|
||||
if not self.initialized or dim_num != self.dim_num_save:
|
||||
self.initialized = 1
|
||||
self.dim_max = 40
|
||||
self.dim_num_save = -1
|
||||
self.log_max = 30
|
||||
self.seed_save = -1
|
||||
|
||||
# Initialize (part of) V.
|
||||
self.v = np.zeros((self.dim_max, self.log_max))
|
||||
self.v[0:40, 0] = np.transpose([
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
|
||||
|
||||
self.v[2:40, 1] = np.transpose([
|
||||
1, 3, 1, 3, 1, 3, 3, 1,
|
||||
3, 1, 3, 1, 3, 1, 1, 3, 1, 3,
|
||||
1, 3, 1, 3, 3, 1, 3, 1, 3, 1,
|
||||
3, 1, 1, 3, 1, 3, 1, 3, 1, 3])
|
||||
|
||||
self.v[3:40, 2] = np.transpose([
|
||||
7, 5, 1, 3, 3, 7, 5,
|
||||
5, 7, 7, 1, 3, 3, 7, 5, 1, 1,
|
||||
5, 3, 3, 1, 7, 5, 1, 3, 3, 7,
|
||||
5, 1, 1, 5, 7, 7, 5, 1, 3, 3])
|
||||
|
||||
self.v[5:40, 3] = np.transpose([
|
||||
1, 7, 9, 13, 11,
|
||||
1, 3, 7, 9, 5, 13, 13, 11, 3, 15,
|
||||
5, 3, 15, 7, 9, 13, 9, 1, 11, 7,
|
||||
5, 15, 1, 15, 11, 5, 3, 1, 7, 9])
|
||||
|
||||
self.v[7:40, 4] = np.transpose([
|
||||
9, 3, 27,
|
||||
15, 29, 21, 23, 19, 11, 25, 7, 13, 17,
|
||||
1, 25, 29, 3, 31, 11, 5, 23, 27, 19,
|
||||
21, 5, 1, 17, 13, 7, 15, 9, 31, 9])
|
||||
|
||||
self.v[13:40, 5] = np.transpose([
|
||||
37, 33, 7, 5, 11, 39, 63,
|
||||
27, 17, 15, 23, 29, 3, 21, 13, 31, 25,
|
||||
9, 49, 33, 19, 29, 11, 19, 27, 15, 25])
|
||||
|
||||
self.v[19:40, 6] = np.transpose([
|
||||
13,
|
||||
33, 115, 41, 79, 17, 29, 119, 75, 73, 105,
|
||||
7, 59, 65, 21, 3, 113, 61, 89, 45, 107])
|
||||
|
||||
self.v[37:40, 7] = np.transpose([
|
||||
7, 23, 39])
|
||||
|
||||
# Set POLY.
|
||||
self.poly = [
|
||||
1, 3, 7, 11, 13, 19, 25, 37, 59, 47,
|
||||
61, 55, 41, 67, 97, 91, 109, 103, 115, 131,
|
||||
193, 137, 145, 143, 241, 157, 185, 167, 229, 171,
|
||||
213, 191, 253, 203, 211, 239, 247, 285, 369, 299]
|
||||
|
||||
self.atmost = 2 ** self.log_max - 1
|
||||
|
||||
# Find the number of bits in ATMOST.
|
||||
self.maxcol = self.i4_bit_hi1(self.atmost)
|
||||
|
||||
# Initialize row 1 of V.
|
||||
self.v[0, 0:self.maxcol] = 1
|
||||
|
||||
# Things to do only if the dimension changed.
|
||||
if dim_num != self.dim_num_save:
|
||||
self.dim_num_save = dim_num
|
||||
|
||||
# Initialize the remaining rows of V.
|
||||
for i in range(2, dim_num + 1):
|
||||
|
||||
# The bits of the integer POLY(I) give the form of polynomial I.

# Find the degree of polynomial I from its binary encoding.
|
||||
j = self.poly[i - 1]
|
||||
m = 0
|
||||
j //= 2
|
||||
while j > 0:
|
||||
j //= 2
|
||||
m += 1
|
||||
|
||||
# Expand this bit pattern to separate
|
||||
# components of the logical array INCLUD.
|
||||
j = self.poly[i - 1]
|
||||
includ = np.zeros(m)
|
||||
for k in range(m, 0, -1):
|
||||
j2 = j // 2
|
||||
includ[k - 1] = (j != 2 * j2)
|
||||
j = j2
|
||||
|
||||
# Calculate the remaining elements of row I as explained
|
||||
# in Bratley and Fox, section 2.
|
||||
for j in range(m + 1, self.maxcol + 1):
|
||||
newv = self.v[i - 1, j - m - 1]
|
||||
lseed = 1
|
||||
for k in range(1, m + 1):
|
||||
lseed *= 2
|
||||
if includ[k - 1]:
|
||||
newv = np.bitwise_xor(
|
||||
int(newv),
|
||||
int(lseed * self.v[i - 1, j - k - 1]))
|
||||
self.v[i - 1, j - 1] = newv
|
||||
|
||||
# Multiply columns of V by appropriate power of 2.
|
||||
lseed = 1
|
||||
for j in range(self.maxcol - 1, 0, -1):
|
||||
lseed *= 2
|
||||
self.v[0:dim_num, j - 1] = self.v[0:dim_num, j - 1] * lseed
|
||||
|
||||
# RECIPD is 1/(common denominator of the elements in V).
|
||||
self.recipd = 1.0 / (2 * lseed)
|
||||
self.lastq = np.zeros(dim_num)
|
||||
|
||||
seed = int(np.floor(seed))
|
||||
|
||||
if seed < 0:
|
||||
seed = 0
|
||||
|
||||
lseed = 1
|
||||
if seed == 0:
|
||||
self.lastq = np.zeros(dim_num)
|
||||
|
||||
elif seed == self.seed_save + 1:
|
||||
|
||||
# Find the position of the right-hand zero in SEED.
|
||||
lseed = self.i4_bit_lo0(seed)
|
||||
|
||||
elif seed <= self.seed_save:
|
||||
|
||||
self.seed_save = 0
|
||||
self.lastq = np.zeros(dim_num)
|
||||
|
||||
for seed_temp in range(int(self.seed_save), int(seed)):
|
||||
lseed = self.i4_bit_lo0(seed_temp)
|
||||
for i in range(1, dim_num + 1):
|
||||
self.lastq[i - 1] = np.bitwise_xor(
|
||||
int(self.lastq[i - 1]), int(self.v[i - 1, lseed - 1]))
|
||||
|
||||
lseed = self.i4_bit_lo0(seed)
|
||||
|
||||
elif self.seed_save + 1 < seed:
|
||||
|
||||
for seed_temp in range(int(self.seed_save + 1), int(seed)):
|
||||
lseed = self.i4_bit_lo0(seed_temp)
|
||||
for i in range(1, dim_num + 1):
|
||||
self.lastq[i - 1] = np.bitwise_xor(
|
||||
int(self.lastq[i - 1]), int(self.v[i - 1, lseed - 1]))
|
||||
|
||||
lseed = self.i4_bit_lo0(seed)
|
||||
|
||||
# Check that the user is not calling too many times!
|
||||
if self.maxcol < lseed:
|
||||
print('I4_SOBOL - Fatal error!')
|
||||
print(' Too many calls!')
|
||||
print(' MAXCOL = %d\n' % self.maxcol)
|
||||
print(' L = %d\n' % lseed)
|
||||
return
|
||||
|
||||
# Calculate the new components of QUASI.
|
||||
quasi = np.zeros(dim_num)
|
||||
for i in range(1, dim_num + 1):
|
||||
quasi[i - 1] = self.lastq[i - 1] * self.recipd
|
||||
self.lastq[i - 1] = np.bitwise_xor(
|
||||
int(self.lastq[i - 1]), int(self.v[i - 1, lseed - 1]))
|
||||
|
||||
self.seed_save = seed
|
||||
seed += 1
|
||||
|
||||
return [quasi, seed]
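# --- Usage sketch (not part of the upstream file). The seed convention above
# means a caller keeps feeding the returned seed into the next call. We assume
# these methods live on a class named `Sobol` in this module; if the class
# name differs, substitute accordingly.
import numpy as np

gen = Sobol()
seed = 0
points = np.empty((8, 3))
for row in range(8):
    # Each call yields the next 3-D quasirandom point and the advanced seed.
    points[row, :], seed = gen.i4_sobol(3, seed)
print(points)  # 8 points spread more evenly over the unit cube than pseudorandom draws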
|
BIN
venv/Lib/site-packages/scipy/optimize/_shgo_lib/sobol_vec.gz
Normal file
BIN
venv/Lib/site-packages/scipy/optimize/_shgo_lib/sobol_vec.gz
Normal file
Binary file not shown.
661
venv/Lib/site-packages/scipy/optimize/_shgo_lib/triangulation.py
Normal file
661
venv/Lib/site-packages/scipy/optimize/_shgo_lib/triangulation.py
Normal file
|
@ -0,0 +1,661 @@
|
|||
import numpy as np
|
||||
import copy
|
||||
|
||||
|
||||
class Complex:
|
||||
def __init__(self, dim, func, func_args=(), symmetry=False, bounds=None,
|
||||
g_cons=None, g_args=()):
|
||||
self.dim = dim
|
||||
self.bounds = bounds
|
||||
self.symmetry = symmetry # TODO: Define the functions to be used
|
||||
# here in init to avoid if checks
|
||||
self.gen = 0
|
||||
self.perm_cycle = 0
|
||||
|
||||
# Every cell is stored in a list of its generation,
|
||||
# e.g., the initial cell is stored in self.H[0]
|
||||
# the first generation of new cells is stored in self.H[1], etc.
|
||||
# When a cell is subgenerated it is removed from this list
|
||||
|
||||
self.H = [] # Storage structure of cells
|
||||
# Cache of all vertices
|
||||
self.V = VertexCache(func, func_args, bounds, g_cons, g_args)
|
||||
|
||||
# Generate n-cube here:
|
||||
self.n_cube(dim, symmetry=symmetry)
|
||||
|
||||
# TODO: Assign functions to the complex instead
|
||||
if symmetry:
|
||||
self.generation_cycle = 1
|
||||
# self.centroid = self.C0()[-1].x
|
||||
# self.C0.centroid = self.centroid
|
||||
else:
|
||||
self.add_centroid()
|
||||
|
||||
self.H.append([])
|
||||
self.H[0].append(self.C0)
|
||||
self.hgr = self.C0.homology_group_rank()
|
||||
self.hgrd = 0 # Complex group rank differential
|
||||
# self.hgr = self.C0.hg_n
|
||||
|
||||
# Build initial graph
|
||||
self.graph_map()
|
||||
|
||||
self.performance = []
|
||||
self.performance.append(0)
|
||||
self.performance.append(0)
|
||||
|
||||
def __call__(self):
|
||||
return self.H
|
||||
|
||||
def n_cube(self, dim, symmetry=False, printout=False):
|
||||
"""
|
||||
Generate the simplicial triangulation of the N-D hypercube
|
||||
containing 2**n vertices
|
||||
"""
|
||||
origin = list(np.zeros(dim, dtype=int))
|
||||
self.origin = origin
|
||||
supremum = list(np.ones(dim, dtype=int))
|
||||
self.supremum = supremum
|
||||
|
||||
# tuple versions for indexing
|
||||
origintuple = tuple(origin)
|
||||
supremumtuple = tuple(supremum)
|
||||
|
||||
x_parents = [origintuple]
|
||||
|
||||
if symmetry:
|
||||
self.C0 = Simplex(0, 0, 0, self.dim) # Initial cell object
|
||||
self.C0.add_vertex(self.V[origintuple])
|
||||
|
||||
i_s = 0
|
||||
self.perm_symmetry(i_s, x_parents, origin)
|
||||
self.C0.add_vertex(self.V[supremumtuple])
|
||||
else:
|
||||
self.C0 = Cell(0, 0, origin, supremum) # Initial cell object
|
||||
self.C0.add_vertex(self.V[origintuple])
|
||||
self.C0.add_vertex(self.V[supremumtuple])
|
||||
|
||||
i_parents = []
|
||||
self.perm(i_parents, x_parents, origin)
|
||||
|
||||
if printout:
|
||||
print("Initial hyper cube:")
|
||||
for v in self.C0():
|
||||
v.print_out()
|
||||
|
||||
def perm(self, i_parents, x_parents, xi):
|
||||
# TODO: Cut out of for if outside linear constraint cutting planes
|
||||
xi_t = tuple(xi)
|
||||
|
||||
# Construct required iterator
|
||||
iter_range = [x for x in range(self.dim) if x not in i_parents]
|
||||
|
||||
for i in iter_range:
|
||||
i2_parents = copy.copy(i_parents)
|
||||
i2_parents.append(i)
|
||||
xi2 = copy.copy(xi)
|
||||
xi2[i] = 1
|
||||
# Make new vertex list a hashable tuple
|
||||
xi2_t = tuple(xi2)
|
||||
# Append to cell
|
||||
self.C0.add_vertex(self.V[xi2_t])
|
||||
# Connect neighbors and vice versa
|
||||
# Parent point
|
||||
self.V[xi2_t].connect(self.V[xi_t])
|
||||
|
||||
# Connect all family of simplices in parent containers
|
||||
for x_ip in x_parents:
|
||||
self.V[xi2_t].connect(self.V[x_ip])
|
||||
|
||||
x_parents2 = copy.copy(x_parents)
|
||||
x_parents2.append(xi_t)
|
||||
|
||||
# Permutate
|
||||
self.perm(i2_parents, x_parents2, xi2)
|
||||
|
||||
def perm_symmetry(self, i_s, x_parents, xi):
|
||||
# TODO: Cut out of for if outside linear constraint cutting planes
|
||||
xi_t = tuple(xi)
|
||||
xi2 = copy.copy(xi)
|
||||
xi2[i_s] = 1
|
||||
# Make new vertex list a hashable tuple
|
||||
xi2_t = tuple(xi2)
|
||||
# Append to cell
|
||||
self.C0.add_vertex(self.V[xi2_t])
|
||||
# Connect neighbors and vice versa
|
||||
# Parent point
|
||||
self.V[xi2_t].connect(self.V[xi_t])
|
||||
|
||||
# Connect all family of simplices in parent containers
|
||||
for x_ip in x_parents:
|
||||
self.V[xi2_t].connect(self.V[x_ip])
|
||||
|
||||
x_parents2 = copy.copy(x_parents)
|
||||
x_parents2.append(xi_t)
|
||||
|
||||
i_s += 1
|
||||
if i_s == self.dim:
|
||||
return
|
||||
# Permutate
|
||||
self.perm_symmetry(i_s, x_parents2, xi2)
|
||||
|
||||
def add_centroid(self):
|
||||
"""Split the central edge between the origin and supremum of
|
||||
a cell and add the new vertex to the complex"""
|
||||
self.centroid = list(
|
||||
(np.array(self.origin) + np.array(self.supremum)) / 2.0)
|
||||
self.C0.add_vertex(self.V[tuple(self.centroid)])
|
||||
self.C0.centroid = self.centroid
|
||||
|
||||
# Disconnect origin and supremum
|
||||
self.V[tuple(self.origin)].disconnect(self.V[tuple(self.supremum)])
|
||||
|
||||
# Connect centroid to all other vertices
|
||||
for v in self.C0():
|
||||
self.V[tuple(self.centroid)].connect(self.V[tuple(v.x)])
|
||||
|
||||
self.centroid_added = True
|
||||
return
|
||||
|
||||
# Construct incidence array:
|
||||
def incidence(self):
|
||||
if self.centroid_added:
|
||||
self.structure = np.zeros([2 ** self.dim + 1, 2 ** self.dim + 1],
|
||||
dtype=int)
|
||||
else:
|
||||
self.structure = np.zeros([2 ** self.dim, 2 ** self.dim],
|
||||
dtype=int)
|
||||
|
||||
for v in self.HC.C0():
|
||||
for v2 in v.nn:
|
||||
self.structure[v.index, v2.index] = 1
|
||||
|
||||
return
|
||||
|
||||
# A more sparse incidence generator:
|
||||
def graph_map(self):
|
||||
""" Make a list of size 2**n + 1 where an entry is a vertex
|
||||
incidence, each list element contains a list of indexes
|
||||
corresponding to that entry's neighbors"""
|
||||
|
||||
self.graph = [[v2.index for v2 in v.nn] for v in self.C0()]
|
||||
|
||||
# Graph structure method:
|
||||
# 0. Capture the indices of the initial cell.
|
||||
# 1. Generate new origin and supremum scalars based on current generation
|
||||
# 2. Generate a new set of vertices corresponding to a new
|
||||
# "origin" and "supremum"
|
||||
# 3. Connected based on the indices of the previous graph structure
|
||||
# 4. Disconnect the edges in the original cell
|
||||
|
||||
def sub_generate_cell(self, C_i, gen):
|
||||
"""Subgenerate a cell `C_i` of generation `gen` and
|
||||
homology group rank `hgr`."""
|
||||
origin_new = tuple(C_i.centroid)
|
||||
centroid_index = len(C_i()) - 1
|
||||
|
||||
# If not gen append
|
||||
try:
|
||||
self.H[gen]
|
||||
except IndexError:
|
||||
self.H.append([])
|
||||
|
||||
# Generate subcubes using every extreme vertex in C_i as a supremum
|
||||
# and the centroid of C_i as the origin
|
||||
H_new = [] # list storing all the new cubes split from C_i
|
||||
for i, v in enumerate(C_i()[:-1]):
|
||||
supremum = tuple(v.x)
|
||||
H_new.append(
|
||||
self.construct_hypercube(origin_new, supremum, gen, C_i.hg_n))
|
||||
|
||||
for i, connections in enumerate(self.graph):
|
||||
# Present vertex V_new[i]; connect to all connections:
|
||||
if i == centroid_index: # Break out of centroid
|
||||
break
|
||||
|
||||
for j in connections:
|
||||
C_i()[i].disconnect(C_i()[j])
|
||||
|
||||
# Destroy the old cell
|
||||
if C_i is not self.C0: # Garbage collector does this anyway; not needed
|
||||
del C_i
|
||||
|
||||
# TODO: Recalculate all the homology group ranks of each cell
|
||||
return H_new
|
||||
|
||||
def split_generation(self):
|
||||
"""
|
||||
Run sub_generate_cell for every cell in the current generation self.gen
|
||||
"""
|
||||
no_splits = False # USED IN SHGO
|
||||
try:
|
||||
for c in self.H[self.gen]:
|
||||
if self.symmetry:
|
||||
# self.sub_generate_cell_symmetry(c, self.gen + 1)
|
||||
self.split_simplex_symmetry(c, self.gen + 1)
|
||||
else:
|
||||
self.sub_generate_cell(c, self.gen + 1)
|
||||
except IndexError:
|
||||
no_splits = True # USED IN SHGO
|
||||
|
||||
self.gen += 1
|
||||
return no_splits # USED IN SHGO
|
||||
|
||||
def construct_hypercube(self, origin, supremum, gen, hgr,
|
||||
printout=False):
|
||||
"""
|
||||
Build a hypercube with triangulations symmetric to C0.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
origin : vec
|
||||
supremum : vec (tuple)
|
||||
gen : generation
|
||||
hgr : parent homology group rank
|
||||
"""
|
||||
# Initiate new cell
|
||||
v_o = np.array(origin)
|
||||
v_s = np.array(supremum)
|
||||
|
||||
C_new = Cell(gen, hgr, origin, supremum)
|
||||
C_new.centroid = tuple((v_o + v_s) * .5)
|
||||
|
||||
# Build new indexed vertex list
|
||||
V_new = []
|
||||
|
||||
for i, v in enumerate(self.C0()[:-1]):
|
||||
v_x = np.array(v.x)
|
||||
sub_cell_t1 = v_o - v_o * v_x
|
||||
sub_cell_t2 = v_s * v_x
|
||||
|
||||
vec = sub_cell_t1 + sub_cell_t2
|
||||
|
||||
vec = tuple(vec)
|
||||
C_new.add_vertex(self.V[vec])
|
||||
V_new.append(vec)
|
||||
|
||||
# Add new centroid
|
||||
C_new.add_vertex(self.V[C_new.centroid])
|
||||
V_new.append(C_new.centroid)
|
||||
|
||||
# Connect new vertices #TODO: Thread into other loop; no need for V_new
|
||||
for i, connections in enumerate(self.graph):
|
||||
# Present vertex V_new[i]; connect to all connections:
|
||||
for j in connections:
|
||||
self.V[V_new[i]].connect(self.V[V_new[j]])
|
||||
|
||||
if printout:
|
||||
print("A sub hyper cube with:")
|
||||
print("origin: {}".format(origin))
|
||||
print("supremum: {}".format(supremum))
|
||||
for v in C_new():
|
||||
v.print_out()
|
||||
|
||||
# Append the new cell to the complex
|
||||
self.H[gen].append(C_new)
|
||||
|
||||
return C_new
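# --- Worked instance (not part of the upstream file) of the vertex mapping
# used above: vec = v_o*(1 - v_x) + v_s*v_x, so each template vertex of the
# unit cube takes the new origin's coordinate where it has a 0 and the new
# supremum's coordinate where it has a 1. Example values are our own.
import numpy as np

v_o = np.array([0.5, 0.5])   # new origin (the parent cell's centroid)
v_s = np.array([1.0, 0.0])   # new supremum (one parent vertex)
v_x = np.array([1, 0])       # a template vertex of the initial cube C0

vec = v_o - v_o * v_x + v_s * v_x
print(vec)  # [1.  0.5]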
|
||||
|
||||
def split_simplex_symmetry(self, S, gen):
|
||||
"""
|
||||
Split a hypersimplex S into two sub simplices by building a hyperplane
|
||||
which connects to a new vertex on an edge (the longest edge in
|
||||
dim = {2, 3}) and every other vertex in the simplex that is not
|
||||
connected to the edge being split.
|
||||
|
||||
This function utilizes the knowledge that the problem is specified
|
||||
with symmetric constraints
|
||||
|
||||
The longest edge is tracked by an ordering of the
|
||||
vertices in every simplex; the edge between the first and second
|
||||
vertex is the longest edge to be split in the next iteration.
|
||||
"""
|
||||
# If not gen append
|
||||
try:
|
||||
self.H[gen]
|
||||
except IndexError:
|
||||
self.H.append([])
|
||||
|
||||
# Find new vertex.
|
||||
# V_new_x = tuple((np.array(C()[0].x) + np.array(C()[1].x)) / 2.0)
|
||||
s = S()
|
||||
firstx = s[0].x
|
||||
lastx = s[-1].x
|
||||
V_new = self.V[tuple((np.array(firstx) + np.array(lastx)) / 2.0)]
|
||||
|
||||
# Disconnect old longest edge
|
||||
self.V[firstx].disconnect(self.V[lastx])
|
||||
|
||||
# Connect new vertices to all other vertices
|
||||
for v in s[:]:
|
||||
v.connect(self.V[V_new.x])
|
||||
|
||||
# New "lower" simplex
|
||||
S_new_l = Simplex(gen, S.hg_n, self.generation_cycle,
|
||||
self.dim)
|
||||
S_new_l.add_vertex(s[0])
|
||||
S_new_l.add_vertex(V_new) # Add new vertex
|
||||
for v in s[1:-1]: # Add all other vertices
|
||||
S_new_l.add_vertex(v)
|
||||
|
||||
# New "upper" simplex
|
||||
S_new_u = Simplex(gen, S.hg_n, S.generation_cycle, self.dim)
|
||||
|
||||
# First vertex on new long edge
|
||||
S_new_u.add_vertex(s[S_new_u.generation_cycle + 1])
|
||||
|
||||
for v in s[1:-1]: # Remaining vertices
|
||||
S_new_u.add_vertex(v)
|
||||
|
||||
for k, v in enumerate(s[1:-1]): # iterate through inner vertices
|
||||
if k == S.generation_cycle:
|
||||
S_new_u.add_vertex(V_new)
|
||||
else:
|
||||
S_new_u.add_vertex(v)
|
||||
|
||||
S_new_u.add_vertex(s[-1]) # Second vertex on new long edge
|
||||
|
||||
self.H[gen].append(S_new_l)
|
||||
self.H[gen].append(S_new_u)
|
||||
|
||||
return
|
||||
|
||||
# Plots
|
||||
def plot_complex(self):
|
||||
"""
|
||||
Here, C is the LIST of simplexes S in the
|
||||
2- or 3-D complex
|
||||
|
||||
To plot a single simplex S in a set C, use e.g., [C[0]]
|
||||
"""
|
||||
from matplotlib import pyplot # type: ignore[import]
|
||||
if self.dim == 2:
|
||||
pyplot.figure()
|
||||
for C in self.H:
|
||||
for c in C:
|
||||
for v in c():
|
||||
if self.bounds is None:
|
||||
x_a = np.array(v.x, dtype=float)
|
||||
else:
|
||||
x_a = np.array(v.x, dtype=float)
|
||||
for i in range(len(self.bounds)):
|
||||
x_a[i] = (x_a[i] * (self.bounds[i][1]
|
||||
- self.bounds[i][0])
|
||||
+ self.bounds[i][0])
|
||||
|
||||
# logging.info('v.x_a = {}'.format(x_a))
|
||||
|
||||
pyplot.plot([x_a[0]], [x_a[1]], 'o')
|
||||
|
||||
xlines = []
|
||||
ylines = []
|
||||
for vn in v.nn:
|
||||
if self.bounds is None:
|
||||
xn_a = np.array(vn.x, dtype=float)
|
||||
else:
|
||||
xn_a = np.array(vn.x, dtype=float)
|
||||
for i in range(len(self.bounds)):
|
||||
xn_a[i] = (xn_a[i] * (self.bounds[i][1]
|
||||
- self.bounds[i][0])
|
||||
+ self.bounds[i][0])
|
||||
|
||||
# logging.info('vn.x = {}'.format(vn.x))
|
||||
|
||||
xlines.append(xn_a[0])
|
||||
ylines.append(xn_a[1])
|
||||
xlines.append(x_a[0])
|
||||
ylines.append(x_a[1])
|
||||
|
||||
pyplot.plot(xlines, ylines)
|
||||
|
||||
if self.bounds is None:
|
||||
pyplot.ylim([-1e-2, 1 + 1e-2])
|
||||
pyplot.xlim([-1e-2, 1 + 1e-2])
|
||||
else:
|
||||
pyplot.ylim(
|
||||
[self.bounds[1][0] - 1e-2, self.bounds[1][1] + 1e-2])
|
||||
pyplot.xlim(
|
||||
[self.bounds[0][0] - 1e-2, self.bounds[0][1] + 1e-2])
|
||||
|
||||
pyplot.show()
|
||||
|
||||
elif self.dim == 3:
|
||||
fig = pyplot.figure()
|
||||
ax = fig.add_subplot(111, projection='3d')
|
||||
|
||||
for C in self.H:
|
||||
for c in C:
|
||||
for v in c():
|
||||
x = []
|
||||
y = []
|
||||
z = []
|
||||
# logging.info('v.x = {}'.format(v.x))
|
||||
x.append(v.x[0])
|
||||
y.append(v.x[1])
|
||||
z.append(v.x[2])
|
||||
for vn in v.nn:
|
||||
x.append(vn.x[0])
|
||||
y.append(vn.x[1])
|
||||
z.append(vn.x[2])
|
||||
x.append(v.x[0])
|
||||
y.append(v.x[1])
|
||||
z.append(v.x[2])
|
||||
# logging.info('vn.x = {}'.format(vn.x))
|
||||
|
||||
ax.plot(x, y, z, label='simplex')
|
||||
|
||||
pyplot.show()
|
||||
else:
|
||||
print("dimension higher than 3 or wrong complex format")
|
||||
return
|
||||
|
||||
|
||||
class VertexGroup(object):
|
||||
def __init__(self, p_gen, p_hgr):
|
||||
self.p_gen = p_gen # parent generation
|
||||
self.p_hgr = p_hgr # parent homology group rank
|
||||
self.hg_n = None
|
||||
self.hg_d = None
|
||||
|
||||
# Maybe add parent homology group rank total history
|
||||
# This is the sum of all previously split cells
|
||||
# cumulatively throughout its entire history
|
||||
self.C = []
|
||||
|
||||
def __call__(self):
|
||||
return self.C
|
||||
|
||||
def add_vertex(self, V):
|
||||
if V not in self.C:
|
||||
self.C.append(V)
|
||||
|
||||
def homology_group_rank(self):
|
||||
"""
|
||||
Returns the homology group order of the current cell
|
||||
"""
|
||||
if self.hg_n is None:
|
||||
self.hg_n = sum(1 for v in self.C if v.minimiser())
|
||||
|
||||
return self.hg_n
|
||||
|
||||
def homology_group_differential(self):
|
||||
"""
|
||||
Returns the difference between the current homology group of the
|
||||
cell and its parent group
|
||||
"""
|
||||
if self.hg_d is None:
|
||||
self.hg_d = self.hg_n - self.p_hgr
|
||||
|
||||
return self.hg_d
|
||||
|
||||
def polytopial_sperner_lemma(self):
|
||||
"""
|
||||
Returns the number of stationary points theoretically contained in the
|
||||
cell based information currently known about the cell
|
||||
"""
|
||||
pass
|
||||
|
||||
def print_out(self):
|
||||
"""
|
||||
Print the current cell to console
|
||||
"""
|
||||
for v in self():
|
||||
v.print_out()
|
||||
|
||||
|
||||
class Cell(VertexGroup):
|
||||
"""
|
||||
Contains a cell that is symmetric to the initial hypercube triangulation
|
||||
"""
|
||||
|
||||
def __init__(self, p_gen, p_hgr, origin, supremum):
|
||||
super(Cell, self).__init__(p_gen, p_hgr)
|
||||
|
||||
self.origin = origin
|
||||
self.supremum = supremum
|
||||
self.centroid = None # (Not always used)
|
||||
# TODO: self.bounds
|
||||
|
||||
|
||||
class Simplex(VertexGroup):
|
||||
"""
|
||||
Contains a simplex that is symmetric to the initial symmetry constrained
|
||||
hypersimplex triangulation
|
||||
"""
|
||||
|
||||
def __init__(self, p_gen, p_hgr, generation_cycle, dim):
|
||||
super(Simplex, self).__init__(p_gen, p_hgr)
|
||||
|
||||
self.generation_cycle = (generation_cycle + 1) % (dim - 1)
|
||||
|
||||
|
||||
class Vertex:
|
||||
def __init__(self, x, bounds=None, func=None, func_args=(), g_cons=None,
|
||||
g_cons_args=(), nn=None, index=None):
|
||||
self.x = x
|
||||
self.order = sum(x)
|
||||
x_a = np.array(x, dtype=float)
|
||||
if bounds is not None:
|
||||
for i, (lb, ub) in enumerate(bounds):
|
||||
x_a[i] = x_a[i] * (ub - lb) + lb
|
||||
|
||||
# TODO: Make saving the array structure optional
|
||||
self.x_a = x_a
|
||||
|
||||
# Note Vertex is only initiated once for all x so only
|
||||
# evaluated once
|
||||
if func is not None:
|
||||
self.feasible = True
|
||||
if g_cons is not None:
|
||||
for g, args in zip(g_cons, g_cons_args):
|
||||
if g(self.x_a, *args) < 0.0:
|
||||
self.f = np.inf
|
||||
self.feasible = False
|
||||
break
|
||||
if self.feasible:
|
||||
self.f = func(x_a, *func_args)
|
||||
|
||||
if nn is not None:
|
||||
self.nn = nn
|
||||
else:
|
||||
self.nn = set()
|
||||
|
||||
self.fval = None
|
||||
self.check_min = True
|
||||
|
||||
# Index:
|
||||
if index is not None:
|
||||
self.index = index
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self.x)
|
||||
|
||||
def connect(self, v):
|
||||
if v is not self and v not in self.nn:
|
||||
self.nn.add(v)
|
||||
v.nn.add(self)
|
||||
|
||||
if self.minimiser():
|
||||
v._min = False
|
||||
v.check_min = False
|
||||
|
||||
# TEMPORARY
|
||||
self.check_min = True
|
||||
v.check_min = True
|
||||
|
||||
def disconnect(self, v):
|
||||
if v in self.nn:
|
||||
self.nn.remove(v)
|
||||
v.nn.remove(self)
|
||||
self.check_min = True
|
||||
v.check_min = True
|
||||
|
||||
def minimiser(self):
|
||||
"""Check whether this vertex is strictly less than all its neighbors"""
|
||||
if self.check_min:
|
||||
self._min = all(self.f < v.f for v in self.nn)
|
||||
self.check_min = False
|
||||
|
||||
return self._min
|
||||
|
||||
def print_out(self):
|
||||
print("Vertex: {}".format(self.x))
|
||||
constr = 'Connections: '
|
||||
for vc in self.nn:
|
||||
constr += '{} '.format(vc.x)
|
||||
|
||||
print(constr)
|
||||
print('Order = {}'.format(self.order))
|
||||
|
||||
|
||||
class VertexCache:
|
||||
def __init__(self, func, func_args=(), bounds=None, g_cons=None,
|
||||
g_cons_args=(), indexed=True):
|
||||
|
||||
self.cache = {}
|
||||
self.func = func
|
||||
self.g_cons = g_cons
|
||||
self.g_cons_args = g_cons_args
|
||||
self.func_args = func_args
|
||||
self.bounds = bounds
|
||||
self.nfev = 0
|
||||
self.size = 0
|
||||
|
||||
if indexed:
|
||||
self.index = -1
|
||||
|
||||
def __getitem__(self, x, indexed=True):
|
||||
try:
|
||||
return self.cache[x]
|
||||
except KeyError:
|
||||
if indexed:
|
||||
self.index += 1
|
||||
xval = Vertex(x, bounds=self.bounds,
|
||||
func=self.func, func_args=self.func_args,
|
||||
g_cons=self.g_cons,
|
||||
g_cons_args=self.g_cons_args,
|
||||
index=self.index)
|
||||
else:
|
||||
xval = Vertex(x, bounds=self.bounds,
|
||||
func=self.func, func_args=self.func_args,
|
||||
g_cons=self.g_cons,
|
||||
g_cons_args=self.g_cons_args)
|
||||
|
||||
# logging.info("New generated vertex at x = {}".format(x))
|
||||
# NOTE: Surprisingly high performance increase if logging is commented out
|
||||
self.cache[x] = xval
|
||||
|
||||
# TODO: Check
|
||||
if self.func is not None:
|
||||
if self.g_cons is not None:
|
||||
if xval.feasible:
|
||||
self.nfev += 1
|
||||
self.size += 1
|
||||
else:
|
||||
self.size += 1
|
||||
else:
|
||||
self.nfev += 1
|
||||
self.size += 1
|
||||
|
||||
return self.cache[x]
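# --- Usage sketch (not part of the upstream file). `Complex` together with
# `VertexCache` builds and refines the triangulation; this is internal shgo
# machinery, so the call below is illustrative only.
import numpy as np

def _sphere(x):
    # Simple smooth objective over the unit square.
    return float(np.sum(np.asarray(x) ** 2))

HC = Complex(dim=2, func=_sphere, bounds=[(-1.0, 1.0), (-1.0, 1.0)])
HC.split_generation()  # refine every cell of the current generation once

print(len(HC.V.cache), "cached vertices,", HC.V.nfev, "function evaluations")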
|
BIN
venv/Lib/site-packages/scipy/optimize/_slsqp.cp36-win32.pyd
Normal file
BIN
venv/Lib/site-packages/scipy/optimize/_slsqp.cp36-win32.pyd
Normal file
Binary file not shown.
257
venv/Lib/site-packages/scipy/optimize/_spectral.py
Normal file
257
venv/Lib/site-packages/scipy/optimize/_spectral.py
Normal file
|
@ -0,0 +1,257 @@
|
|||
"""
|
||||
Spectral Algorithm for Nonlinear Equations
|
||||
"""
|
||||
import collections
|
||||
|
||||
import numpy as np
|
||||
from scipy.optimize import OptimizeResult
|
||||
from scipy.optimize.optimize import _check_unknown_options
|
||||
from .linesearch import _nonmonotone_line_search_cruz, _nonmonotone_line_search_cheng
|
||||
|
||||
class _NoConvergence(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def _root_df_sane(func, x0, args=(), ftol=1e-8, fatol=1e-300, maxfev=1000,
|
||||
fnorm=None, callback=None, disp=False, M=10, eta_strategy=None,
|
||||
sigma_eps=1e-10, sigma_0=1.0, line_search='cruz', **unknown_options):
|
||||
r"""
|
||||
Solve nonlinear equation with the DF-SANE method
|
||||
|
||||
Options
|
||||
-------
|
||||
ftol : float, optional
|
||||
Relative norm tolerance.
|
||||
fatol : float, optional
|
||||
Absolute norm tolerance.
|
||||
Algorithm terminates when ``||func(x)|| < fatol + ftol ||func(x_0)||``.
|
||||
fnorm : callable, optional
|
||||
Norm to use in the convergence check. If None, 2-norm is used.
|
||||
maxfev : int, optional
|
||||
Maximum number of function evaluations.
|
||||
disp : bool, optional
|
||||
Whether to print convergence process to stdout.
|
||||
eta_strategy : callable, optional
|
||||
Choice of the ``eta_k`` parameter, which gives slack for growth
|
||||
of ``||F||**2``. Called as ``eta_k = eta_strategy(k, x, F)`` with
|
||||
`k` the iteration number, `x` the current iterate and `F` the current
|
||||
residual. Should satisfy ``eta_k > 0`` and ``sum(eta, k=0..inf) < inf``.
|
||||
Default: ``||F||**2 / (1 + k)**2``.
|
||||
sigma_eps : float, optional
|
||||
The spectral coefficient is constrained to ``sigma_eps < sigma < 1/sigma_eps``.
|
||||
Default: 1e-10
|
||||
sigma_0 : float, optional
|
||||
Initial spectral coefficient.
|
||||
Default: 1.0
|
||||
M : int, optional
|
||||
Number of iterates to include in the nonmonotonic line search.
|
||||
Default: 10
|
||||
line_search : {'cruz', 'cheng'}
|
||||
Type of line search to employ. 'cruz' is the original one defined in
|
||||
[Martinez & Raydan. Math. Comp. 75, 1429 (2006)], 'cheng' is
|
||||
a modified search defined in [Cheng & Li. IMA J. Numer. Anal. 29, 814 (2009)].
|
||||
Default: 'cruz'
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] "Spectral residual method without gradient information for solving
|
||||
large-scale nonlinear systems of equations." W. La Cruz,
|
||||
J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006).
|
||||
.. [2] W. La Cruz, Opt. Meth. Software, 29, 24 (2014).
|
||||
.. [3] W. Cheng, D.-H. Li. IMA J. Numer. Anal. **29**, 814 (2009).
|
||||
|
||||
"""
|
||||
_check_unknown_options(unknown_options)
|
||||
|
||||
if line_search not in ('cheng', 'cruz'):
|
||||
raise ValueError("Invalid value %r for 'line_search'" % (line_search,))
|
||||
|
||||
nexp = 2
|
||||
|
||||
if eta_strategy is None:
|
||||
# Different choice from [1], as their eta is not invariant
|
||||
# vs. scaling of F.
|
||||
def eta_strategy(k, x, F):
|
||||
# Obtain squared 2-norm of the initial residual from the outer scope
|
||||
return f_0 / (1 + k)**2
|
||||
|
||||
if fnorm is None:
|
||||
def fnorm(F):
|
||||
# Obtain squared 2-norm of the current residual from the outer scope
|
||||
return f_k**(1.0/nexp)
|
||||
|
||||
def fmerit(F):
|
||||
return np.linalg.norm(F)**nexp
|
||||
|
||||
nfev = [0]
|
||||
f, x_k, x_shape, f_k, F_k, is_complex = _wrap_func(func, x0, fmerit, nfev, maxfev, args)
|
||||
|
||||
k = 0
|
||||
f_0 = f_k
|
||||
sigma_k = sigma_0
|
||||
|
||||
F_0_norm = fnorm(F_k)
|
||||
|
||||
# For the 'cruz' line search
|
||||
prev_fs = collections.deque([f_k], M)
|
||||
|
||||
# For the 'cheng' line search
|
||||
Q = 1.0
|
||||
C = f_0
|
||||
|
||||
converged = False
|
||||
message = "too many function evaluations required"
|
||||
|
||||
while True:
|
||||
F_k_norm = fnorm(F_k)
|
||||
|
||||
if disp:
|
||||
print("iter %d: ||F|| = %g, sigma = %g" % (k, F_k_norm, sigma_k))
|
||||
|
||||
if callback is not None:
|
||||
callback(x_k, F_k)
|
||||
|
||||
if F_k_norm < ftol * F_0_norm + fatol:
|
||||
# Converged!
|
||||
message = "successful convergence"
|
||||
converged = True
|
||||
break
|
||||
|
||||
# Control spectral parameter, from [2]
|
||||
if abs(sigma_k) > 1/sigma_eps:
|
||||
sigma_k = 1/sigma_eps * np.sign(sigma_k)
|
||||
elif abs(sigma_k) < sigma_eps:
|
||||
sigma_k = sigma_eps
|
||||
|
||||
# Line search direction
|
||||
d = -sigma_k * F_k
|
||||
|
||||
# Nonmonotone line search
|
||||
eta = eta_strategy(k, x_k, F_k)
|
||||
try:
|
||||
if line_search == 'cruz':
|
||||
alpha, xp, fp, Fp = _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta=eta)
|
||||
elif line_search == 'cheng':
|
||||
alpha, xp, fp, Fp, C, Q = _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta=eta)
|
||||
except _NoConvergence:
|
||||
break
|
||||
|
||||
# Update spectral parameter
|
||||
s_k = xp - x_k
|
||||
y_k = Fp - F_k
|
||||
sigma_k = np.vdot(s_k, s_k) / np.vdot(s_k, y_k)
|
||||
|
||||
# Take step
|
||||
x_k = xp
|
||||
F_k = Fp
|
||||
f_k = fp
|
||||
|
||||
# Store function value
|
||||
if line_search == 'cruz':
|
||||
prev_fs.append(fp)
|
||||
|
||||
k += 1
|
||||
|
||||
x = _wrap_result(x_k, is_complex, shape=x_shape)
|
||||
F = _wrap_result(F_k, is_complex)
|
||||
|
||||
result = OptimizeResult(x=x, success=converged,
|
||||
message=message,
|
||||
fun=F, nfev=nfev[0], nit=k)
|
||||
|
||||
return result
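# --- Usage note (not part of the upstream file). User code normally reaches
# this solver through the public interface, scipy.optimize.root(..., method='df-sane'),
# rather than calling _root_df_sane directly. A small, standard example:
import numpy as np
from scipy.optimize import root

def residual(x):
    # A simple 2-D nonlinear system F(x) = 0.
    return np.array([x[0] + 0.5 * (x[0] - x[1]) ** 3 - 1.0,
                     0.5 * (x[1] - x[0]) ** 3 + x[1]])

sol = root(residual, x0=[0.0, 0.0], method='df-sane',
           options={'ftol': 1e-10, 'line_search': 'cruz'})
print(sol.x, sol.success, sol.nfev)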
|
||||
|
||||
|
||||
def _wrap_func(func, x0, fmerit, nfev_list, maxfev, args=()):
|
||||
"""
|
||||
Wrap a function and an initial value so that (i) complex values
|
||||
are wrapped to reals, and (ii) value for a merit function
|
||||
fmerit(F) is computed at the same time, and (iii) the evaluation count
|
||||
is maintained and an exception is raised if it is exceeded.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
func : callable
|
||||
Function to wrap
|
||||
x0 : ndarray
|
||||
Initial value
|
||||
fmerit : callable
|
||||
Merit function fmerit(f) for computing merit value from residual.
|
||||
nfev_list : list
|
||||
List to store number of evaluations in. Should be [0] in the beginning.
|
||||
maxfev : int
|
||||
Maximum number of evaluations before _NoConvergence is raised.
|
||||
args : tuple
|
||||
Extra arguments to func
|
||||
|
||||
Returns
|
||||
-------
|
||||
wrap_func : callable
|
||||
Wrapped function, to be called as
|
||||
``F, fp = wrap_func(x0)``
|
||||
x0_wrap : ndarray of float
|
||||
Wrapped initial value; raveled to 1-D and complex
|
||||
values mapped to reals.
|
||||
x0_shape : tuple
|
||||
Shape of the initial value array
|
||||
f : float
|
||||
Merit function at F
|
||||
F : ndarray of float
|
||||
Residual at x0_wrap
|
||||
is_complex : bool
|
||||
Whether complex values were mapped to reals
|
||||
|
||||
"""
|
||||
x0 = np.asarray(x0)
|
||||
x0_shape = x0.shape
|
||||
F = np.asarray(func(x0, *args)).ravel()
|
||||
is_complex = np.iscomplexobj(x0) or np.iscomplexobj(F)
|
||||
x0 = x0.ravel()
|
||||
|
||||
nfev_list[0] = 1
|
||||
|
||||
if is_complex:
|
||||
def wrap_func(x):
|
||||
if nfev_list[0] >= maxfev:
|
||||
raise _NoConvergence()
|
||||
nfev_list[0] += 1
|
||||
z = _real2complex(x).reshape(x0_shape)
|
||||
v = np.asarray(func(z, *args)).ravel()
|
||||
F = _complex2real(v)
|
||||
f = fmerit(F)
|
||||
return f, F
|
||||
|
||||
x0 = _complex2real(x0)
|
||||
F = _complex2real(F)
|
||||
else:
|
||||
def wrap_func(x):
|
||||
if nfev_list[0] >= maxfev:
|
||||
raise _NoConvergence()
|
||||
nfev_list[0] += 1
|
||||
x = x.reshape(x0_shape)
|
||||
F = np.asarray(func(x, *args)).ravel()
|
||||
f = fmerit(F)
|
||||
return f, F
|
||||
|
||||
return wrap_func, x0, x0_shape, fmerit(F), F, is_complex
|
||||
|
||||
|
||||
def _wrap_result(result, is_complex, shape=None):
|
||||
"""
|
||||
Convert from real to complex and reshape result arrays.
|
||||
"""
|
||||
if is_complex:
|
||||
z = _real2complex(result)
|
||||
else:
|
||||
z = result
|
||||
if shape is not None:
|
||||
z = z.reshape(shape)
|
||||
return z
|
||||
|
||||
|
||||
def _real2complex(x):
|
||||
return np.ascontiguousarray(x, dtype=float).view(np.complex128)
|
||||
|
||||
|
||||
def _complex2real(z):
|
||||
return np.ascontiguousarray(z, dtype=complex).view(np.float64)
|
12
venv/Lib/site-packages/scipy/optimize/_trlib/__init__.py
Normal file
12
venv/Lib/site-packages/scipy/optimize/_trlib/__init__.py
Normal file
|
@ -0,0 +1,12 @@
|
|||
from ._trlib import TRLIBQuadraticSubproblem
|
||||
|
||||
__all__ = ['TRLIBQuadraticSubproblem', 'get_trlib_quadratic_subproblem']
|
||||
|
||||
|
||||
def get_trlib_quadratic_subproblem(tol_rel_i=-2.0, tol_rel_b=-3.0, disp=False):
|
||||
def subproblem_factory(x, fun, jac, hess, hessp):
|
||||
return TRLIBQuadraticSubproblem(x, fun, jac, hess, hessp,
|
||||
tol_rel_i=tol_rel_i,
|
||||
tol_rel_b=tol_rel_b,
|
||||
disp=disp)
|
||||
return subproblem_factory
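# --- Usage note (not part of the upstream file). This factory is used by the
# trust-krylov method to build its TRLIB subproblem; tol_rel_i / tol_rel_b are
# the solver's relative convergence tolerances in the interior and on the
# boundary. A standard public-API example:
import numpy as np
from scipy.optimize import minimize, rosen, rosen_der, rosen_hess

res = minimize(rosen, np.array([0.5, -0.5, 1.5]), method='trust-krylov',
               jac=rosen_der, hess=rosen_hess, options={'gtol': 1e-8})
print(res.x, res.nit)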
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
30
venv/Lib/site-packages/scipy/optimize/_trlib/setup.py
Normal file
30
venv/Lib/site-packages/scipy/optimize/_trlib/setup.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
def configuration(parent_package='', top_path=None):
|
||||
from numpy import get_include
|
||||
from scipy._build_utils.system_info import get_info
|
||||
from scipy._build_utils import uses_blas64
|
||||
from numpy.distutils.misc_util import Configuration
|
||||
|
||||
from os.path import join, dirname
|
||||
|
||||
if uses_blas64():
|
||||
lapack_opt = get_info('lapack_ilp64_opt')
|
||||
else:
|
||||
lapack_opt = get_info('lapack_opt')
|
||||
|
||||
lib_inc = join(dirname(dirname(dirname(__file__))), '_lib')
|
||||
bld_inc = join(dirname(dirname(dirname(__file__))), '_build_utils', 'src')
|
||||
|
||||
config = Configuration('_trlib', parent_package, top_path)
|
||||
config.add_extension('_trlib',
|
||||
sources=['_trlib.c', 'trlib_krylov.c',
|
||||
'trlib_eigen_inverse.c', 'trlib_leftmost.c',
|
||||
'trlib_quadratic_zero.c', 'trlib_tri_factor.c'],
|
||||
include_dirs=[get_include(), lib_inc, bld_inc, 'trlib'],
|
||||
extra_info=lapack_opt,
|
||||
)
|
||||
return config
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from numpy.distutils.core import setup
|
||||
setup(**configuration(top_path='').todict())
|
266
venv/Lib/site-packages/scipy/optimize/_trustregion.py
Normal file
266
venv/Lib/site-packages/scipy/optimize/_trustregion.py
Normal file
|
@ -0,0 +1,266 @@
|
|||
"""Trust-region optimization."""
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
import scipy.linalg
|
||||
from .optimize import (_check_unknown_options, wrap_function, _status_message,
|
||||
OptimizeResult, _prepare_scalar_function)
|
||||
|
||||
__all__ = []
|
||||
|
||||
|
||||
class BaseQuadraticSubproblem(object):
|
||||
"""
|
||||
Base/abstract class defining the quadratic model for trust-region
|
||||
minimization. Child classes must implement the ``solve`` method.
|
||||
|
||||
Values of the objective function, Jacobian and Hessian (if provided) at
|
||||
the current iterate ``x`` are evaluated on demand and then stored as
|
||||
attributes ``fun``, ``jac``, ``hess``.
|
||||
"""
|
||||
|
||||
def __init__(self, x, fun, jac, hess=None, hessp=None):
|
||||
self._x = x
|
||||
self._f = None
|
||||
self._g = None
|
||||
self._h = None
|
||||
self._g_mag = None
|
||||
self._cauchy_point = None
|
||||
self._newton_point = None
|
||||
self._fun = fun
|
||||
self._jac = jac
|
||||
self._hess = hess
|
||||
self._hessp = hessp
|
||||
|
||||
def __call__(self, p):
|
||||
return self.fun + np.dot(self.jac, p) + 0.5 * np.dot(p, self.hessp(p))
|
||||
|
||||
@property
|
||||
def fun(self):
|
||||
"""Value of objective function at current iteration."""
|
||||
if self._f is None:
|
||||
self._f = self._fun(self._x)
|
||||
return self._f
|
||||
|
||||
@property
|
||||
def jac(self):
|
||||
"""Value of Jacobian of objective function at current iteration."""
|
||||
if self._g is None:
|
||||
self._g = self._jac(self._x)
|
||||
return self._g
|
||||
|
||||
@property
|
||||
def hess(self):
|
||||
"""Value of Hessian of objective function at current iteration."""
|
||||
if self._h is None:
|
||||
self._h = self._hess(self._x)
|
||||
return self._h
|
||||
|
||||
def hessp(self, p):
|
||||
if self._hessp is not None:
|
||||
return self._hessp(self._x, p)
|
||||
else:
|
||||
return np.dot(self.hess, p)
|
||||
|
||||
@property
|
||||
def jac_mag(self):
|
||||
"""Magnitude of jacobian of objective function at current iteration."""
|
||||
if self._g_mag is None:
|
||||
self._g_mag = scipy.linalg.norm(self.jac)
|
||||
return self._g_mag
|
||||
|
||||
def get_boundaries_intersections(self, z, d, trust_radius):
|
||||
"""
|
||||
Solve the scalar quadratic equation ||z + t d|| == trust_radius.
|
||||
This is like a line-sphere intersection.
|
||||
Return the two values of t, sorted from low to high.
|
||||
"""
|
||||
a = np.dot(d, d)
|
||||
b = 2 * np.dot(z, d)
|
||||
c = np.dot(z, z) - trust_radius**2
|
||||
sqrt_discriminant = math.sqrt(b*b - 4*a*c)
|
||||
|
||||
# The following calculation is mathematically
|
||||
# equivalent to:
|
||||
# ta = (-b - sqrt_discriminant) / (2*a)
|
||||
# tb = (-b + sqrt_discriminant) / (2*a)
|
||||
# but produces smaller round-off errors.
|
||||
# Look at Matrix Computations p. 97
|
||||
# for a better justification.
|
||||
aux = b + math.copysign(sqrt_discriminant, b)
|
||||
ta = -aux / (2*a)
|
||||
tb = -2*c / aux
|
||||
return sorted([ta, tb])
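# --- Worked check (not part of the upstream file). The two values returned
# above are simply the roots of a*t**2 + b*t + c = 0 with a = d.d, b = 2 z.d,
# c = z.z - trust_radius**2; the example values below are our own.
import numpy as np

z = np.array([0.0, 0.0])        # current point, at the center of the region
d = np.array([1.0, 0.0])        # step direction
trust_radius = 2.0

a = d @ d                       # 1.0
b = 2 * (z @ d)                 # 0.0
c = z @ z - trust_radius ** 2   # -4.0
print(sorted(np.roots([a, b, c])))  # [-2.0, 2.0]: the boundary is hit 2 units along d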
|
||||
|
||||
def solve(self, trust_radius):
|
||||
raise NotImplementedError('The solve method should be implemented by '
|
||||
'the child class')
|
||||
|
||||
|
||||
def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None,
|
||||
subproblem=None, initial_trust_radius=1.0,
|
||||
max_trust_radius=1000.0, eta=0.15, gtol=1e-4,
|
||||
maxiter=None, disp=False, return_all=False,
|
||||
callback=None, inexact=True, **unknown_options):
|
||||
"""
|
||||
Minimization of scalar function of one or more variables using a
|
||||
trust-region algorithm.
|
||||
|
||||
Options for the trust-region algorithm are:
|
||||
initial_trust_radius : float
|
||||
Initial trust radius.
|
||||
max_trust_radius : float
|
||||
Never propose steps that are longer than this value.
|
||||
eta : float
|
||||
Trust region related acceptance stringency for proposed steps.
|
||||
gtol : float
|
||||
Gradient norm must be less than `gtol`
|
||||
before successful termination.
|
||||
maxiter : int
|
||||
Maximum number of iterations to perform.
|
||||
disp : bool
|
||||
If True, print convergence message.
|
||||
inexact : bool
|
||||
Accuracy to solve subproblems. If True, requires fewer nonlinear
|
||||
iterations, but more vector products. Only effective for method
|
||||
trust-krylov.
|
||||
|
||||
This function is called by the `minimize` function.
|
||||
It is not supposed to be called directly.
|
||||
"""
|
||||
_check_unknown_options(unknown_options)
|
||||
|
||||
if jac is None:
|
||||
raise ValueError('Jacobian is currently required for trust-region '
|
||||
'methods')
|
||||
if hess is None and hessp is None:
|
||||
raise ValueError('Either the Hessian or the Hessian-vector product '
|
||||
'is currently required for trust-region methods')
|
||||
if subproblem is None:
|
||||
raise ValueError('A subproblem solving strategy is required for '
|
||||
'trust-region methods')
|
||||
if not (0 <= eta < 0.25):
|
||||
raise Exception('invalid acceptance stringency')
|
||||
if max_trust_radius <= 0:
|
||||
raise Exception('the max trust radius must be positive')
|
||||
if initial_trust_radius <= 0:
|
||||
raise ValueError('the initial trust radius must be positive')
|
||||
if initial_trust_radius >= max_trust_radius:
|
||||
raise ValueError('the initial trust radius must be less than the '
|
||||
'max trust radius')
|
||||
|
||||
# force the initial guess into a nice format
|
||||
x0 = np.asarray(x0).flatten()
|
||||
|
||||
# A ScalarFunction representing the problem. This caches calls to fun, jac,
|
||||
# hess.
|
||||
sf = _prepare_scalar_function(fun, x0, jac=jac, hess=hess, args=args)
|
||||
fun = sf.fun
|
||||
jac = sf.grad
|
||||
if hess is not None:
|
||||
hess = sf.hess
|
||||
# ScalarFunction doesn't represent hessp
|
||||
nhessp, hessp = wrap_function(hessp, args)
|
||||
|
||||
# limit the number of iterations
|
||||
if maxiter is None:
|
||||
maxiter = len(x0)*200
|
||||
|
||||
# init the search status
|
||||
warnflag = 0
|
||||
|
||||
# initialize the search
|
||||
trust_radius = initial_trust_radius
|
||||
x = x0
|
||||
if return_all:
|
||||
allvecs = [x]
|
||||
m = subproblem(x, fun, jac, hess, hessp)
|
||||
k = 0
|
||||
|
||||
# search for the function min
|
||||
# do not even start if the gradient is small enough
|
||||
while m.jac_mag >= gtol:
|
||||
|
||||
# Solve the sub-problem.
|
||||
# This gives us the proposed step relative to the current position
|
||||
# and it tells us whether the proposed step
|
||||
# has reached the trust region boundary or not.
|
||||
try:
|
||||
p, hits_boundary = m.solve(trust_radius)
|
||||
except np.linalg.linalg.LinAlgError:
|
||||
warnflag = 3
|
||||
break
|
||||
|
||||
# calculate the predicted value at the proposed point
|
||||
predicted_value = m(p)
|
||||
|
||||
# define the local approximation at the proposed point
|
||||
x_proposed = x + p
|
||||
m_proposed = subproblem(x_proposed, fun, jac, hess, hessp)
|
||||
|
||||
# evaluate the ratio defined in equation (4.4)
|
||||
actual_reduction = m.fun - m_proposed.fun
|
||||
predicted_reduction = m.fun - predicted_value
|
||||
if predicted_reduction <= 0:
|
||||
warnflag = 2
|
||||
break
|
||||
rho = actual_reduction / predicted_reduction
|
||||
|
||||
# update the trust radius according to the actual/predicted ratio
|
||||
if rho < 0.25:
|
||||
trust_radius *= 0.25
|
||||
elif rho > 0.75 and hits_boundary:
|
||||
trust_radius = min(2*trust_radius, max_trust_radius)
|
||||
|
||||
# if the ratio is high enough then accept the proposed step
|
||||
if rho > eta:
|
||||
x = x_proposed
|
||||
m = m_proposed
|
||||
|
||||
# append the best guess, call back, increment the iteration count
|
||||
if return_all:
|
||||
allvecs.append(np.copy(x))
|
||||
if callback is not None:
|
||||
callback(np.copy(x))
|
||||
k += 1
|
||||
|
||||
# check if the gradient is small enough to stop
|
||||
if m.jac_mag < gtol:
|
||||
warnflag = 0
|
||||
break
|
||||
|
||||
# check if we have looked at enough iterations
|
||||
if k >= maxiter:
|
||||
warnflag = 1
|
||||
break
|
||||
|
||||
# print some stuff if requested
|
||||
status_messages = (
|
||||
_status_message['success'],
|
||||
_status_message['maxiter'],
|
||||
'A bad approximation caused failure to predict improvement.',
|
||||
'A linalg error occurred, such as a non-psd Hessian.',
|
||||
)
|
||||
if disp:
|
||||
if warnflag == 0:
|
||||
print(status_messages[warnflag])
|
||||
else:
|
||||
print('Warning: ' + status_messages[warnflag])
|
||||
print(" Current function value: %f" % m.fun)
|
||||
print(" Iterations: %d" % k)
|
||||
print(" Function evaluations: %d" % sf.nfev)
|
||||
print(" Gradient evaluations: %d" % sf.ngev)
|
||||
print(" Hessian evaluations: %d" % (sf.nhev + nhessp[0]))
|
||||
|
||||
result = OptimizeResult(x=x, success=(warnflag == 0), status=warnflag,
|
||||
fun=m.fun, jac=m.jac, nfev=sf.nfev, njev=sf.ngev,
|
||||
nhev=sf.nhev + nhessp[0], nit=k,
|
||||
message=status_messages[warnflag])
|
||||
|
||||
if hess is not None:
|
||||
result['hess'] = m.hess
|
||||
|
||||
if return_all:
|
||||
result['allvecs'] = allvecs
|
||||
|
||||
return result
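# --- Usage note (not part of the upstream file). This driver is reached via
# minimize() with one of the trust-region methods (dogleg, trust-ncg,
# trust-krylov, trust-exact), each supplying its own subproblem class.
# A standard example on a convex quadratic, f(x) = 0.5 x^T A x - b^T x:
import numpy as np
from scipy.optimize import minimize

A = np.array([[3.0, 1.0], [1.0, 2.0]])
b = np.array([1.0, -1.0])

res = minimize(lambda x: 0.5 * x @ A @ x - b @ x, np.zeros(2),
               method='trust-ncg',
               jac=lambda x: A @ x - b,
               hess=lambda x: A,
               options={'initial_trust_radius': 1.0, 'gtol': 1e-8})
print(res.x)                   # should approach the solution of A x = b
print(np.linalg.solve(A, b))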
|
|
@ -0,0 +1,6 @@
|
|||
"""This module contains the equality constrained SQP solver."""
|
||||
|
||||
|
||||
from .minimize_trustregion_constr import _minimize_trustregion_constr
|
||||
|
||||
__all__ = ['_minimize_trustregion_constr']
|
Binary file not shown.
Some files were not shown because too many files have changed in this diff.