267 lines
9 KiB
Python
267 lines
9 KiB
Python
|
"""Trust-region optimization."""
|
||
|
import math
|
||
|
|
||
|
import numpy as np
|
||
|
import scipy.linalg
|
||
|
from .optimize import (_check_unknown_options, wrap_function, _status_message,
|
||
|
OptimizeResult, _prepare_scalar_function)
|
||
|
|
||
|
__all__ = []
|
||
|
|
||
|
|
||
|
class BaseQuadraticSubproblem(object):
|
||
|
"""
|
||
|
Base/abstract class defining the quadratic model for trust-region
|
||
|
minimization. Child classes must implement the ``solve`` method.
|
||
|
|
||
|
Values of the objective function, Jacobian and Hessian (if provided) at
|
||
|
the current iterate ``x`` are evaluated on demand and then stored as
|
||
|
attributes ``fun``, ``jac``, ``hess``.
|
||
|
"""
|
||
|
|
||
|
def __init__(self, x, fun, jac, hess=None, hessp=None):
|
||
|
self._x = x
|
||
|
self._f = None
|
||
|
self._g = None
|
||
|
self._h = None
|
||
|
self._g_mag = None
|
||
|
self._cauchy_point = None
|
||
|
self._newton_point = None
|
||
|
self._fun = fun
|
||
|
self._jac = jac
|
||
|
self._hess = hess
|
||
|
self._hessp = hessp
|
||
|
|
||
|
def __call__(self, p):
|
||
|
return self.fun + np.dot(self.jac, p) + 0.5 * np.dot(p, self.hessp(p))
|
||
|
|
||
|
@property
|
||
|
def fun(self):
|
||
|
"""Value of objective function at current iteration."""
|
||
|
if self._f is None:
|
||
|
self._f = self._fun(self._x)
|
||
|
return self._f
|
||
|
|
||
|
@property
|
||
|
def jac(self):
|
||
|
"""Value of Jacobian of objective function at current iteration."""
|
||
|
if self._g is None:
|
||
|
self._g = self._jac(self._x)
|
||
|
return self._g
|
||
|
|
||
|
@property
|
||
|
def hess(self):
|
||
|
"""Value of Hessian of objective function at current iteration."""
|
||
|
if self._h is None:
|
||
|
self._h = self._hess(self._x)
|
||
|
return self._h
|
||
|
|
||
|
def hessp(self, p):
|
||
|
if self._hessp is not None:
|
||
|
return self._hessp(self._x, p)
|
||
|
else:
|
||
|
return np.dot(self.hess, p)
|
||
|
|
||
|
@property
|
||
|
def jac_mag(self):
|
||
|
"""Magnitude of jacobian of objective function at current iteration."""
|
||
|
if self._g_mag is None:
|
||
|
self._g_mag = scipy.linalg.norm(self.jac)
|
||
|
return self._g_mag
|
||
|
|
||
|
def get_boundaries_intersections(self, z, d, trust_radius):
|
||
|
"""
|
||
|
Solve the scalar quadratic equation ||z + t d|| == trust_radius.
|
||
|
This is like a line-sphere intersection.
|
||
|
Return the two values of t, sorted from low to high.
|
||
|
"""
|
||
|
a = np.dot(d, d)
|
||
|
b = 2 * np.dot(z, d)
|
||
|
c = np.dot(z, z) - trust_radius**2
|
||
|
sqrt_discriminant = math.sqrt(b*b - 4*a*c)
|
||
|
|
||
|
# The following calculation is mathematically
|
||
|
# equivalent to:
|
||
|
# ta = (-b - sqrt_discriminant) / (2*a)
|
||
|
# tb = (-b + sqrt_discriminant) / (2*a)
|
||
|
# but produce smaller round off errors.
|
||
|
# Look at Matrix Computation p.97
|
||
|
# for a better justification.
|
||
|
aux = b + math.copysign(sqrt_discriminant, b)
|
||
|
ta = -aux / (2*a)
|
||
|
tb = -2*c / aux
|
||
|
return sorted([ta, tb])
|
||
|
|
||
|
def solve(self, trust_radius):
|
||
|
raise NotImplementedError('The solve method should be implemented by '
|
||
|
'the child class')
|
||
|
|
||
|
|
||
|
def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None,
|
||
|
subproblem=None, initial_trust_radius=1.0,
|
||
|
max_trust_radius=1000.0, eta=0.15, gtol=1e-4,
|
||
|
maxiter=None, disp=False, return_all=False,
|
||
|
callback=None, inexact=True, **unknown_options):
|
||
|
"""
|
||
|
Minimization of scalar function of one or more variables using a
|
||
|
trust-region algorithm.
|
||
|
|
||
|
Options for the trust-region algorithm are:
|
||
|
initial_trust_radius : float
|
||
|
Initial trust radius.
|
||
|
max_trust_radius : float
|
||
|
Never propose steps that are longer than this value.
|
||
|
eta : float
|
||
|
Trust region related acceptance stringency for proposed steps.
|
||
|
gtol : float
|
||
|
Gradient norm must be less than `gtol`
|
||
|
before successful termination.
|
||
|
maxiter : int
|
||
|
Maximum number of iterations to perform.
|
||
|
disp : bool
|
||
|
If True, print convergence message.
|
||
|
inexact : bool
|
||
|
Accuracy to solve subproblems. If True requires less nonlinear
|
||
|
iterations, but more vector products. Only effective for method
|
||
|
trust-krylov.
|
||
|
|
||
|
This function is called by the `minimize` function.
|
||
|
It is not supposed to be called directly.
|
||
|
"""
|
||
|
_check_unknown_options(unknown_options)
|
||
|
|
||
|
if jac is None:
|
||
|
raise ValueError('Jacobian is currently required for trust-region '
|
||
|
'methods')
|
||
|
if hess is None and hessp is None:
|
||
|
raise ValueError('Either the Hessian or the Hessian-vector product '
|
||
|
'is currently required for trust-region methods')
|
||
|
if subproblem is None:
|
||
|
raise ValueError('A subproblem solving strategy is required for '
|
||
|
'trust-region methods')
|
||
|
if not (0 <= eta < 0.25):
|
||
|
raise Exception('invalid acceptance stringency')
|
||
|
if max_trust_radius <= 0:
|
||
|
raise Exception('the max trust radius must be positive')
|
||
|
if initial_trust_radius <= 0:
|
||
|
raise ValueError('the initial trust radius must be positive')
|
||
|
if initial_trust_radius >= max_trust_radius:
|
||
|
raise ValueError('the initial trust radius must be less than the '
|
||
|
'max trust radius')
|
||
|
|
||
|
# force the initial guess into a nice format
|
||
|
x0 = np.asarray(x0).flatten()
|
||
|
|
||
|
# A ScalarFunction representing the problem. This caches calls to fun, jac,
|
||
|
# hess.
|
||
|
sf = _prepare_scalar_function(fun, x0, jac=jac, hess=hess, args=args)
|
||
|
fun = sf.fun
|
||
|
jac = sf.grad
|
||
|
if hess is not None:
|
||
|
hess = sf.hess
|
||
|
# ScalarFunction doesn't represent hessp
|
||
|
nhessp, hessp = wrap_function(hessp, args)
|
||
|
|
||
|
# limit the number of iterations
|
||
|
if maxiter is None:
|
||
|
maxiter = len(x0)*200
|
||
|
|
||
|
# init the search status
|
||
|
warnflag = 0
|
||
|
|
||
|
# initialize the search
|
||
|
trust_radius = initial_trust_radius
|
||
|
x = x0
|
||
|
if return_all:
|
||
|
allvecs = [x]
|
||
|
m = subproblem(x, fun, jac, hess, hessp)
|
||
|
k = 0
|
||
|
|
||
|
# search for the function min
|
||
|
# do not even start if the gradient is small enough
|
||
|
while m.jac_mag >= gtol:
|
||
|
|
||
|
# Solve the sub-problem.
|
||
|
# This gives us the proposed step relative to the current position
|
||
|
# and it tells us whether the proposed step
|
||
|
# has reached the trust region boundary or not.
|
||
|
try:
|
||
|
p, hits_boundary = m.solve(trust_radius)
|
||
|
except np.linalg.linalg.LinAlgError:
|
||
|
warnflag = 3
|
||
|
break
|
||
|
|
||
|
# calculate the predicted value at the proposed point
|
||
|
predicted_value = m(p)
|
||
|
|
||
|
# define the local approximation at the proposed point
|
||
|
x_proposed = x + p
|
||
|
m_proposed = subproblem(x_proposed, fun, jac, hess, hessp)
|
||
|
|
||
|
# evaluate the ratio defined in equation (4.4)
|
||
|
actual_reduction = m.fun - m_proposed.fun
|
||
|
predicted_reduction = m.fun - predicted_value
|
||
|
if predicted_reduction <= 0:
|
||
|
warnflag = 2
|
||
|
break
|
||
|
rho = actual_reduction / predicted_reduction
|
||
|
|
||
|
# update the trust radius according to the actual/predicted ratio
|
||
|
if rho < 0.25:
|
||
|
trust_radius *= 0.25
|
||
|
elif rho > 0.75 and hits_boundary:
|
||
|
trust_radius = min(2*trust_radius, max_trust_radius)
|
||
|
|
||
|
# if the ratio is high enough then accept the proposed step
|
||
|
if rho > eta:
|
||
|
x = x_proposed
|
||
|
m = m_proposed
|
||
|
|
||
|
# append the best guess, call back, increment the iteration count
|
||
|
if return_all:
|
||
|
allvecs.append(np.copy(x))
|
||
|
if callback is not None:
|
||
|
callback(np.copy(x))
|
||
|
k += 1
|
||
|
|
||
|
# check if the gradient is small enough to stop
|
||
|
if m.jac_mag < gtol:
|
||
|
warnflag = 0
|
||
|
break
|
||
|
|
||
|
# check if we have looked at enough iterations
|
||
|
if k >= maxiter:
|
||
|
warnflag = 1
|
||
|
break
|
||
|
|
||
|
# print some stuff if requested
|
||
|
status_messages = (
|
||
|
_status_message['success'],
|
||
|
_status_message['maxiter'],
|
||
|
'A bad approximation caused failure to predict improvement.',
|
||
|
'A linalg error occurred, such as a non-psd Hessian.',
|
||
|
)
|
||
|
if disp:
|
||
|
if warnflag == 0:
|
||
|
print(status_messages[warnflag])
|
||
|
else:
|
||
|
print('Warning: ' + status_messages[warnflag])
|
||
|
print(" Current function value: %f" % m.fun)
|
||
|
print(" Iterations: %d" % k)
|
||
|
print(" Function evaluations: %d" % sf.nfev)
|
||
|
print(" Gradient evaluations: %d" % sf.ngev)
|
||
|
print(" Hessian evaluations: %d" % (sf.nhev + nhessp[0]))
|
||
|
|
||
|
result = OptimizeResult(x=x, success=(warnflag == 0), status=warnflag,
|
||
|
fun=m.fun, jac=m.jac, nfev=sf.nfev, njev=sf.ngev,
|
||
|
nhev=sf.nhev + nhessp[0], nit=k,
|
||
|
message=status_messages[warnflag])
|
||
|
|
||
|
if hess is not None:
|
||
|
result['hess'] = m.hess
|
||
|
|
||
|
if return_all:
|
||
|
result['allvecs'] = allvecs
|
||
|
|
||
|
return result
|