Source code for lbfgsb.main

# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2025 Antoine COLLET

"""
This code is a python port of the famous implementation of Limited-memory
Broyden-Fletcher-Goldfarb-Shanno (L-BFGS), algorithm 778 written in Fortran [2,3]
(last update in 2011).
Note that this is not a wrapper such as minimize in scipy but a complete
reimplementation (nevertheless relying heavily on numpy and scipy to
maintain correct performances).
The original code can be found here: https://dl.acm.org/doi/10.1145/279232.279236

The aim of this reimplementation was threefold. First, familiarize ourselves with
the code, its logic and inner optimizations. Second, gain access to certain
parameters that are hard-coded in the Fortran code and cannot be modified (typically
wolfe conditions parameters for the line search). Third,
implement additional functionalities that require significant modification of
the code core.

Additional features
--------------------
Explain about objective function update on the fly.
TODO: point to the doc of the main routine.
TODO:
https://towardsdatascience.com/numerical-optimization-based-on-the-l-bfgs-method-f6582135b0ca

References
----------
[1] R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
    Constrained Optimization, (1995), SIAM Journal on Scientific and
    Statistical Computing, 16, 5, pp. 1190-1208.
[2] C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
    FORTRAN routines for large scale bound constrained optimization (1997),
    ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
[3] J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
    FORTRAN routines for large scale bound constrained optimization (2011),
    ACM Transactions on Mathematical Software, 38, 1.
"""

import copy
import logging
import warnings
from collections import deque
from dataclasses import dataclass
from typing import Callable, Deque, Optional, Tuple, Union

import numpy as np
from numpy.typing import ArrayLike
from scipy.optimize import (
    LbfgsInvHessProduct,  # noqa : F401
    OptimizeResult,
)

from lbfgsb._numba_helpers import NUMBA_AVAILABLE
from lbfgsb.base import (
    clip2bounds,
    count_var_at_bounds,
    display_iter,
    display_results,
    display_start,
    get_bounds,
    is_any_inf,
    projgr,
)
from lbfgsb.bfgsmats import (
    LBFGSB_MATRICES,
    make_X_and_G_respect_strong_wolfe,
    update_lbfgs_matrices,
)
from lbfgsb.cauchy import get_cauchy_point
from lbfgsb.linesearch import line_search
from lbfgsb.scalar_function import (
    JacOption,
    Objective,
    ObjectiveAndGradient,
    ScalarFunction,
    prepare_scalar_function,
)
from lbfgsb.subspacemin import get_freev, subspace_minimization
from lbfgsb.types import NDArrayFloat


@dataclass
class InternalState:
    """Class to keep track of internal state."""

    # keep track of some values (best, init)
    nit: int = 0
    status: str = "IDLE."
    task_str: str = "START"
    is_success: bool = False
    warnflag: int = 2


[docs] def minimize_lbfgsb( *, x0: NDArrayFloat, fun: Union[Objective, ObjectiveAndGradient], jac: JacOption, update_fun_def: Optional[ Callable[ [ NDArrayFloat, float, float, NDArrayFloat, Deque[NDArrayFloat], Deque[NDArrayFloat], ], Tuple[float, float, NDArrayFloat, Deque[NDArrayFloat]], ] ] = None, bounds: Optional[ArrayLike] = None, checkpoint: Optional[OptimizeResult] = None, maxcor: int = 10, ftarget: Optional[Union[float, Callable[[], float]]] = None, ftol: float = 1e-5, gtol: Union[float, Callable[[], float]] = 1e-5, maxiter: int = 50, eps: float = 1e-8, maxfun: int = 15000, callback: Optional[Callable[[NDArrayFloat, OptimizeResult], bool]] = None, maxls: int = 20, finite_diff_rel_step: Optional[float] = None, max_steplength: float = 1e8, ftol_linesearch: float = 1e-3, gtol_linesearch: float = 0.9, xtol_linesearch: float = 1e-1, eps_SY: float = 2.2e-16, iprint: int = -1, logger: Optional[logging.Logger] = None, is_check_factorizations: bool = False, is_use_numba_jit: bool = True, ) -> OptimizeResult: r""" Minimize a scalar function of one or more variables using the L-BFGS-B algorithm. This routine solves bound-constrained optimization problems using a pure Python implementation of the limited-memory Broyden-Fletcher-Goldfarb-Shanno algorithm with bound constraints, commonly known as L-BFGS-B. The problem solved is .. math:: \min_x f(x) subject to .. math:: l_i \leq x_i \leq u_i,\quad i = 1, \ldots, n. The algorithm stores only a limited number of correction pairs to approximate the inverse Hessian, making it suitable for large-scale problems. Parameters ---------- x0 : ndarray, shape (n,) Initial guess. The array contains the starting values of the ``n`` independent variables. If bounds are provided, ``x0`` is clipped to the feasible box before the first evaluation. fun : callable Objective function to minimize. If ``jac`` is ``None``, ``False``, a finite-difference scheme, or a callable, ``fun`` must return only the scalar objective value: .. code-block:: python fun(x) -> float If ``jac`` is ``True``, ``fun`` must return both the objective value and the gradient: .. code-block:: python fun(x) -> tuple[float, ndarray] In all cases, ``x`` is a one-dimensional array with shape ``(n,)``. If additional arguments are required by the objective function, use a closure, ``functools.partial``, or another wrapper. jac : {callable, bool, '2-point', '3-point', 'cs'}, optional Method used to compute the gradient vector. If ``jac`` is a callable, it must return the gradient vector: .. code-block:: python jac(x) -> ndarray, shape (n,) If ``jac`` is ``True``, ``fun`` is assumed to return a pair ``(f, g)``, where ``f`` is the scalar objective value and ``g`` is the gradient vector. This is compatible with the convention used by :func:`scipy.optimize.minimize`. If ``jac`` is one of ``'2-point'``, ``'3-point'``, or ``'cs'``, the gradient is approximated numerically using the corresponding finite-difference scheme. These finite-difference schemes obey the specified bounds. If ``jac`` is ``None`` or ``False``, the gradient is approximated using forward finite differences with absolute step size ``eps``. Default is ``None``. update_fun_def : callable, optional Experimental callback used to update the objective-function definition during optimization and to modify the stored gradient sequence accordingly. This is primarily intended for problems where the objective function changes during optimization, for example regularized problems where the regularization weight is adapted on the fly. Since the L-BFGS approximation depends on past gradient differences, the stored gradient sequence must be updated to remain consistent with the new objective definition. The callable must have signature: .. code-block:: python update_fun_def( x, f0, f0_old, grad, x_deque, grad_deque, ) -> tuple[float, float, ndarray, deque[ndarray]] where ``x`` is the current point, ``f0`` is the current objective value, ``f0_old`` is the previous objective value, ``grad`` is the current gradient, ``x_deque`` is the stored sequence of past iterates, and ``grad_deque`` is the stored sequence of past gradients. bounds : sequence or scipy.optimize.Bounds, optional Bounds on the variables. Bounds may be specified in one of two forms: 1. An instance of :class:`scipy.optimize.Bounds`. 2. A sequence of ``(min, max)`` pairs for each variable. Use ``None`` for one side of a bound to indicate no bound in that direction. checkpoint : scipy.optimize.OptimizeResult, optional Previous optimization result used to restart the solver. When provided, the solver restarts from the checkpoint without discarding the stored L-BFGS correction pairs. The previous objective value, gradient, and inverse-Hessian approximation are reused. The objective-function definition must be unchanged between the original run and the restarted run. This is useful when an optimization was stopped early, when stopping criteria need to be modified, or when parameters such as ``maxcor`` or ``ftol`` need to be changed between runs. .. versionadded:: 1.0 maxcor : int, optional Maximum number of variable-metric corrections used to approximate the inverse Hessian. The L-BFGS-B algorithm does not store the full Hessian matrix; it stores at most ``maxcor`` correction pairs. Default is ``10``. ftarget : float or callable, optional Target objective-function value. The optimization stops when .. math:: f^{k+1} \leq f_{\mathrm{target}}. If ``ftarget`` is callable, it is called once after the first objective and gradient evaluation. This makes it possible to define a target value based on the initial objective value, for example when scaling is applied. If ``None``, this stopping criterion is disabled. Default is ``None``. ftol : float, optional Relative objective-function decrease tolerance. The optimization stops when .. math:: \frac{f^k - f^{k+1}} {\max(|f^k|, |f^{k+1}|, 1)} \leq \mathrm{ftol}. In the original Fortran implementation, this corresponds to ``factr * epsmch``. Typical values are: - ``5e-3`` for low accuracy, - ``5e-8`` for moderate accuracy, - ``5e-14`` for high accuracy. If ``ftol`` is ``0``, the test stops only when the objective function remains unchanged after one iteration. Default is ``1e-5``. gtol : float or callable, optional Projected-gradient tolerance. The optimization stops when .. math:: \max_i |\operatorname{proj} g_i| \leq \mathrm{gtol}, where ``proj g`` is the projected gradient. If ``gtol`` is callable, it is called once after the first objective and gradient evaluation. This allows the stopping tolerance to depend on the initial objective value. Default is ``1e-5``. eps : float, optional Absolute step size used for forward finite-difference approximation of the gradient when ``jac`` is ``None`` or ``False``. Default is ``1e-8``. maxfun : int, optional Maximum number of objective-function evaluations. The limit may be exceeded when gradients are estimated by numerical differentiation. Interruptions due to ``maxfun`` are postponed until the end of the current minimization iteration. Default is ``15000``. maxiter : int, optional Maximum number of iterations. Default is ``50``. callback : callable, optional Callback called after each iteration. The callable must have signature: .. code-block:: python callback(xk, state) -> bool where ``xk`` is the current parameter vector and ``state`` is an :class:`scipy.optimize.OptimizeResult` containing the current solver state. If the callback returns ``True``, the optimization stops. maxls : int, optional Maximum number of line-search steps per iteration. Default is ``20``. finite_diff_rel_step : float, optional Relative step size used for numerical gradient approximation when ``jac`` is ``'2-point'``, ``'3-point'``, or ``'cs'``. The absolute step size is computed as .. math:: h = \mathrm{rel\_step} \operatorname{sign}(x) \max(1, |x|), and is adjusted if necessary to respect the bounds. For ``jac='3-point'``, the sign of ``h`` is ignored. If ``None``, the step size is selected automatically. Default is ``None``. max_steplength : float, optional Maximum allowed step length during the line search. Default is ``1e8``. ftol_linesearch : float, optional Tolerance for the sufficient-decrease condition in the line search. This is the Wolfe condition parameter :math:`c_1` in .. math:: f(x_k + \alpha_k p_k) \leq f(x_k) + c_1 \alpha_k p_k^\mathrm{T} \nabla f(x_k). Usually, :math:`c_1` is small and satisfies :math:`0 < c_1 < 1`. In the original L-BFGS-B Fortran implementation, this parameter is hard-coded to ``1e-3``. Default is ``1e-3``. gtol_linesearch : float, optional Tolerance for the curvature condition in the line search. This is the Wolfe condition parameter :math:`c_2` in .. math:: \left| p_k^\mathrm{T} \nabla f(x_k + \alpha_k p_k) \right| \leq c_2 \left| p_k^\mathrm{T} \nabla f(x_k) \right|. The parameters should satisfy :math:`0 < c_1 < c_2 < 1`. Usually, :math:`c_2` is much larger than :math:`c_1`. In the original L-BFGS-B Fortran implementation, this parameter is hard-coded to ``0.9``. Default is ``0.9``. xtol_linesearch : float, optional Relative tolerance for acceptable steps in the line-search procedure. In the original L-BFGS-B Fortran implementation, this parameter is hard-coded to ``0.1``. Default is ``0.1``. eps_SY : float, optional Numerical threshold used when updating the L-BFGS correction matrices. Default is ``2.2e-16``. iprint : int, optional Verbosity level. - ``iprint < 0``: no output. - ``iprint == 0``: print only the final summary. - ``0 < iprint < 99``: print objective value and projected-gradient norm every ``iprint`` iterations. - ``iprint >= 99``: print detailed information at every iteration, except full vectors. Default is ``-1``. logger : logging.Logger, optional Logger used for textual output. If ``None``, no output is emitted regardless of ``iprint``. Default is ``None``. is_check_factorizations : bool, optional Whether to run additional consistency checks on matrix factorizations. Intended for development and debugging only. Default is ``False``. is_use_numba_jit : bool, optional Whether to use ``numba`` just-in-time compilation for performance-critical parts of the algorithm. If ``True`` but ``numba`` is unavailable, a warning is raised and the solver falls back to the pure Python implementation. The expected speed-up for large problems is typically between two and five times. Default is ``True``. .. versionadded:: 1.0 Returns ------- scipy.optimize.OptimizeResult Optimization result represented as an :class:`scipy.optimize.OptimizeResult`. Important fields include: ``x`` : ndarray Final solution. ``fun`` : float Objective value at the final solution. ``jac`` : ndarray Gradient at the final solution. ``nfev`` : int Number of objective-function evaluations. ``njev`` : int Number of gradient evaluations. ``nit`` : int Number of iterations. ``status`` : int Solver status code. ``message`` : str Termination message. ``success`` : bool Whether the solver terminated successfully. ``hess_inv`` : scipy.optimize.LbfgsInvHessProduct Limited-memory inverse-Hessian approximation. Raises ------ ValueError Raised when the provided checkpoint is inconsistent with ``x0`` or when restored correction vectors have incompatible dimensions. Notes ----- This implementation is a Python reimplementation of the L-BFGS-B algorithm, rather than a wrapper around the original Fortran code. It exposes several parameters that are fixed in the historical implementation, including line-search Wolfe-condition parameters. The convention ``jac=True`` follows :func:`scipy.optimize.minimize`: in that case, ``fun`` must return both the objective value and the gradient. For example: .. code-block:: python def fun(x): f = x[0] ** 2 + x[1] ** 2 g = np.array([2 * x[0], 2 * x[1]]) return f, g res = minimize_lbfgsb(x0=x0, fun=fun, jac=True) If ``jac`` is ``None`` or ``False``, finite differences are used. Examples -------- Minimize a quadratic function with a separate gradient function: .. code-block:: python import numpy as np from lbfgsb import minimize_lbfgsb def objective(x): return x[0] ** 2 + x[1] ** 2 def gradient(x): return np.array([2 * x[0], 2 * x[1]]) x0 = np.array([10.0, 10.0]) res = minimize_lbfgsb( x0=x0, fun=objective, jac=gradient, ) print(res.x) print(res.fun) Use the SciPy-compatible ``jac=True`` convention: .. code-block:: python import numpy as np from lbfgsb import minimize_lbfgsb def objective_and_gradient(x): f = x[0] ** 2 + x[1] ** 2 g = np.array([2 * x[0], 2 * x[1]]) return f, g x0 = np.array([10.0, 10.0]) res = minimize_lbfgsb( x0=x0, fun=objective_and_gradient, jac=True, ) print(res.x) print(res.fun) Use finite differences with bounds: .. code-block:: python import numpy as np from lbfgsb import minimize_lbfgsb def objective(x): return x[0] ** 2 + x[1] ** 2 x0 = np.array([10.0, 10.0]) bounds = [(0.0, None), (0.0, None)] res = minimize_lbfgsb( x0=x0, fun=objective, jac="2-point", bounds=bounds, ) References ---------- .. [1] R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound Constrained Optimization. SIAM Journal on Scientific and Statistical Computing, 16(5), pp. 1190-1208, 1995. .. [2] C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B, FORTRAN routines for large scale bound constrained optimization. ACM Transactions on Mathematical Software, 23(4), pp. 550-560, 1997. .. [3] J. L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B, FORTRAN routines for large scale bound constrained optimization. ACM Transactions on Mathematical Software, 38(1), 2011. """ if not NUMBA_AVAILABLE and is_use_numba_jit: warnings.warn( "`is_use_numba_jit` set to `True` (default) but numba cannot be imported!" " Falling back to pure python (slower)." ) is_use_numba_jit = False lb, ub = get_bounds(x0, bounds) max_steplength_user: float = copy.copy(max_steplength) # True if all values have lower and upper bounds is_boxed: bool = not is_any_inf([lb, ub]) # applying the bounds to the initial guess x0 n = x0.size x = clip2bounds(x0, lb, ub) # Some display about the problem at hand. The display depends on the value of iprint display_start( np.finfo(float).eps, n, maxcor, count_var_at_bounds(x, lb, ub), iprint ) X, G = initialize_X_and_G(x, checkpoint, maxcor) # Initialization of the matrices mats = LBFGSB_MATRICES(n) # wrapper storing the calls to f and g and handling finite difference approximation sf: ScalarFunction = prepare_scalar_function( fun, x, jac=jac, epsilon=eps, bounds=(lb, ub), finite_diff_rel_step=finite_diff_rel_step, ) # restore the number of iterations and objective function evaluation if checkpoint is not None: sf.nfev = checkpoint.nfev sf.ngev = checkpoint.njev # First evaluation of the objective function if no checkpoint provided if checkpoint is None: f0 = sf.fun(x) else: f0 = checkpoint.fun # potential update of stop criterion if ftarget is not None: try: _ftarget: Optional[float] = ftarget() # type: ignore except TypeError: _ftarget = ftarget # type: ignore else: _ftarget = None try: _gtol: float = gtol() # type: ignore except TypeError: _gtol = gtol # type: ignore # Create an internal state instance istate = InternalState() if checkpoint is not None: istate.nit = checkpoint.nit # early check of stop criterion -> Extreme case in which x0 satisfies the # criterion, then no optimization is needed and one does not need to compute # anything else. if is_f0_target_reached(f0, _ftarget, istate): # leave the optimization routine if checkpoint is None: if len(X) == 0: X.append(x) G.append(np.zeros_like(x)) return OptimizeResult( fun=f0, jac=G[0], nfev=sf.nfev, njev=sf.ngev, nit=istate.nit, status=istate.warnflag, message=istate.task_str, x=x, success=istate.is_success, hess_inv=LbfgsInvHessProduct( np.diff(np.array(X), axis=0), np.diff(np.array(G), axis=0) ), ) else: return checkpoint # Compute the first gradient if no checkpoint provided if checkpoint is None: grad = sf.grad(x) else: grad = checkpoint.jac # Note, no need to further update anything because the scaling is handled by the # ScalarFunction instance # perform an early potential update of the objective function definition and # upgrade the gradient and the past sequence of gradients accordingly if update_fun_def is not None: f0, f0_old, grad, G = update_fun_def(x, f0, copy.copy(f0), grad, X, G) if len(X) > 0: # only happens if checkpoint is provided (L-BFGS-B restart) mats = update_lbfgs_matrices( x.copy(), # copy otherwise x might be changed in X when updated grad, X, G, maxcor, mats, is_force_update=True, eps=eps_SY, is_check_factorization=is_check_factorizations, is_use_numba_jit=is_use_numba_jit, ) else: # Store first res to X and G X.append(np.copy(x)) G.append(grad) # For now the free variables at the cauchy points is an empty set free_vars = np.array([], dtype=np.int_) # Check the infinity norm of the projected gradient sbgnrm = projgr(x, grad, lb, ub) display_iter(istate.nit, sbgnrm, f0, iprint, logger=logger) # bool indicating if results of the current iteration have been displayed. It # avoid duplicates for the last round. has_displayed_results = False # Note that interruptions due to maxfun are postponed # until the completion of the current minimization iteration. while ( projgr(x, grad, lb, ub) > _gtol and istate.nit < maxiter and sf.nfev < maxfun and not istate.is_success ): if iprint > 99 and logger is not None: logger.info("\n") logger.info(f"ITERATION {istate.nit + 1}\n") f0_old = f0 + 0.0 # copy # find cauchy point x_cp, c = get_cauchy_point( x, grad, lb, ub, mats, iprint, logger, is_use_numba_jit=is_use_numba_jit ) # Get the free variables for the GCP # Note: no numba needed here free_vars, active_vars = get_freev( x_cp, lb, ub, istate.nit, free_vars, iprint, logger ) # subspace minimization: find the search direction for the minimization problem xbar: NDArrayFloat = subspace_minimization( x, x_cp, free_vars, active_vars, c, grad, lb, ub, mats, is_check_factorizations=is_check_factorizations, is_use_numba_jit=is_use_numba_jit, ) d = xbar - x steplength = line_search( x, f0, grad, d, lb, ub, istate.nit, max_steplength_user, is_boxed, sf, ftol_linesearch, gtol_linesearch, xtol_linesearch, # The maximum number of function evaluation in linesearch must take into # account maxfun and the number of call already performed. min(maxls, maxfun - sf.nfev), iprint, logger, is_use_numba_jit=is_use_numba_jit, ) if steplength is None: if len(X) == 1: # Hessian already rebooted: abort. istate.task_str = "ABNORMAL_TERMINATION_IN_LNSRCH" istate.warnflag = 2 istate.is_success = False break # leave the while and finish the program. else: istate.task_str = "RESTART_FROM_LNSRCH" # Keep only the last correction X = deque([X[-1]]) G = deque([G[-1]]) # Reboot BFGS-Hessian mats = LBFGSB_MATRICES(n) else: # x update x += steplength * d # new evaluation -> normally, the function has been updated in # the linesearch step f0, grad = sf.fun_and_grad(x) if update_fun_def is None: if is_f0_target_reached(f0, _ftarget, istate): break # the while loop elif is_f0_min_change_reached(f0, f0_old, ftol, istate): break # the while loop # perform a potential update of the objective function definition and # upgrade the gradient and the past sequence of gradients accordingly else: f0, f0_old, grad, G = update_fun_def(x, f0, f0_old, grad, X, G) # Check stop criterion: minimum relative change in the # objective function if is_f0_min_change_reached(f0, f0_old, ftol, istate): break # the while loop # Check stop criterion: minimum objective function value elif is_f0_target_reached(f0, _ftarget, istate): break # the while loop # We must check if the updated G satisfy the strong wolfe condition X, G = make_X_and_G_respect_strong_wolfe( X, G, eps_SY, logger=logger, is_use_numba_jit=is_use_numba_jit ) mats = update_lbfgs_matrices( x.copy(), # copy otherwise x might be changed in X when updated grad, X, G, maxcor, mats, is_force_update=False, eps=eps_SY, is_check_factorization=is_check_factorizations, is_use_numba_jit=is_use_numba_jit, ) # callback is a user defined mechanism to stop optimization # if callback returns True, then it stops. # Note: no need to callback if an other stop criterion has already been # reached (istate.is_success) if callback is not None and not istate.is_success: if callback( np.copy(x), OptimizeResult( fun=f0, jac=grad, nfev=sf.nfev, njev=sf.ngev, nit=istate.nit, status=istate.warnflag, message=istate.task_str, x=x, success=istate.is_success, hess_inv=LbfgsInvHessProduct( np.atleast_2d(np.diff(np.array(X), axis=0)), np.atleast_2d(np.diff(np.array(G), axis=0)), ), ), ): istate.task_str = "STOP: USER CALLBACK" istate.is_success = True display_iter(istate.nit + 1, projgr(x, grad, lb, ub), f0, iprint, logger) # Result display has_displayed_results = display_results( istate.nit + 1, maxiter, x, grad, lb, ub, f0, _gtol, False, iprint, logger ) istate.nit += 1 # Final display. If is_success, then it already happened if not has_displayed_results: display_results( istate.nit, maxiter, x, grad, lb, ub, f0, _gtol, True, iprint, logger ) if projgr(x, grad, lb, ub) <= _gtol: istate.task_str = "CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL" istate.is_success = True istate.warnflag = 1 elif istate.nit == maxiter: istate.task_str = "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT" istate.is_success = True istate.warnflag = 1 elif sf.nfev >= maxfun: istate.task_str = "STOP: TOTAL NO. of f AND g EVALUATIONS EXCEEDS LIMIT" istate.is_success = True istate.warnflag = 1 # error: b'ERROR: STPMAX .LT. STPMIN' return OptimizeResult( fun=f0, jac=grad, nfev=sf.nfev, njev=sf.ngev, nit=istate.nit, status=istate.warnflag, message=istate.task_str, x=x, success=istate.is_success, hess_inv=LbfgsInvHessProduct( np.atleast_2d(np.diff(np.array(X), axis=0)), np.atleast_2d(np.diff(np.array(G), axis=0)), ), )
def initialize_X_and_G( x: NDArrayFloat, checkpoint: Optional[OptimizeResult], maxcor: int ) -> Tuple[Deque[NDArrayFloat], Deque[NDArrayFloat]]: """ Initialize the sequence of adjusted values and associated gradients. This routine is mainly dedicated to restore the sequence from a checkpoint. The sequence is stored in the `hess_inv` attribute as "sk" and "yk" which are differences, e.g., sk = np.atleast_2d(np.diff(np.array(X), axis=0)). Parameters ---------- x : NDArrayFloat Adjusted values. checkpoint : Optional[OptimizeResult] Optional checkpoint (see solver restart). maxcor : int Maximum number of corrections stored. Returns ------- Tuple[Deque[NDArrayFloat], Deque[NDArrayFloat]] X and G. """ # Initialize X and G # Deque = similar to list but with faster operations to remove and add # values to extremities X: Deque[NDArrayFloat] = deque() G: Deque[NDArrayFloat] = deque() # if it is a L-BFGS-B restart (checkpoint is provided), then X and G are restored # from x, jac and the sequence of differences sk and yk stored in the inverse # Hessian approximation instance (LbfgsInvHessProduct). if checkpoint is None: return X, G # x0 and checkpoint.x should be the same otherwisee there is an issue try: np.testing.assert_equal(x, checkpoint.x) except AssertionError as e: raise ValueError( "When 'checkpoint' is provided (L-BFGS-B restart), x0 and checkpoint.x" " should be equal!" ) from e n_corrs, n = checkpoint.hess_inv.sk.shape if n_corrs == 0: return X, G if n != x.size: raise ValueError( f"The size of correction vector ({n}) does" f" not match the size of x ({x.size})!" ) # restore the past X and G for x, g in zip( checkpoint.x - np.cumsum(checkpoint.hess_inv.sk, axis=0), checkpoint.jac - np.cumsum(checkpoint.hess_inv.yk, axis=0), ): if len(X) > maxcor: X.popleft() G.popleft() X.append(x) G.append(g) # at this point, X and G do not have x nor jac -> it is added a bit later return X, G def is_f0_min_change_reached( f0: float, f0_old: float, ftol: float, istate: InternalState ) -> bool: """ Return whether the minimum obj fun change has been reached. It updates the internal state. """ if (f0_old - f0) / max(abs(f0_old), abs(f0), 1) < ftol: istate.task_str = "CONVERGENCE: REL_REDUCTION_OF_F_<=_FTOL" istate.is_success = True istate.warnflag = 0 return True return False def is_f0_target_reached( f0: float, ftarget: Optional[float], istate: InternalState ) -> bool: """ Return whether the obj fun target has been reached. It updates the internal state. """ if ftarget is None: return False if f0 > ftarget: return False istate.task_str = "CONVERGENCE: F_<=_TARGET" istate.is_success = True istate.warnflag = 0 return True