Source code for lbfgsb.linesearch

# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2025 Antoine COLLET

r"""
Implement the line search algorithm by Moré and Thuente (1994),
currently used for the L-BFGS-B algorithm.

The target of this line search algorithm is to find a step size :math:`\alpha`
that satisfies the strong Wolfe conditions

.. math::

    f(x + \alpha d) \leq f(x) + \alpha \mu g(x)^T d

and

.. math::

    |g(x + \alpha d)^T d| \leq \eta |g(x)^T d|.

Functions
^^^^^^^^^

.. autosummary::
   :toctree: _autosummary

    max_allowed_steplength
    line_search

References
----------
[1] Moré, J. J., & Thuente, D. J. (1994). Line search algorithms with guaranteed
    sufficient decrease.
"""

import logging
import warnings
from typing import Optional

import numpy as np
import scipy as sp
from packaging.version import Version
from scipy import __version__ as spversion

from lbfgsb._numba_helpers import njit
from lbfgsb.scalar_function import ScalarFunction
from lbfgsb.types import NDArrayFloat

_FLOAT64_MAX = np.finfo(np.float64).max


def _safe_sumsq(x: NDArrayFloat) -> float:
    """Return ``sum(x**2)`` without emitting overflow warnings.

    If the squared norm cannot be represented as a finite ``float64``, return
    ``np.inf``.

    Parameters
    ----------
    x : NDArrayFloat
        Input vector.

    Returns
    -------
    float
        Squared Euclidean norm, or ``np.inf`` if it cannot be represented
        finitely.
    """
    if x.size == 0:
        return 0.0

    if not np.all(np.isfinite(x)):
        return float("inf")

    max_abs = float(np.max(np.abs(x)))

    if max_abs == 0.0:
        return 0.0

    if not np.isfinite(max_abs):
        return float("inf")

    y = x / max_abs

    with np.errstate(over="ignore", invalid="ignore"):
        scaled_sum = float(np.dot(y, y))

    if not np.isfinite(scaled_sum) or scaled_sum <= 0.0:
        return float("inf")

    limit = np.sqrt(_FLOAT64_MAX / scaled_sum)

    if max_abs > limit:
        return float("inf")

    return float(max_abs * max_abs * scaled_sum)


def _safe_dot(a: NDArrayFloat, b: NDArrayFloat) -> float:
    """Return ``dot(a, b)`` while avoiding floating-point warnings.

    Parameters
    ----------
    a, b : NDArrayFloat
        Input vectors.

    Returns
    -------
    float
        Dot product if finite, otherwise ``np.inf``.
    """
    if not np.all(np.isfinite(a)) or not np.all(np.isfinite(b)):
        return float("inf")

    with np.errstate(over="ignore", invalid="ignore"):
        value = float(np.dot(a, b))

    if not np.isfinite(value):
        return float("inf")

    return value


def _trial_point(
    x0: NDArrayFloat,
    alpha: float,
    d: NDArrayFloat,
) -> Optional[NDArrayFloat]:
    """Return ``x0 + alpha * d`` if finite, otherwise ``None``."""
    if not np.isfinite(alpha):
        return None

    with np.errstate(over="ignore", invalid="ignore"):
        x = x0 + alpha * d

    if not np.all(np.isfinite(x)):
        return None

    return x



[docs]
def max_allowed_steplength(
    x: NDArrayFloat,
    d: NDArrayFloat,
    lb: NDArrayFloat,
    ub: NDArrayFloat,
    max_steplength: float,
    n_iter: int,
) -> float:
    r"""
    Compute the largest admissible nonnegative step length.

    The returned step length ``alpha`` satisfies

    .. math::

        lb \leq x + \alpha d \leq ub

    and

    .. math::

        0 \leq \alpha \leq \mathrm{max\_steplength}.

    Parameters
    ----------
    x : NDArrayFloat
        Starting point.
    d : NDArrayFloat
        Search direction.
    lb : NDArrayFloat
        Lower bounds.
    ub : NDArrayFloat
        Upper bounds.
    max_steplength : float
        Maximum step length allowed by the caller.
    n_iter : int
        Current number of outer iterations.

    Returns
    -------
    float
        Maximum admissible step length.

    References
    ----------
    * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
      Constrained Optimization, (1995), SIAM Journal on Scientific and
      Statistical Computing, 16, 5, pp. 1190-1208.
    * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
      FORTRAN routines for large scale bound constrained optimization (1997),
      ACM Transactions on Mathematical Software, 23, 4, pp. 550-560.
    * J. L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778:
      L-BFGS-B, FORTRAN routines for large scale bound constrained optimization
      (2011), ACM Transactions on Mathematical Software, 38, 1.
    """
    if n_iter == 0:
        return 1.0

    if not np.isfinite(max_steplength) or max_steplength <= 0.0:
        return 0.0

    if not np.all(np.isfinite(d)):
        return 0.0

    mask = d != 0.0

    if not np.any(mask):
        return float(max_steplength)

    with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
        candidates = np.where(
            d[mask] > 0.0,
            (ub[mask] - x[mask]) / d[mask],
            (lb[mask] - x[mask]) / d[mask],
        )

    candidates = candidates[np.isfinite(candidates)]
    candidates = candidates[candidates >= 0.0]

    if candidates.size == 0:
        return float(max_steplength)

    return float(min(max_steplength, float(np.min(candidates))))



@njit(cache=True)
def max_allowed_steplength_numba(
    x: NDArrayFloat,
    d: NDArrayFloat,
    lb: NDArrayFloat,
    ub: NDArrayFloat,
    max_steplength: float,
    n_iter: int,
) -> float:
    """See :func:`max_allowed_steplength`."""
    if n_iter == 0:
        return 1.0

    if not np.isfinite(max_steplength) or max_steplength <= 0.0:
        return 0.0

    alpha = max_steplength
    found = False

    n = x.size
    for i in range(n):
        di = d[i]

        if not np.isfinite(di):
            return 0.0

        if di != 0.0:
            if di > 0.0:
                tmp = (ub[i] - x[i]) / di
            else:
                tmp = (lb[i] - x[i]) / di

            if np.isfinite(tmp) and tmp >= 0.0:
                if not found or tmp < alpha:
                    alpha = tmp
                    found = True

    return alpha



[docs]
def line_search(
    x0: NDArrayFloat,
    f0: float,
    g0: NDArrayFloat,
    d: NDArrayFloat,
    lb: NDArrayFloat,
    ub: NDArrayFloat,
    above_iter: int,
    max_steplength_user: float,
    is_boxed: bool,
    sf: ScalarFunction,
    ftol: float = 1e-3,
    gtol: float = 0.9,
    xtol: float = 1e-1,
    max_iter: int = 30,
    iprint: int = 10,
    logger: Optional[logging.Logger] = None,
    isave: Optional[np.typing.NDArray[np.intc]] = None,
    dsave: Optional[NDArrayFloat] = None,
    is_use_numba_jit: bool = False,
) -> Optional[float]:
    r"""
    Find a step satisfying sufficient decrease and curvature conditions.

    The line search attempts to find a step length ``alpha`` such that

    .. math::

        f(x_0 + \alpha d)
        \leq
        f(x_0) + c_1 \alpha \nabla f(x_0)^T d

    and

    .. math::

        |\nabla f(x_0 + \alpha d)^T d|
        \leq
        c_2 |\nabla f(x_0)^T d|.

    These are the strong Wolfe conditions.

    Notes
    -----
    For SciPy versions older than 1.12, this routine calls the historical
    Minpack2 ``dcsrch`` implementation. For SciPy 1.12 and above, it uses
    SciPy's Python implementation.

    Parameters
    ----------
    x0 : NDArrayFloat
        Starting point.
    f0 : float
        Objective value at ``x0``.
    g0 : NDArrayFloat
        Gradient at ``x0``.
    d : NDArrayFloat
        Search direction.
    lb : NDArrayFloat
        Lower bounds.
    ub : NDArrayFloat
        Upper bounds.
    above_iter : int
        Current outer iteration number.
    max_steplength_user : float
        User-provided maximum step length.
    is_boxed : bool
        Whether all variables have both lower and upper bounds.
    sf : ScalarFunction
        Objective and gradient wrapper.
    ftol : float, optional
        Sufficient-decrease Wolfe parameter :math:`c_1`. Default is ``1e-3``.
    gtol : float, optional
        Curvature Wolfe parameter :math:`c_2`. Default is ``0.9``.
    xtol : float, optional
        Relative tolerance for acceptable steps in the line-search procedure.
        Default is ``1e-1``.
    max_iter : int, optional
        Maximum number of line-search iterations. Default is ``30``.
    iprint : int, optional
        Verbosity level. Default is ``10``.
    logger : logging.Logger, optional
        Logger used for textual output.
    isave : ndarray, optional
        Integer workspace used by the legacy Minpack2 implementation.
    dsave : ndarray, optional
        Floating-point workspace used by the legacy Minpack2 implementation.
    is_use_numba_jit : bool, optional
        Whether to use Numba-compiled helper functions. Default is ``False``.

    Returns
    -------
    float or None
        Step length if the line search succeeds or returns a usable warning
        state. ``None`` otherwise.
    """
    if isave is None:
        isave = np.zeros((2,), dtype=np.intc)

    if dsave is None:
        dsave = np.zeros((13,), dtype=np.float64)

    if max_iter <= 0:
        return None

    if not np.isfinite(f0):
        return None

    if not np.all(np.isfinite(x0)):
        return None

    if not np.all(np.isfinite(g0)):
        return None

    if not np.all(np.isfinite(d)):
        return None

    if is_use_numba_jit:
        max_steplength = max_allowed_steplength_numba(
            x0,
            d,
            lb,
            ub,
            max_steplength_user,
            above_iter,
        )
    else:
        max_steplength = max_allowed_steplength(
            x0,
            d,
            lb,
            ub,
            max_steplength_user,
            above_iter,
        )

    if not np.isfinite(max_steplength) or max_steplength <= 0.0:
        return None

    dphi0 = _safe_dot(g0, d)

    if not np.isfinite(dphi0):
        return None

    # Moré-Thuente line search requires a descent direction.
    if dphi0 >= 0.0:
        return None

    if above_iter == 0 and not is_boxed:
        dd = _safe_sumsq(d)

        if not np.isfinite(dd) or dd <= 0.0:
            return None

        steplength_0 = min(1.0 / np.sqrt(dd), max_steplength)
    else:
        steplength_0 = min(1.0, max_steplength)

    if not np.isfinite(steplength_0) or steplength_0 <= 0.0:
        return None

    # Support for Python 3.7 and 3.8: the minpack2 wrapper was removed from
    # SciPy 1.12 and replaced by a Python implementation. Python 3.7 and 3.8
    # cannot use SciPy 1.12, so the legacy Minpack2 path is still needed.
    is_use_minpack2: bool = Version(spversion) < Version("1.12")

    def phi(alpha: float) -> float:
        """Return objective value for step length ``alpha``."""
        x_trial = _trial_point(x0, alpha, d)

        if x_trial is None:
            return float("inf")

        value = sf.fun(x_trial)

        if not np.isfinite(value):
            return float("inf")

        return float(value)

    def dphi(alpha: float) -> float:
        """Return directional derivative for step length ``alpha``."""
        x_trial = _trial_point(x0, alpha, d)

        if x_trial is None:
            return float("nan")

        grad = sf.grad(x_trial)

        if not np.all(np.isfinite(grad)):
            return float("nan")

        value = _safe_dot(grad, d)

        if not np.isfinite(value):
            return float("nan")

        return float(value)

    task = b"START"
    f_m1 = float(f0)
    dphi_m1 = float(dphi0)
    _iter = 0

    steplength: Optional[float] = None
    best_stp: Optional[float] = None

    if not is_use_minpack2:
        # There is an issue in DCSRCH.__call__: it can return steplength=None
        # when task is a warning, while this should not necessarily happen,
        # for example when steplength reaches max_steplength. Therefore, keep
        # the explicit iteration loop.
        dcsrch = sp.optimize._dcsrch.DCSRCH(
            phi,
            dphi,
            ftol,
            gtol,
            xtol,
            0.0,
            max_steplength,
        )

    while _iter < max_iter:
        if is_use_minpack2:
            # SciPy older than 1.12 uses the Fortran Minpack2 implementation.
            with warnings.catch_warnings():
                # scipy.optimize.minpack2 may be deprecated on newer SciPy
                # versions, but this path is needed for Python 3.7/3.8
                # compatibility.
                warnings.filterwarnings("ignore", category=DeprecationWarning)
                steplength_raw, f0_raw, dphi0_raw, task = sp.optimize.minpack2.dcsrch(
                    steplength_0,
                    f_m1,
                    dphi_m1,
                    ftol,
                    gtol,
                    xtol,
                    task,
                    0,
                    max_steplength,
                    isave,
                    dsave,
                )

            steplength = None if steplength_raw is None else float(steplength_raw)
            f0 = float(f0_raw)
            dphi0 = float(dphi0_raw)

        else:
            # SciPy >= 1.12 uses the Python implementation.
            steplength_raw, f0_raw, dphi0_raw, task = dcsrch._iterate(
                steplength_0,
                f_m1,
                dphi_m1,
                task,
            )

            steplength = None if steplength_raw is None else float(steplength_raw)

        if task[:2] == b"FG":
            if steplength is None:
                return None

            if not np.isfinite(steplength) or steplength <= 0.0:
                return None

            x_trial = _trial_point(x0, steplength, d)

            if x_trial is None:
                return None

            stp_old = float(steplength_0)
            f_m1_old = float(f_m1)

            steplength_0 = float(steplength)

            f_new, g_new = sf.fun_and_grad(x_trial)

            if not np.isfinite(f_new):
                return None

            if not np.all(np.isfinite(g_new)):
                return None

            dphi_new = _safe_dot(g_new, d)

            if not np.isfinite(dphi_new):
                return None

            f_m1 = float(f_new)
            dphi_m1 = float(dphi_new)

            if f_m1 < f_m1_old:
                best_stp = steplength_0
            else:
                best_stp = stp_old

        else:
            break

        _iter += 1

    else:
        # max_iter reached: the line search did not converge.
        task = b"WARNING: dcsrch did not converge within max iterations"

    if steplength is None:
        return None

    if not np.isfinite(steplength) or steplength <= 0.0:
        return None

    if task[:4] != b"CONV" and task[:4] != b"WARN":
        return None

    if best_stp is not None:
        steplength = best_stp

    if not np.isfinite(steplength) or steplength <= 0.0:
        return None

    if iprint >= 99 and logger is not None:
        dd = _safe_sumsq(d)

        if np.isfinite(dd):
            norm_step = steplength * np.sqrt(dd)
        else:
            norm_step = float("inf")

        logger.info(f"LINE SEARCH {_iter} times; norm of step = {norm_step}")

    return float(steplength)