# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2025 Antoine COLLET
"""
Subspace minimization procedure of the L-BFGS-B algorithm,
mainly for internal use.
The target of subspace minimization is to minimize the quadratic function m(x)
over the free variables, subject to the bound condition.
Free variables stand for coordinates that are not at the boundary in xcp,
the generalized Cauchy point.
In the classical implementation of L-BFGS-B [1], the minimization is done by first
ignoring the box constraints, followed by a line search.
Functions
^^^^^^^^^
.. autosummary::
:toctree: _autosummary
get_freev
form_k_from_wm
form_k_from_za
factorize_k
subspace_minimization
Reference:
[1] R. H. Byrd, P. Lu, and J. Nocedal (1995). A limited memory algorithm for bound
constrained optimization.
[2] C. Voglis and I. E. Lagaris (2004). BOXCQP: An algorithm for bound constrained
convex quadratic problems.
"""
import logging
from typing import Optional, Tuple
import numpy as np
import scipy as sp
from lbfgsb._numba_helpers import njit
from lbfgsb.bfgsmats import LBFGSB_MATRICES, bmv, bmv_numba
from lbfgsb.types import NDArrayFloat, NDArrayInt
[docs]
def get_freev(
x_cp: NDArrayFloat,
lb: NDArrayFloat,
ub: NDArrayFloat,
iter: int,
free_vars_old: Optional[NDArrayInt] = None,
iprint: int = -1,
logger: Optional[logging.Logger] = None,
) -> Tuple[NDArrayInt, NDArrayInt]:
"""
Get the free variables and build sparse Z and A matrices.
Parameters
----------
x_cp : NDArrayFloat
Generalized cauchy point.
lb : NDArrayFloat
Lower bounds.
ub : NDArrayFloat
Upper bounds.
free_vars_old : NDArrayInt
Free variables at x_cp at the previous iteration.
iter : int
Iteration number.
iprint : int, optional
Controls the frequency of output. ``iprint < 0`` means no output;
``iprint = 0`` print only one line at the last iteration;
``0 < iprint < 99`` print also f and ``|proj g|`` every iprint iterations;
``iprint >= 99`` print details of every iteration except n-vectors;
logger: Optional[Logger], optional
:class:`logging.Logger` instance. If None, nothing is displayed, no matter the
value of `iprint`, by default None.
Returns
-------
Tuple[NDArrayInt, spmatrix, spmatrix]
The free variables and sparse matrices Z and A.
"""
# number of variables
n: int = x_cp.size
# Array of free variable and active variable indices (from 0 to n-1)
free_vars: NDArrayInt = ((x_cp != ub) & (x_cp != lb)).nonzero()[0]
active_vars: NDArrayInt = (~np.isin(np.arange(n), free_vars)).nonzero()[0]
# Some display
# 1) Indicate which variable is leaving the free variables and which is
# entering the free variables -> Not for the first iteration
if iprint > 100 and iter > 0 and free_vars_old is not None and logger is not None:
# Variables leaving the free variables
leaving_vars = active_vars[np.isin(active_vars, free_vars_old)]
logger.info(f"Variables leaving the free variables set = {leaving_vars}")
entering_vars = free_vars[~np.isin(free_vars, free_vars_old)]
logger.info(f"Variables entering the free variables set = {entering_vars}")
logger.info(
f"N variables leaving = {leaving_vars.size} \t,"
f" N variables entering = {entering_vars.size}"
)
# 2) Display the total of free variables at x_cp
if iprint > 99 and logger is not None:
logger.info(f"{free_vars.size} variables are free at GCP, iter = {iter + 1}")
return free_vars, active_vars
def form_k(
free_vars: NDArrayInt,
active_vars: NDArrayInt,
WTZ: NDArrayFloat,
mats: LBFGSB_MATRICES,
is_assert_correct: bool = True,
) -> NDArrayFloat:
""" """
# Construct K = M^{-1}(I - 1/theta M WT Z @ ZT @ W))
K = form_k_from_za(
free_vars, active_vars, mats.Y, mats.S, mats.D, mats.L, mats.theta
)
if is_assert_correct:
K_wm = form_k_from_wm(WTZ, mats.invMfactors, mats.theta)
np.testing.assert_allclose(K, K_wm, atol=1e-8)
return K
@njit(cache=True)
def solve_triangular_numba(
L: NDArrayFloat, v: NDArrayFloat, lower: bool = True
) -> NDArrayFloat:
"""
Numba replacement for:
solve_triangular(U, lower=False)
"""
n = v.size
y = np.empty(n)
if lower:
# Forward solve: L y = v
for i in range(n):
s = v[i]
for j in range(i):
s -= L[i, j] * y[j]
y[i] = s / L[i, i]
else:
# Backward solve: U p = y
for i in range(n - 1, -1, -1):
s = v[i]
for j in range(i + 1, n):
s -= L[i, j] * y[j]
y[i] = s / L[i, i]
return y
[docs]
def factorize_k(
K: NDArrayFloat,
is_assert_correct: bool = True,
) -> Optional[NDArrayFloat]:
"""
Return the L with LEL^T factorization of the indefinite matrix K.
K = [-D -Y'ZZ'Y/theta L_a'-R_z' ]
[L_a -R_z theta*S'AA'S ]
where
E = [-I 0]
[ 0 I]
Parameters
----------
K: NDArrayFloat
Indefinite matrix to factorize.
is_assert_correct : bool, optional
Whether to test the correctness of the factorization, by default True
Returns
-------
Optional[NDArrayFloat]
LK, the lower triangle of the matrix factorization K = LK @ E @ LK.T
"""
# The factorization only makes sense if K is at least (2, 2).
if K.size < 4:
assert K.size == 1
return np.sqrt(K)
# Extract the subblocks of K with K12 = K21.T (K is symmetric)
m = int(K.shape[0] / 2)
K11 = -K[:m, :m]
K12 = -K[:m, m:]
K22 = K[m:, m:]
# LK is a lower triangle of the matrix factorization LK @ E @ LK.T
# Initiate the array
LK = np.zeros((2 * m, 2 * m), dtype=np.float64)
# Form L, the lower part of LL' = D+Y' ZZ'Y/theta
L11 = sp.linalg.cholesky(K11, lower=True, overwrite_a=False)
# Top-left
LK[:m, :m] = L11
# then form L^-1(-L_a'+R_z') in the (1,2) block.
L12 = sp.linalg.solve_triangular(L11, K12, lower=True, trans="N")
# Top-right
LK[m:, :m] = L12.T
# Form L22 from S'AA'S*theta + (L^-1(-L_a'+R_z'))'L^-1(-L_a'+R_z')
# Bottom-right
LK[m:, m:] = sp.linalg.cholesky(K22 + L12.T @ L12, lower=True)
# Test the factorization
if is_assert_correct:
E = np.identity(n=2 * m)
E[:m, :m] *= -1
np.testing.assert_allclose(LK @ E @ LK.T, K, atol=1e-8)
return LK
[docs]
def subspace_minimization(
x: NDArrayFloat,
xc: NDArrayFloat,
free_vars: NDArrayInt,
active_vars: NDArrayInt,
c: NDArrayFloat,
grad: NDArrayFloat,
lb: NDArrayFloat,
ub: NDArrayFloat,
mats: LBFGSB_MATRICES,
is_check_factorizations: bool = False,
is_use_numba_jit: bool = False,
) -> NDArrayFloat:
r"""
Computes an approximate solution of the subspace problem.
This is following section 5.1 in Byrd et al. (1995).
.. math::
\begin{aligned}
\min& &\langle r, (x-xcp)\rangle + 1/2 \langle x-xcp, B (x-xcp)\rangle\\
\text{s.t.}& &l<=x<=u\\
& & x_i=xcp_i \text{for all} i \in A(xcp)
\end{aligned}
along the subspace unconstrained Newton direction :math:`d = -(Z'BZ)^(-1) r`.
Parameters
----------
x : NDArrayFloat
Starting point for the GCP computation
xc : NDArrayFloat
Cauchy point.
c : NDArrayFloat
W^T(xc-x), computed with the Cauchy point.
grad : NDArrayFloat
Gradient of f(x). grad must be a nonzero vector.
lb : NDArrayFloat
Lower bound vector.
ub : NDArrayFloat
Upper bound vector.
mats: LBFGSB_MATRICES
Wrapper for BFGS matrices.
is_check_factorizations: bool
Whether to check the different factorizations performed. The default is False.
is_use_numba_jit: bool
Whether to use `numba` just-in-time compilation to speed-up the computation
intensive part of the algorithm. The default is False.
.. versionadded:: 1.0
Returns
-------
NDArrayFloat
xbar
References
----------
* R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
Constrained Optimization, (1995), SIAM Journal on Scientific and
Statistical Computing, 16, 5, pp. 1190-1208.
* C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
FORTRAN routines for large scale bound constrained optimization (1997),
ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
* J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
FORTRAN routines for large scale bound constrained optimization (2011),
ACM Transactions on Mathematical Software, 38, 1.
"""
# Direct primal method
invThet = 1.0 / mats.theta
# d = (1/theta)r + (1/theta*2) Z'WK^(-1)W'Z r.
if free_vars.size == 0:
return xc
# ------------------------------------------------------------------
# Pre-slice everything ONCE (major speedup)
# ------------------------------------------------------------------
xc_free = xc[free_vars]
ub_free = ub[free_vars]
lb_free = lb[free_vars]
# Same as W.T.dot(Z) but numpy does not handle correctly
# numpy_array.dot(sparce_matrix), so we give the responsibility to the
# sparse matrix
# Note that here, Z is suppose to have a shape (t, n) with t the number
# of free_vars and n the number of variables.
# WTZ = W.T.dot(Z.todense()) works but this is much less efficient
W_free = mats.W[free_vars, :]
WTZ = W_free.T # shape (m, t)
r = grad + mats.theta * (xc - x)
# At iter 0, M is [[0.0]] and so is invMfactors
if mats.use_factor:
r -= mats.W.dot(bmv(mats.invMfactors, c))
rHat = r[free_vars]
v = WTZ @ rHat
# Factorization of M^{-1}(I - 1/theta M WT Z @ ZT @ W))
if mats.use_factor:
K = form_k(
free_vars,
active_vars,
WTZ,
mats,
is_assert_correct=is_check_factorizations,
)
# The assertion includes minor overhead
LK: Optional[NDArrayFloat] = factorize_k(
K, is_assert_correct=is_check_factorizations
)
else:
LK = None
if LK is not None:
# LK is the lowest triangle of the cholesky factorization
# of (I - 1/theta M WT Z @ ZT @ W)^{-1} M.
if is_use_numba_jit:
v = solve_triangular_numba(LK, v, lower=True)
else:
v = sp.linalg.solve_triangular(LK, v, lower=True)
v[: int(LK.shape[0] / 2)] *= -1
if is_use_numba_jit:
v = solve_triangular_numba(LK.T, v, lower=False)
else:
v = sp.linalg.solve_triangular(LK.T, v, lower=False)
else:
# This is less efficient but it should only happen if LK is None, i.e., at
# iteration 0
if mats.use_factor:
if is_use_numba_jit:
v = bmv_numba(*mats.invMfactors, v)
N = -bmv_numba(*mats.invMfactors, invThet * (WTZ @ WTZ.T))
else:
v = bmv(mats.invMfactors, v)
N = -bmv(mats.invMfactors, invThet * (WTZ @ WTZ.T))
else:
M = mats.invMfactors[0] @ mats.invMfactors[1]
v = M @ v
N = -M @ (invThet * WTZ @ WTZ.T)
# Add the identity matrix: this is the same as N = np.eye(N.shape[0]) - M.dot(N)
# but much faster
np.fill_diagonal(N, N.diagonal() + 1)
v = np.linalg.solve(N, v)
# Careful, there is an error in the original paper (the negative sign is
# missing) !
dHat = -invThet * (rHat + invThet * (WTZ.T @ v))
# We can then backtrack towards the feasible region, if necessary, to obtain
# alpha a positive scalar between 0 and 1 -> Eq (5.8)
mask = dHat != 0.0
if mask.any():
d = dHat[mask]
xc_m = xc_free[mask]
step = np.empty_like(d)
pos = d > 0.0
step[pos] = (ub_free[mask][pos] - xc_m[pos]) / d[pos]
step[~pos] = (lb_free[mask][~pos] - xc_m[~pos]) / d[~pos]
alpha_star = min(1.0, step.min())
else:
alpha_star = 1.0
# Eq (5.2) -> update free variables only
xc[free_vars] = xc_free + alpha_star * dHat
return xc