From 3401cc53a128c7c0bd6687f3995476e5d63b9da4 Mon Sep 17 00:00:00 2001 From: ChrisRackauckas Date: Mon, 8 Dec 2025 15:51:57 -0500 Subject: [PATCH] Fix false convergence when LBFGSB encounters Inf/NaN at bounds (issue #1094) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The L-BFGS-B algorithm evaluates the function and gradient at bounds during its Cauchy step. When a function has a singularity at the boundary (e.g., log(0)), this produces Inf/NaN values that corrupt the optimization state. The Fortran code would then incorrectly report convergence. This fix: - Tracks whether Inf/NaN values are encountered in the objective or gradient - Detects false convergence: when Inf/NaN was encountered, optimizer reports Success, but the solution hasn't moved from the starting point - Returns Failure instead of Success in such cases with a helpful warning Closes #1094 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../src/OptimizationLBFGSB.jl | 27 ++++++++++++- lib/OptimizationLBFGSB/test/runtests.jl | 38 ++++++++++++++++++- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/lib/OptimizationLBFGSB/src/OptimizationLBFGSB.jl b/lib/OptimizationLBFGSB/src/OptimizationLBFGSB.jl index 8ea012a3f..2a6dc2bc5 100644 --- a/lib/OptimizationLBFGSB/src/OptimizationLBFGSB.jl +++ b/lib/OptimizationLBFGSB/src/OptimizationLBFGSB.jl @@ -197,9 +197,15 @@ function SciMLBase.__solve(cache::OptimizationCache{O}) where {O <: LBFGSB} stats = stats, retcode = opt_ret) else iter_count = Ref(0) + encountered_inf_nan = Ref(false) + _loss = function (θ) x = cache.f(θ, cache.p) iter_count[] += 1 + # Track if we encounter Inf/NaN values in the objective + if !isfinite(x[1]) + encountered_inf_nan[] = true + end opt_state = OptimizationBase.OptimizationState( u = θ, objective = x[1]) if cache.callback(opt_state, x...) @@ -208,6 +214,15 @@ function SciMLBase.__solve(cache::OptimizationCache{O}) where {O <: LBFGSB} return x[1] end + # Wrap gradient function to track Inf/NaN values + _grad! = function (G, θ) + cache.f.grad(G, θ) + # Track if we encounter Inf/NaN values in the gradient + if !all(isfinite, G) + encountered_inf_nan[] = true + end + end + n = length(cache.u0) if cache.lb === nothing @@ -225,7 +240,7 @@ function SciMLBase.__solve(cache::OptimizationCache{O}) where {O <: LBFGSB} t0 = time() res = optimizer( - _loss, cache.f.grad, cache.u0, bounds; m = cache.opt.m, solver_kwargs...) + _loss, _grad!, cache.u0, bounds; m = cache.opt.m, solver_kwargs...) # Extract the task message from the result stop_reason = task_message_to_string(optimizer.task) @@ -233,6 +248,16 @@ function SciMLBase.__solve(cache::OptimizationCache{O}) where {O <: LBFGSB} # Deduce the return code from the stop reason opt_ret = deduce_retcode(stop_reason) + # Detect false convergence due to Inf/NaN values + # If we encountered Inf/NaN and the optimizer claims success but the solution + # is essentially unchanged from the starting point, this is a false convergence + if encountered_inf_nan[] && opt_ret == ReturnCode.Success + if isapprox(res[2], cache.u0; rtol = 1e-8, atol = 1e-12) + @warn "LBFGSB encountered Inf/NaN values during optimization (likely due to function singularity at bounds). The solution has not moved from the initial point. Consider using bounds that exclude singularities." + opt_ret = ReturnCode.Failure + end + end + t1 = time() stats = OptimizationBase.OptimizationStats(; iterations = optimizer.isave[30], diff --git a/lib/OptimizationLBFGSB/test/runtests.jl b/lib/OptimizationLBFGSB/test/runtests.jl index 90c11f0ee..041403a45 100644 --- a/lib/OptimizationLBFGSB/test/runtests.jl +++ b/lib/OptimizationLBFGSB/test/runtests.jl @@ -54,4 +54,40 @@ using Test lb = [-10.0, -10.0, -10.0, -10.0, -10.0], ub = [10.0, 10.0, 10.0, 10.0, 10.0]) opt1 = solve(prob, OptimizationLBFGSB.LBFGSB(), maxiters = 1000, callback = callback) @test opt1.objective < l0 -end \ No newline at end of file + + # Test for issue #1094: LBFGSB should return Failure when encountering Inf/NaN + # at bounds (e.g., due to function singularity) + @testset "Inf/NaN detection at bounds (issue #1094)" begin + # Function with singularity at α = -1 (log(0) = -Inf) + ne = [47.79, 54.64, 60.68, 65.85, 70.10] + nt = [49.01, 56.09, 62.38, 67.80, 72.29] + + function chi2_singular(alpha, p) + n_th = (1 + alpha[1]) * nt + total = 0.0 + for i in eachindex(ne) + if ne[i] == 0.0 + total += 2 * n_th[i] + else + total += 2 * (n_th[i] - ne[i] + ne[i] * log(ne[i] / n_th[i])) + end + end + return total + end + + # With bounds including singularity at -1, should fail + optf_singular = OptimizationFunction(chi2_singular, OptimizationBase.AutoForwardDiff()) + prob_singular = OptimizationProblem(optf_singular, [0.0]; lb = [-1.0], ub = [1.0]) + res_singular = solve(prob_singular, OptimizationLBFGSB.LBFGSB()) + @test res_singular.retcode == ReturnCode.Failure + + # With safe bounds (away from singularity), should succeed + # The optimizer should find a minimum with a negative value of alpha + prob_safe = OptimizationProblem(optf_singular, [0.0]; lb = [-0.9], ub = [1.0]) + res_safe = solve(prob_safe, OptimizationLBFGSB.LBFGSB()) + @test res_safe.retcode == ReturnCode.Success + # The minimum should be negative (somewhere between -0.1 and 0) + @test res_safe.u[1] < 0.0 + @test res_safe.u[1] > -0.5 + end +end