From 3401cc53a128c7c0bd6687f3995476e5d63b9da4 Mon Sep 17 00:00:00 2001
From: ChrisRackauckas <me@chrisrackauckas.com>
Date: Mon, 8 Dec 2025 15:51:57 -0500
Subject: [PATCH] Fix false convergence when LBFGSB encounters Inf/NaN at
 bounds (issue #1094)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The L-BFGS-B algorithm evaluates the function and gradient at bounds during
its Cauchy step. When a function has a singularity at the boundary (e.g.,
log(0)), this produces Inf/NaN values that corrupt the optimization state.
The Fortran code would then incorrectly report convergence.

This fix:
- Tracks whether Inf/NaN values are encountered in the objective or gradient
- Detects false convergence: when Inf/NaN was encountered, optimizer reports
  Success, but the solution hasn't moved from the starting point
- Returns Failure instead of Success in such cases with a helpful warning

Closes #1094

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 .../src/OptimizationLBFGSB.jl                 | 27 ++++++++++++-
 lib/OptimizationLBFGSB/test/runtests.jl       | 38 ++++++++++++++++++-
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/lib/OptimizationLBFGSB/src/OptimizationLBFGSB.jl b/lib/OptimizationLBFGSB/src/OptimizationLBFGSB.jl
index 8ea012a3f..2a6dc2bc5 100644
--- a/lib/OptimizationLBFGSB/src/OptimizationLBFGSB.jl
+++ b/lib/OptimizationLBFGSB/src/OptimizationLBFGSB.jl
@@ -197,9 +197,15 @@ function SciMLBase.__solve(cache::OptimizationCache{O}) where {O <: LBFGSB}
             stats = stats, retcode = opt_ret)
     else
         iter_count = Ref(0)
+        encountered_inf_nan = Ref(false)
+
         _loss = function (θ)
             x = cache.f(θ, cache.p)
             iter_count[] += 1
+            # Track if we encounter Inf/NaN values in the objective
+            if !isfinite(x[1])
+                encountered_inf_nan[] = true
+            end
             opt_state = OptimizationBase.OptimizationState(
                 u = θ, objective = x[1])
             if cache.callback(opt_state, x...)
@@ -208,6 +214,15 @@ function SciMLBase.__solve(cache::OptimizationCache{O}) where {O <: LBFGSB}
             return x[1]
         end
 
+        # Wrap gradient function to track Inf/NaN values
+        _grad! = function (G, θ)
+            cache.f.grad(G, θ)
+            # Track if we encounter Inf/NaN values in the gradient
+            if !all(isfinite, G)
+                encountered_inf_nan[] = true
+            end
+        end
+
         n = length(cache.u0)
 
         if cache.lb === nothing
@@ -225,7 +240,7 @@ function SciMLBase.__solve(cache::OptimizationCache{O}) where {O <: LBFGSB}
         t0 = time()
 
         res = optimizer(
-            _loss, cache.f.grad, cache.u0, bounds; m = cache.opt.m, solver_kwargs...)
+            _loss, _grad!, cache.u0, bounds; m = cache.opt.m, solver_kwargs...)
 
         # Extract the task message from the result
         stop_reason = task_message_to_string(optimizer.task)
@@ -233,6 +248,16 @@ function SciMLBase.__solve(cache::OptimizationCache{O}) where {O <: LBFGSB}
         # Deduce the return code from the stop reason
         opt_ret = deduce_retcode(stop_reason)
 
+        # Detect false convergence due to Inf/NaN values
+        # If we encountered Inf/NaN and the optimizer claims success but the solution
+        # is essentially unchanged from the starting point, this is a false convergence
+        if encountered_inf_nan[] && opt_ret == ReturnCode.Success
+            if isapprox(res[2], cache.u0; rtol = 1e-8, atol = 1e-12)
+                @warn "LBFGSB encountered Inf/NaN values during optimization (likely due to function singularity at bounds). The solution has not moved from the initial point. Consider using bounds that exclude singularities."
+                opt_ret = ReturnCode.Failure
+            end
+        end
+
         t1 = time()
 
         stats = OptimizationBase.OptimizationStats(; iterations = optimizer.isave[30],
diff --git a/lib/OptimizationLBFGSB/test/runtests.jl b/lib/OptimizationLBFGSB/test/runtests.jl
index 90c11f0ee..041403a45 100644
--- a/lib/OptimizationLBFGSB/test/runtests.jl
+++ b/lib/OptimizationLBFGSB/test/runtests.jl
@@ -54,4 +54,40 @@ using Test
         lb = [-10.0, -10.0, -10.0, -10.0, -10.0], ub = [10.0, 10.0, 10.0, 10.0, 10.0])
     opt1 = solve(prob, OptimizationLBFGSB.LBFGSB(), maxiters = 1000, callback = callback)
     @test opt1.objective < l0
-end
\ No newline at end of file
+
+    # Test for issue #1094: LBFGSB should return Failure when encountering Inf/NaN
+    # at bounds (e.g., due to function singularity)
+    @testset "Inf/NaN detection at bounds (issue #1094)" begin
+        # Function with singularity at α = -1 (log(0) = -Inf)
+        ne = [47.79, 54.64, 60.68, 65.85, 70.10]
+        nt = [49.01, 56.09, 62.38, 67.80, 72.29]
+
+        function chi2_singular(alpha, p)
+            n_th = (1 + alpha[1]) * nt
+            total = 0.0
+            for i in eachindex(ne)
+                if ne[i] == 0.0
+                    total += 2 * n_th[i]
+                else
+                    total += 2 * (n_th[i] - ne[i] + ne[i] * log(ne[i] / n_th[i]))
+                end
+            end
+            return total
+        end
+
+        # With bounds including singularity at -1, should fail
+        optf_singular = OptimizationFunction(chi2_singular, OptimizationBase.AutoForwardDiff())
+        prob_singular = OptimizationProblem(optf_singular, [0.0]; lb = [-1.0], ub = [1.0])
+        res_singular = solve(prob_singular, OptimizationLBFGSB.LBFGSB())
+        @test res_singular.retcode == ReturnCode.Failure
+
+        # With safe bounds (away from singularity), should succeed
+        # The optimizer should find a minimum with a negative value of alpha
+        prob_safe = OptimizationProblem(optf_singular, [0.0]; lb = [-0.9], ub = [1.0])
+        res_safe = solve(prob_safe, OptimizationLBFGSB.LBFGSB())
+        @test res_safe.retcode == ReturnCode.Success
+        # The minimum should be negative (somewhere between -0.1 and 0)
+        @test res_safe.u[1] < 0.0
+        @test res_safe.u[1] > -0.5
+    end
+end