From 688df7453702c53369a8c12a06ea45970a190940 Mon Sep 17 00:00:00 2001 From: rjbaral Date: Fri, 29 Sep 2023 17:06:53 -0700 Subject: [PATCH] stopping criteria update --- src/LMTR_alg.jl | 19 ++++++++++--------- src/LM_alg.jl | 19 ++++++++++--------- src/R2_alg.jl | 1 - src/TRDH_alg.jl | 2 +- src/TR_alg.jl | 6 ++---- 5 files changed, 23 insertions(+), 24 deletions(-) diff --git a/src/LMTR_alg.jl b/src/LMTR_alg.jl index ae6620f5..fd1ece1e 100644 --- a/src/LMTR_alg.jl +++ b/src/LMTR_alg.jl @@ -115,11 +115,11 @@ function LMTR( if verbose > 0 #! format: off - @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "Δ" "‖x‖" "‖s‖" "1/ν" "TR" + @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√(ξ1/ν)" "√(ξ/ν)" "ρ" "Δ" "‖x‖" "‖s‖" "1/ν" "TR" #! format: on end - local ξ1 + local ξ1, ξ k = 0 Fk = residual(nls, xk) @@ -132,6 +132,7 @@ function LMTR( σmax = opnorm(Jk) νInv = (1 + θ) * σmax^2 # ‖J'J‖ = ‖J‖² + ν = 1 / νInv mν∇fk = -∇fk / νInv @@ -179,18 +180,18 @@ function LMTR( ξ1 > 0 || error("LMTR: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") if ξ1 ≥ 0 && k == 1 - ϵ_increment = ϵr * sqrt(ξ1) + ϵ_increment = ϵr * sqrt(ξ1/ν) ϵ += ϵ_increment # make stopping test absolute and relative ϵ_subsolver += ϵ_increment end - if sqrt(ξ1) < ϵ + if sqrt(ξ1/ν) < ϵ # the current xk is approximately first-order stationary optimal = true continue end - subsolver_options.ϵa = k == 1 ? 1.0e-5 : max(ϵ_subsolver, min(1.0e-1, ξ1 / 10)) + subsolver_options.ϵa = k == 1 ? 1.0e-5 : max(ϵ_subsolver, min(1.0e-1, ξ1 / ν / 10)) ∆_effective = min(β * χ(s), Δk) treats_bounds ? set_bounds!(ψ, max.(-∆_effective, l_bound - xk), min.(∆_effective, u_bound - xk)) : @@ -230,7 +231,7 @@ function LMTR( if (verbose > 0) && (k % ptf == 0) #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk ∆_effective χ(xk) sNorm νInv TR_stat + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1/ν) sqrt(ξ/ν) ρk ∆_effective χ(xk) sNorm νInv TR_stat #! format: on end @@ -270,9 +271,9 @@ function LMTR( @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk elseif optimal #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" Δk χ(xk) χ(s) νInv + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1/ν) sqrt(ξ/ν) "" Δk χ(xk) χ(s) νInv #! format: on - @info "LMTR: terminating with √ξ1 = $(sqrt(ξ1))" + @info "LMTR: terminating with √(ξ1/ν) = $(sqrt(ξ1/ν))" end end @@ -290,7 +291,7 @@ function LMTR( set_status!(stats, status) set_solution!(stats, xk) set_objective!(stats, fk + hk) - set_residuals!(stats, zero(eltype(xk)), ξ1 ≥ 0 ? sqrt(ξ1) : ξ1) + set_residuals!(stats, zero(eltype(xk)), ξ1 ≥ 0 ? sqrt(ξ1/ν) : ξ1) set_iter!(stats, k) set_time!(stats, elapsed_time) set_solver_specific!(stats, :Fhist, Fobj_hist[1:k]) diff --git a/src/LM_alg.jl b/src/LM_alg.jl index dc5c29e1..722c98ba 100644 --- a/src/LM_alg.jl +++ b/src/LM_alg.jl @@ -100,7 +100,7 @@ function LM( xkn = similar(xk) - local ξ1 + local ξ1, ξ k = 0 Fobj_hist = zeros(maxIter) Hobj_hist = zeros(maxIter) @@ -110,7 +110,7 @@ function LM( if verbose > 0 #! format: off - @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√ξ1" "√ξ" "ρ" "σ" "‖x‖" "‖s‖" "‖Jₖ‖²" "reg" + @info @sprintf "%6s %8s %8s %8s %7s %7s %8s %7s %7s %7s %7s %1s" "outer" "inner" "f(x)" "h(x)" "√(ξ1/ν)" "√(ξ/ν)" "ρ" "σ" "‖x‖" "‖s‖" "‖Jₖ‖²" "reg" #! format: on end @@ -125,6 +125,7 @@ function LM( σmax = opnorm(Jk) νInv = (1 + θ) * (σmax^2 + σk) # ‖J'J + σₖ I‖ = ‖J‖² + σₖ + ν = 1 / νInv s = zero(xk) @@ -178,18 +179,18 @@ function LM( ξ1 > 0 || error("LM: first prox-gradient step should produce a decrease but ξ1 = $(ξ1)") if ξ1 ≥ 0 && k == 1 - ϵ_increment = ϵr * sqrt(ξ1) + ϵ_increment = ϵr * sqrt(ξ1/ν) ϵ += ϵ_increment # make stopping test absolute and relative ϵ_subsolver += ϵ_increment end - if sqrt(ξ1) < ϵ + if sqrt(ξ1/ν) < ϵ # the current xk is approximately first-order stationary optimal = true continue end - subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / 10)) + subsolver_options.ϵa = k == 1 ? 1.0e-1 : max(ϵ_subsolver, min(1.0e-2, ξ1 / ν / 10)) subsolver_options.ν = ν @debug "setting inner stopping tolerance to" subsolver_options.optTol s, iter, _ = with_logger(subsolver_logger) do @@ -220,7 +221,7 @@ function LM( if (verbose > 0) && (k % ptf == 0) #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1) sqrt(ξ) ρk σk norm(xk) norm(s) νInv σ_stat + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8.1e %7.1e %7.1e %7.1e %7.1e %1s" k iter fk hk sqrt(ξ1/ν) sqrt(ξ/ν) ρk σk norm(xk) norm(s) νInv σ_stat #! format: off end @@ -260,9 +261,9 @@ function LM( @info @sprintf "%6d %8s %8.1e %8.1e" k "" fk hk elseif optimal #! format: off - @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1) sqrt(ξ1) "" σk norm(xk) norm(s) νInv + @info @sprintf "%6d %8d %8.1e %8.1e %7.1e %7.1e %8s %7.1e %7.1e %7.1e %7.1e" k 1 fk hk sqrt(ξ1/ν) sqrt(ξ/ν) "" σk norm(xk) norm(s) νInv #! format: on - @info "LM: terminating with √ξ1 = $(sqrt(ξ1))" + @info "LM: terminating with √(ξ1/ν) = $(sqrt(ξ1/ν))" end end status = if optimal @@ -279,7 +280,7 @@ function LM( set_status!(stats, status) set_solution!(stats, xk) set_objective!(stats, fk + hk) - set_residuals!(stats, zero(eltype(xk)), ξ1 ≥ 0 ? sqrt(ξ1) : ξ1) + set_residuals!(stats, zero(eltype(xk)), ξ1 ≥ 0 ? sqrt(ξ1/ν) : ξ1) set_iter!(stats, k) set_time!(stats, elapsed_time) set_solver_specific!(stats, :Fhist, Fobj_hist[1:k]) diff --git a/src/R2_alg.jl b/src/R2_alg.jl index 88481fc9..c4b4c7f1 100644 --- a/src/R2_alg.jl +++ b/src/R2_alg.jl @@ -313,7 +313,6 @@ function R2!( #! format: off σ_stat = (η2 ≤ ρk < Inf) ? "↘" : (ρk < η1 ? "↗" : "=") @info @sprintf "%6d %8.1e %8.1e %7.1e %8.1e %7.1e %7.1e %7.1e %1s" k fk hk sqrt_ξ_νInv ρk σk norm(xk) norm(s) σ_stat - #! format: on end diff --git a/src/TRDH_alg.jl b/src/TRDH_alg.jl index ed822092..b5abf888 100644 --- a/src/TRDH_alg.jl +++ b/src/TRDH_alg.jl @@ -252,7 +252,7 @@ function TRDH( set_radius!(ψ, Δ_effective) end - # model with diagonal hessian + # model with diagonal hessian φ(d) = ∇fk' * d + (d' * (Dk.d .* d)) / 2 mk(d) = φ(d) + ψ(d) diff --git a/src/TR_alg.jl b/src/TR_alg.jl index 9fc15f9d..71975953 100644 --- a/src/TR_alg.jl +++ b/src/TR_alg.jl @@ -121,7 +121,7 @@ function TR( #! format: on end - local ξ1 + local ξ1, ξ k = 0 fk = obj(f, xk) @@ -133,8 +133,6 @@ function TR( λmax = opnorm(Bk) νInv = (1 + θ) * λmax - ν = 1 / (νInv + 1 / (Δk * α)) - sqrt_ξ1_νInv = one(R) optimal = false tired = k ≥ maxIter || elapsed_time > maxTime @@ -179,7 +177,7 @@ function TR( continue end - subsolver_options.ϵa = k == 1 ? 1.0e-5 : max(ϵ_subsolver, min(1e-2, sqrt_ξ1_νInv)) + subsolver_options.ϵa = k == 1 ? 1.0e-5 : max(ϵ_subsolver, min(1e-2, sqrt_ξ1_νInv) * ξ1) ∆_effective = min(β * χ(s), Δk) (has_bounds(f) || subsolver == TRDH) ? set_bounds!(ψ, max.(-∆_effective, l_bound - xk), min.(∆_effective, u_bound - xk)) :