29 changes: 29 additions & 0 deletions docs/src/tikz/optimizer.tex
@@ -0,0 +1,29 @@
\documentclass[tikz,border=10pt]{standalone}
\usetikzlibrary{positioning, mindmap}

\definecolor{morange}{RGB}{255,127,14}
\definecolor{mblue}{RGB}{31,119,180}
\definecolor{mred}{RGB}{214,39,40}
\definecolor{mpurple}{RGB}{148,103,189}
\definecolor{mgreen}{RGB}{44,160,44}

\begin{document}
\begin{tikzpicture}
\path[mindmap,concept color=black,text=white]
node[concept] {\texttt{Optimizer <: \\NonlinearSolver}}
[clockwise from=0]
% note that `sibling angle' can only be defined in
% `level 1 concept/.append style={}'
child[concept color=mgreen] {node[concept] {\texttt{hessian::\\Hessian}}}
% note that the `concept color' is passed to the `child'(!)
child[concept color=mblue] {
node[concept] {\texttt{objective::\\Multivariate-\\Objective}}
[clockwise from=-30]
child { node[concept] {\texttt{g::Gradient}} }
}
child[concept color=mred] {node[concept] {\texttt{algorithm::\\Nonlinear-\\Method}} }
child[concept color=morange] {node[concept] {\texttt{state::\\Optimization-\\Algorithm}} }
child[concept color=mpurple] {node[concept] {\texttt{config::\\Options}}}
child[concept color=morange!50!mpurple] {
  node[concept] {\texttt{result::\\Optimizer-\\Result}}
  [clockwise from=-30]
  child[concept color=morange!50!mblue] {node[concept] {\texttt{status::\\Optimizer-\\Status}}}
};
\end{tikzpicture}
\end{document}
24 changes: 24 additions & 0 deletions docs/src/tikz/solver.tex
@@ -0,0 +1,24 @@
\documentclass[tikz,border=10pt]{standalone}
\usetikzlibrary{positioning, mindmap}

\definecolor{morange}{RGB}{255,127,14}
\definecolor{mblue}{RGB}{31,119,180}
\definecolor{mred}{RGB}{214,39,40}
\definecolor{mpurple}{RGB}{148,103,189}
\definecolor{mgreen}{RGB}{44,160,44}

\begin{document}
\begin{tikzpicture}
\path[mindmap,concept color=black,text=white]
node[concept] {\texttt{NewtonSolver <: \\NonlinearSolver}}
[clockwise from=0]
% note that `sibling angle' can only be defined in
% `level 1 concept/.append style={}'
% note that the `concept color' is passed to the `child'(!)
child[concept color=mblue] {
  node[concept] {\texttt{nonlinear-\\system::\\Nonlinear-\\System}}
  [clockwise from=-30]
  child[concept color=mgreen] {node[concept] {\texttt{j::Jacobian}}}
}
child[concept color=morange] {node[concept] {\texttt{cache::\\NewtonSolver-\\Cache}} }
child[concept color=mpurple] {node[concept] {\texttt{config::\\Options}}}
child[concept color=morange!50!mgreen] {node[concept] {\texttt{linesearch::\\Linesearch-\\State}}}
child[concept color=morange!50!mblue] {node[concept] {\texttt{status::\\Optimizer-\\Status}}};
\end{tikzpicture}
\end{document}
3 changes: 2 additions & 1 deletion src/SimpleSolvers.jl
@@ -152,9 +152,10 @@ module SimpleSolvers

include("optimization/optimizer_status.jl")
include("optimization/optimizer_result.jl")
include("optimization/optimizer.jl")
include("optimization/iterative_hessians.jl")
include("optimization/hessian_bfgs.jl")
include("optimization/hessian_dfp.jl")
include("optimization/optimizer.jl")
include("optimization/newton_optimizer_cache.jl")
include("optimization/newton_optimizer_linesearch_objective.jl")
include("optimization/newton_optimizer_state.jl")
7 changes: 1 addition & 6 deletions src/base/hessian.jl
@@ -80,11 +80,6 @@ function check_hessian(H::AbstractMatrix; digits::Integer = 5)
println()
end

# function print_hessian(H::AbstractMatrix)
# display(H)
# println()
# end

"""
update!(hessian, x)

@@ -208,7 +203,7 @@ end

Base.inv(H::HessianAutodiff) = inv(H.H)

Base.:\(H::HessianAutodiff, b) = H.H \ b
Base.:\(H::HessianAutodiff, b) = solve(LU(), H.H, b)

LinearAlgebra.ldiv!(x, H::HessianAutodiff, b) = x .= H \ b
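The new method routes the backslash through the package's `solve` with an `LU()` linear solver. As a rough sketch of what an LU-based solve computes, using only the standard library (the matrix and right-hand side below are made up for illustration):

```julia
using LinearAlgebra

A = [4.0 1.0; 1.0 3.0]   # stand-in for H.H
b = [1.0, 2.0]

# an LU-based solve factorizes A once and then performs forward and
# backward substitution; for square dense matrices this is also what
# the generic `A \ b` does internally
x = lu(A) \ b
A * x ≈ b                # true
```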

61 changes: 57 additions & 4 deletions src/linesearch/bierlaire_quadratic.jl
@@ -4,16 +4,69 @@
A constant that determines the *precision* in [`BierlaireQuadraticState`](@ref). The constant recommended in [bierlaire2015optimization](@cite) is `1E-3`.

Note that the appropriate value of this constant may differ between optimizers and solvers.

!!! warning
    We have deactivated the use of this constant for the moment and only use `eps(T)` in `BierlaireQuadratic`, because solvers and optimizers should rely on different choices of this constant.
"""
const DEFAULT_BIERLAIRE_ε::Float64 = eps(Float32)
const DEFAULT_BIERLAIRE_ε::Float64 = 2eps(Float32)

"""
DEFAULT_BIERLAIRE_ξ

A constant on the basis of which the `b` in [`BierlaireQuadraticState`](@ref) is perturbed in order "to avoid stalling" (see [bierlaire2015optimization; Chapter 11.2.1](@cite), where the author recommends a value of ``10^{-7}``).
Its value is $(DEFAULT_BIERLAIRE_ξ).

!!! warning
    We have deactivated the use of this constant for the moment and only use `eps(T)` in `BierlaireQuadratic`, because solvers and optimizers should rely on different choices of this constant.
"""
const DEFAULT_BIERLAIRE_ξ::Float64 = 2eps(Float32)

"""
default_precision(T)

Compute the default precision used for [`BierlaireQuadraticState`](@ref).
Compare this to the [`default_tolerance`](@ref) used in [`Options`](@ref).

# Examples

```jldoctest; setup = :(using SimpleSolvers: default_precision)
default_precision(Float64)

# output

2.220446049250313e-16
```

```jldoctest; setup = :(using SimpleSolvers: default_precision)
default_precision(Float32)

# output

1.1920929f-6
```

```jldoctest; setup = :(using SimpleSolvers: default_precision)
default_precision(Float16)

# output

ERROR: No default precision defined for Float16.
[...]
```
"""
const DEFAULT_BIERLAIRE_ξ::Float64 = eps(Float32)
default_precision

function default_precision(::Type{Float32})
10eps(Float32)
end

function default_precision(::Type{Float64})
eps(Float64)
end

function default_precision(::Type{T}) where {T <: AbstractFloat}
error("No default precision defined for $(T).")
end

"""
BierlaireQuadraticState <: LinesearchState
@@ -30,8 +83,8 @@ struct BierlaireQuadraticState{T} <: LinesearchState{T}
ξ::T

function BierlaireQuadraticState(T₁::DataType=Float64;
ε::T = DEFAULT_BIERLAIRE_ε,
ξ::T = DEFAULT_BIERLAIRE_ξ,
ε::T = default_precision(T₁), # DEFAULT_BIERLAIRE_ε,
ξ::T = default_precision(T₁), # DEFAULT_BIERLAIRE_ξ,
options_kwargs...) where {T}
config₁ = Options(T₁; options_kwargs...)
new{T₁}(config₁, T₁(ε), T₁(ξ))
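With this change, both `ε` and `ξ` default to `default_precision(T₁)` instead of the (deactivated) constants. A minimal usage sketch, assuming `BierlaireQuadraticState` is reachable through the package namespace as in the doctests above:

```julia
using SimpleSolvers: BierlaireQuadraticState

ls = BierlaireQuadraticState(Float32)
ls.ε == 10eps(Float32)   # true, since default_precision(Float32) = 10eps(Float32)
ls.ξ == 10eps(Float32)   # true
```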
1 change: 1 addition & 0 deletions src/linesearch/custom_quadratic.jl
@@ -62,6 +62,7 @@ function (ls::QuadraticState2{T})(obj::AbstractUnivariateObjective{T}, number_of

# compute minimum αₜ of p(α); i.e. p'(α) = 0.
αₜ = -p₁ / (2p₂) + a
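# early exit: accept αₜ as soon as it lies within ε of x₀ instead of recursing further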
!(l2norm(αₜ - x₀) < ls.ε) || return αₜ

ls(obj, number_of_iterations + 1, αₜ, s * ls.s_reduction)
end
90 changes: 64 additions & 26 deletions src/optimization/hessian_bfgs.jl
@@ -1,7 +1,26 @@
"""
HessianBFGS <: Hessian

A `struct` derived from [`Hessian`](@ref) to be used for an [`Optimizer`](@ref).

# Fields
- `objective::`[`MultivariateObjective`](@ref): the objective whose gradient is used to build the Hessian approximation,
- `x̄`: previous solution,
- `x`: current solution,
- `δ`: *descent direction*, i.e. the difference `x .- x̄`,
- `ḡ`: previous gradient,
- `g`: current gradient,
- `γ`: difference between current and previous gradient,
- `Q`: the current approximation of the inverse Hessian ``H^{-1}``,
- `T1`: temporary storage for the first term of the update of `Q`,
- `T2`: temporary storage for the second term of the update of `Q`,
- `T3`: temporary storage for the third term of the update of `Q`,
- `δγ`: the outer product of `δ` and `γ`. Note that this is different from the output of [`compute_δγ`](@ref), which is the inner product of `γ` and `δ`.
- `δδ`: the outer product of `δ` with itself.

Also compare these fields with those of [`NewtonOptimizerCache`](@ref).
"""
struct HessianBFGS{T,VT,MT,OBJ} <: Hessian{T}
struct HessianBFGS{T,VT,MT,OBJ} <: IterativeHessian{T}
objective::OBJ

x̄::VT # previous solution
@@ -37,49 +56,68 @@ HessianBFGS(F::Callable, x::AbstractVector) = HessianBFGS(MultivariateObjective(

Hessian(::BFGS, ForOBJ::Union{Callable, MultivariateObjective}, x::AbstractVector) = HessianBFGS(ForOBJ, x)
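Given this dispatch, a BFGS Hessian can also be obtained through the generic `Hessian` constructor. A small sketch: the objective `f` and starting point `x` are made up, and we assume `BFGS` is the corresponding algorithm tag from the package:

```julia
using SimpleSolvers

f(x) = sum(x .^ 2)       # hypothetical objective
x = [1.0, 2.0, 3.0]

# dispatches to HessianBFGS(f, x) via the method above
H = Hessian(BFGS(), f, x)
```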

@doc raw"""
initialize!(H, x)

Initialize an object `H` of type [`HessianBFGS`](@ref).

Note that, unlike most other [`initialize!`](@ref) methods, this one does not write `NaN`s everywhere: ``Q = H^{-1}`` is set to the identity.

# Examples

```jldoctest; setup = :(using SimpleSolvers; using SimpleSolvers: initialize!)
f(x) = x .^ 2
x = [1f0, 2f0, 3f0]
H = HessianBFGS(f, x)
initialize!(H, x)
inv(H)

# output

3×3 Matrix{Float32}:
1.0 0.0 0.0
0.0 1.0 0.0
0.0 0.0 1.0
```
"""
function initialize!(H::HessianBFGS, x::AbstractVector)
H.Q .= Matrix(1.0I, size(H.Q)...)

H.x̄ .= eltype(x)(NaN)
H.δ .= eltype(x)(NaN)
H.ḡ .= eltype(x)(NaN)
H.γ .= eltype(x)(NaN)

H.x .= x
H.g .= gradient!(H.objective, x)
H.x̄ .= alloc_x(x)
H.x .= alloc_x(x)
H.δ .= alloc_x(x)
H.ḡ .= alloc_g(x)
H.g .= alloc_g(x)
H.γ .= alloc_g(x)

H
end

function update!(H::HessianBFGS, x::AbstractVector)
# copy previous data and compute new gradient
H.ḡ .= H.g
H.x̄ .= H.x
H.x .= x
H.g .= gradient!(H.objective, x)

# δ = x - x̄
H.δ .= H.x .- H.x̄
function compute_outer_products!(H::HessianBFGS)
outer!(H.δδ, H.δ, H.δ)
outer!(H.δγ, H.δ, H.γ)
end
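As a sanity check on the assumed semantics of `outer!` (the field documentation above describes `δγ` as the outer product of `δ` and `γ`), the same result can be produced with `LinearAlgebra.mul!`:

```julia
using LinearAlgebra

A = zeros(3, 3)
x = [1.0, 2.0, 3.0]
y = [4.0, 5.0, 6.0]

# outer!(A, x, y) is assumed to store the outer product x * y' in A;
# with plain arrays the in-place equivalent is
mul!(A, x, y')           # A[i, j] = x[i] * y[j]
A == x * y'              # true
```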

# γ = g - ḡ
H.γ .= H.g .- H.ḡ
function update!(H::HessianBFGS, x::AbstractVector)
update!(H, x, gradient!(objective(H), x))

# δγ = δᵀγ
δγ = H.δ ⋅ H.γ
δγ = compute_δγ(H)

# BFGS
# Q = Q - ... + ...
# H.Q .-= (H.δ * H.γ' * H.Q .+ H.Q * H.γ * H.δ') ./ δγ .-
# (1 + dot(H.γ, H.Q, H.γ) ./ δγ) .* (H.δ * H.δ') ./ δγ

if δγ ≠ 0
outer!(H.δγ, H.δ, H.γ)
outer!(H.δδ, H.δ, H.δ)

if !iszero(δγ)
compute_outer_products!(H)
mul!(H.T1, H.δγ, H.Q)
mul!(H.T2, H.Q, H.δγ')
H.T3 .= (1 + dot(H.γ, H.Q, H.γ) ./ δγ) .* H.δδ
γQγ = compute_γQγ(H)
H.T3 .= (1 + γQγ ./ δγ) .* H.δδ
H.Q .-= (H.T1 .+ H.T2 .- H.T3) ./ δγ
end

H
end
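For reference, the update assembled from `T1`, `T2` and `T3` above is the standard BFGS formula for the inverse Hessian approximation `Q`, matching the commented-out expression; as a sketch in LaTeX:

```latex
Q \leftarrow Q
    - \frac{\delta \gamma^{\mathsf{T}} Q + Q \gamma \delta^{\mathsf{T}}}{\delta^{\mathsf{T}} \gamma}
    + \left( 1 + \frac{\gamma^{\mathsf{T}} Q \gamma}{\delta^{\mathsf{T}} \gamma} \right)
      \frac{\delta \delta^{\mathsf{T}}}{\delta^{\mathsf{T}} \gamma},
\qquad \delta = x - \bar{x}, \quad \gamma = g - \bar{g}.
```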

Base.inv(H::HessianBFGS) = H.Q
33 changes: 12 additions & 21 deletions src/optimization/hessian_dfp.jl
@@ -3,7 +3,7 @@


"""
struct HessianDFP{T,VT,MT,OBJ} <: Hessian{T}
struct HessianDFP{T,VT,MT,OBJ} <: IterativeHessian{T}
objective::OBJ

x̄::VT # previous solution
@@ -49,36 +49,27 @@ function initialize!(H::HessianDFP, x::AbstractVector)
H.x .= x
H.g .= gradient!(H.objective, x)

return H
H
end

function update!(H::HessianDFP, x::AbstractVector)
# copy previous data and compute new gradient
H.ḡ .= H.g
H.x̄ .= H.x
H.x .= x
H.g .= gradient!(H.objective, x)

# δ = x - x̄
H.δ .= H.x .- H.x̄

# γ = g - ḡ
H.γ .= H.g .- H.ḡ
function compute_outer_products!(H::HessianDFP)
outer!(H.γγ, H.γ, H.γ)
outer!(H.δδ, H.δ, H.δ)
end

# γQγ = γᵀQγ
γQγ = dot(H.γ, H.Q, H.γ)
function update!(H::HessianDFP, x::AbstractVector)
update!(H, x, gradient!(objective(H), x))

# δγ = δᵀγ
δγ = H.δ ⋅ H.γ
γQγ = compute_γQγ(H)
δγ = compute_δγ(H)

# DFP
# Q = Q - ... + ...
# H.Q .-= H.Q * H.γ * H.γ' * H.Q / (H.γ' * H.Q * H.γ) .-
# H.δ * H.δ' ./ δγ

if δγ ≠ 0 && γQγ ≠ 0
outer!(H.γγ, H.γ, H.γ)
outer!(H.δδ, H.δ, H.δ)
if !iszero(δγ) && !iszero(γQγ)
compute_outer_products!(H)

mul!(H.T1, H.γγ, H.Q)
mul!(H.T2, H.Q, H.T1)
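For comparison, the DFP update of the inverse Hessian approximation being assembled here is, per the commented-out expression above:

```latex
Q \leftarrow Q
    - \frac{Q \gamma \gamma^{\mathsf{T}} Q}{\gamma^{\mathsf{T}} Q \gamma}
    + \frac{\delta \delta^{\mathsf{T}}}{\delta^{\mathsf{T}} \gamma}.
```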