29 changes: 29 additions & 0 deletions docs/src/tikz/optimizer.tex
@@ -0,0 +1,29 @@
\documentclass[tikz,border=10pt]{standalone}
\usetikzlibrary{positioning, mindmap}

\definecolor{morange}{RGB}{255,127,14}
\definecolor{mblue}{RGB}{31,119,180}
\definecolor{mred}{RGB}{214,39,40}
\definecolor{mpurple}{RGB}{148,103,189}
\definecolor{mgreen}{RGB}{44,160,44}

\begin{document}
\begin{tikzpicture}
\path[mindmap,concept color=black,text=white]
node[concept] {\texttt{Optimizer <: \\NonlinearSolver}}
[clockwise from=0]
% note that `sibling angle' can only be defined in
% `level 1 concept/.append style={}'
child[concept color=mgreen] {node[concept] {\texttt{hessian::\\Hessian}}}
% note that the `concept color' is passed to the `child'(!)
child[concept color=mblue] {
node[concept] {\texttt{objective::\\Multivariate-\\Objective}}
[clockwise from=-30]
child { node[concept] {\texttt{g::Gradient}} }
}
child[concept color=mred] {node[concept] {\texttt{algorithm::\\Nonlinear-\\Method}} }
child[concept color=morange] {node[concept] {\texttt{state::\\Optimization-\\Algorithm}} }
child[concept color=mpurple] {node[concept] {\texttt{config::\\Options}}}
child[concept color=morange!50!mpurple] {
  node[concept] {\texttt{result::\\Optimizer-\\Result}}
  [clockwise from=-30]
  child[concept color=morange!50!mblue] {node[concept] {\texttt{status::\\Optimizer-\\Status}}}
};
\end{tikzpicture}
\end{document}
24 changes: 24 additions & 0 deletions docs/src/tikz/solver.tex
@@ -0,0 +1,24 @@
\documentclass[tikz,border=10pt]{standalone}
\usetikzlibrary{positioning, mindmap}

\definecolor{morange}{RGB}{255,127,14}
\definecolor{mblue}{RGB}{31,119,180}
\definecolor{mred}{RGB}{214,39,40}
\definecolor{mpurple}{RGB}{148,103,189}
\definecolor{mgreen}{RGB}{44,160,44}

\begin{document}
\begin{tikzpicture}
\path[mindmap,concept color=black,text=white]
node[concept] {\texttt{NewtonSolver <: \\NonlinearSolver}}
[clockwise from=0]
% note that `sibling angle' can only be defined in
% `level 1 concept/.append style={}'
% note that the `concept color' is passed to the `child'(!)
child[concept color=mblue] {
  node[concept] {\texttt{nonlinear-\\system::\\Nonlinear-\\System}}
  [clockwise from=-30]
  child[concept color=mgreen] {node[concept] {\texttt{j::Jacobian}}}
}
child[concept color=morange] {node[concept] {\texttt{cache::\\NewtonSolver-\\Cache}} }
child[concept color=mpurple] {node[concept] {\texttt{config::\\Options}}}
child[concept color=morange!50!mgreen] {node[concept] {\texttt{linesearch::\\Linesearch-\\State}}}
child[concept color=morange!50!mblue] {node[concept] {\texttt{status::\\Optimizer-\\Status}}};
\end{tikzpicture}
\end{document}
3 changes: 2 additions & 1 deletion src/SimpleSolvers.jl
@@ -152,9 +152,10 @@ module SimpleSolvers

include("optimization/optimizer_status.jl")
include("optimization/optimizer_result.jl")
include("optimization/optimizer.jl")
include("optimization/iterative_hessians.jl")
include("optimization/hessian_bfgs.jl")
include("optimization/hessian_dfp.jl")
include("optimization/optimizer.jl")
include("optimization/newton_optimizer_cache.jl")
include("optimization/newton_optimizer_linesearch_objective.jl")
include("optimization/newton_optimizer_state.jl")
7 changes: 1 addition & 6 deletions src/base/hessian.jl
@@ -80,11 +80,6 @@ function check_hessian(H::AbstractMatrix; digits::Integer = 5)
println()
end

# function print_hessian(H::AbstractMatrix)
# display(H)
# println()
# end

"""
update!(hessian, x)

@@ -208,7 +203,7 @@ end

Base.inv(H::HessianAutodiff) = inv(H.H)

Base.:\(H::HessianAutodiff, b) = H.H \ b
Base.:\(H::HessianAutodiff, b) = solve(LU(), H.H, b)

LinearAlgebra.ldiv!(x, H::HessianAutodiff, b) = x .= H \ b
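The new method routes the backslash through the package's `solve` with an `LU()` linear solver. As a rough sketch of what an LU-based solve computes, using only the standard library (the matrix and right-hand side below are made up for illustration):

```julia
using LinearAlgebra

A = [4.0 1.0; 1.0 3.0]   # stand-in for H.H
b = [1.0, 2.0]

# an LU-based solve factorizes A once and then performs forward and
# backward substitution; for square dense matrices this is also what
# the generic `A \ b` does internally
x = lu(A) \ b
A * x ≈ b                # true
```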

61 changes: 57 additions & 4 deletions src/linesearch/bierlaire_quadratic.jl
@@ -4,16 +4,69 @@
A constant that determines the *precision* in [`BierlaireQuadraticState`](@ref). The constant recommended in [bierlaire2015optimization](@cite) is `1E-3`.

Note that the appropriate value of this constant may differ between optimizers and solvers.

!!! warning
    We have deactivated the use of this constant for the moment and only use `eps(T)` in `BierlaireQuadratic`, because solvers and optimizers should rely on different choices of this constant.
"""
const DEFAULT_BIERLAIRE_ε::Float64 = eps(Float32)
const DEFAULT_BIERLAIRE_ε::Float64 = 2eps(Float32)

"""
DEFAULT_BIERLAIRE_ξ

A constant on the basis of which the `b` in [`BierlaireQuadraticState`](@ref) is perturbed in order "to avoid stalling" (see [bierlaire2015optimization; Chapter 11.2.1](@cite), where the author recommends a value of ``10^{-7}``).
Its value is $(DEFAULT_BIERLAIRE_ξ).

!!! warning
    We have deactivated the use of this constant for the moment and only use `eps(T)` in `BierlaireQuadratic`, because solvers and optimizers should rely on different choices of this constant.
"""
const DEFAULT_BIERLAIRE_ξ::Float64 = 2eps(Float32)

"""
default_precision(T)

Compute the default precision used for [`BierlaireQuadraticState`](@ref).
Compare this to the [`default_tolerance`](@ref) used in [`Options`](@ref).

# Examples

```jldoctest; setup = :(using SimpleSolvers: default_precision)
default_precision(Float64)

# output

2.220446049250313e-16
```

```jldoctest; setup = :(using SimpleSolvers: default_precision)
default_precision(Float32)

# output

1.1920929f-6
```

```jldoctest; setup = :(using SimpleSolvers: default_precision)
default_precision(Float16)

# output

ERROR: No default precision defined for Float16.
[...]
```
"""
const DEFAULT_BIERLAIRE_ξ::Float64 = eps(Float32)
default_precision

function default_precision(::Type{Float32})
10eps(Float32)
end

function default_precision(::Type{Float64})
eps(Float64)
end

function default_precision(::Type{T}) where {T <: AbstractFloat}
error("No default precision defined for $(T).")
end

"""
BierlaireQuadraticState <: LinesearchState
@@ -30,8 +83,8 @@ struct BierlaireQuadraticState{T} <: LinesearchState{T}
ξ::T

function BierlaireQuadraticState(T₁::DataType=Float64;
ε::T = DEFAULT_BIERLAIRE_ε,
ξ::T = DEFAULT_BIERLAIRE_ξ,
ε::T = default_precision(T₁), # DEFAULT_BIERLAIRE_ε,
ξ::T = default_precision(T₁), # DEFAULT_BIERLAIRE_ξ,
options_kwargs...) where {T}
config₁ = Options(T₁; options_kwargs...)
new{T₁}(config₁, T₁(ε), T₁(ξ))
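With this change, both `ε` and `ξ` default to `default_precision(T₁)` instead of the (deactivated) constants. A minimal usage sketch, assuming `BierlaireQuadraticState` is reachable through the package namespace as in the doctests above:

```julia
using SimpleSolvers: BierlaireQuadraticState

ls = BierlaireQuadraticState(Float32)
ls.ε == 10eps(Float32)   # true, since default_precision(Float32) = 10eps(Float32)
ls.ξ == 10eps(Float32)   # true
```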
1 change: 1 addition & 0 deletions src/linesearch/custom_quadratic.jl
@@ -62,6 +62,7 @@ function (ls::QuadraticState2{T})(obj::AbstractUnivariateObjective{T}, number_of

# compute minimum αₜ of p(α); i.e. p'(α) = 0.
αₜ = -p₁ / (2p₂) + a
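# early exit: accept αₜ as soon as it lies within ε of x₀ instead of recursing further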
!(l2norm(αₜ - x₀) < ls.ε) || return αₜ

ls(obj, number_of_iterations + 1, αₜ, s * ls.s_reduction)
end
90 changes: 64 additions & 26 deletions src/optimization/hessian_bfgs.jl
@@ -1,7 +1,26 @@
"""
HessianBFGS <: Hessian

A `struct` derived from [`Hessian`](@ref) to be used for an [`Optimizer`](@ref).

# Fields
- `objective::`[`MultivariateObjective`](@ref): the objective whose gradient is used to build the Hessian approximation,
- `x̄`: previous solution,
- `x`: current solution,
- `δ`: *descent direction*, i.e. the difference `x .- x̄`,
- `ḡ`: previous gradient,
- `g`: current gradient,
- `γ`: difference between current and previous gradient,
- `Q`: the current approximation of the inverse Hessian ``H^{-1}``,
- `T1`: temporary storage for the first term of the update of `Q`,
- `T2`: temporary storage for the second term of the update of `Q`,
- `T3`: temporary storage for the third term of the update of `Q`,
- `δγ`: the outer product of `δ` and `γ`. Note that this is different from the output of [`compute_δγ`](@ref), which is the inner product of `γ` and `δ`.
- `δδ`: the outer product of `δ` with itself.

Also compare these fields with those of [`NewtonOptimizerCache`](@ref).
"""
struct HessianBFGS{T,VT,MT,OBJ} <: Hessian{T}
struct HessianBFGS{T,VT,MT,OBJ} <: IterativeHessian{T}
objective::OBJ

x̄::VT # previous solution
@@ -37,49 +56,68 @@ HessianBFGS(F::Callable, x::AbstractVector) = HessianBFGS(MultivariateObjective(

Hessian(::BFGS, ForOBJ::Union{Callable, MultivariateObjective}, x::AbstractVector) = HessianBFGS(ForOBJ, x)
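Given this dispatch, a BFGS Hessian can also be obtained through the generic `Hessian` constructor. A small sketch: the objective `f` and starting point `x` are made up, and we assume `BFGS` is the corresponding algorithm tag from the package:

```julia
using SimpleSolvers

f(x) = sum(x .^ 2)       # hypothetical objective
x = [1.0, 2.0, 3.0]

# dispatches to HessianBFGS(f, x) via the method above
H = Hessian(BFGS(), f, x)
```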

@doc raw"""
initialize!(H, x)

Initialize an object `H` of type [`HessianBFGS`](@ref).

Note that, unlike most other [`initialize!`](@ref) methods, this one does not write `NaN`s everywhere: ``Q = H^{-1}`` is set to the identity.

# Examples

```jldoctest; setup = :(using SimpleSolvers; using SimpleSolvers: initialize!)
f(x) = x .^ 2
x = [1f0, 2f0, 3f0]
H = HessianBFGS(f, x)
initialize!(H, x)
inv(H)

# output

3×3 Matrix{Float32}:
1.0 0.0 0.0
0.0 1.0 0.0
0.0 0.0 1.0
```
"""
function initialize!(H::HessianBFGS, x::AbstractVector)
H.Q .= Matrix(1.0I, size(H.Q)...)

H.x̄ .= eltype(x)(NaN)
H.δ .= eltype(x)(NaN)
H.ḡ .= eltype(x)(NaN)
H.γ .= eltype(x)(NaN)

H.x .= x
H.g .= gradient!(H.objective, x)
H.x̄ .= alloc_x(x)
H.x .= alloc_x(x)
H.δ .= alloc_x(x)
H.ḡ .= alloc_g(x)
H.g .= alloc_g(x)
H.γ .= alloc_g(x)

H
end

function update!(H::HessianBFGS, x::AbstractVector)
# copy previous data and compute new gradient
H.ḡ .= H.g
H.x̄ .= H.x
H.x .= x
H.g .= gradient!(H.objective, x)

# δ = x - x̄
H.δ .= H.x .- H.x̄
function compute_outer_products!(H::HessianBFGS)
outer!(H.δδ, H.δ, H.δ)
outer!(H.δγ, H.δ, H.γ)
end
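As a sanity check on the assumed semantics of `outer!` (the field documentation above describes `δγ` as the outer product of `δ` and `γ`), the same result can be produced with `LinearAlgebra.mul!`:

```julia
using LinearAlgebra

A = zeros(3, 3)
x = [1.0, 2.0, 3.0]
y = [4.0, 5.0, 6.0]

# outer!(A, x, y) is assumed to store the outer product x * y' in A;
# with plain arrays the in-place equivalent is
mul!(A, x, y')           # A[i, j] = x[i] * y[j]
A == x * y'              # true
```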

# γ = g - ḡ
H.γ .= H.g .- H.ḡ
function update!(H::HessianBFGS, x::AbstractVector)
update!(H, x, gradient!(objective(H), x))

# δγ = δᵀγ
δγ = H.δ ⋅ H.γ
δγ = compute_δγ(H)

# BFGS
# Q = Q - ... + ...
# H.Q .-= (H.δ * H.γ' * H.Q .+ H.Q * H.γ * H.δ') ./ δγ .-
# (1 + dot(H.γ, H.Q, H.γ) ./ δγ) .* (H.δ * H.δ') ./ δγ

if δγ ≠ 0
outer!(H.δγ, H.δ, H.γ)
outer!(H.δδ, H.δ, H.δ)

if !iszero(δγ)
compute_outer_products!(H)
mul!(H.T1, H.δγ, H.Q)
mul!(H.T2, H.Q, H.δγ')
H.T3 .= (1 + dot(H.γ, H.Q, H.γ) ./ δγ) .* H.δδ
γQγ = compute_γQγ(H)
H.T3 .= (1 + γQγ ./ δγ) .* H.δδ
H.Q .-= (H.T1 .+ H.T2 .- H.T3) ./ δγ
end

H
end
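For reference, the update assembled from `T1`, `T2` and `T3` above is the standard BFGS formula for the inverse Hessian approximation `Q`, matching the commented-out expression; as a sketch in LaTeX:

```latex
Q \leftarrow Q
    - \frac{\delta \gamma^{\mathsf{T}} Q + Q \gamma \delta^{\mathsf{T}}}{\delta^{\mathsf{T}} \gamma}
    + \left( 1 + \frac{\gamma^{\mathsf{T}} Q \gamma}{\delta^{\mathsf{T}} \gamma} \right)
      \frac{\delta \delta^{\mathsf{T}}}{\delta^{\mathsf{T}} \gamma},
\qquad \delta = x - \bar{x}, \quad \gamma = g - \bar{g}.
```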

Base.inv(H::HessianBFGS) = H.Q
33 changes: 12 additions & 21 deletions src/optimization/hessian_dfp.jl
@@ -3,7 +3,7 @@


"""
struct HessianDFP{T,VT,MT,OBJ} <: Hessian{T}
struct HessianDFP{T,VT,MT,OBJ} <: IterativeHessian{T}
objective::OBJ

x̄::VT # previous solution
@@ -49,36 +49,27 @@ function initialize!(H::HessianDFP, x::AbstractVector)
H.x .= x
H.g .= gradient!(H.objective, x)

return H
H
end

function update!(H::HessianDFP, x::AbstractVector)
# copy previous data and compute new gradient
H.ḡ .= H.g
H.x̄ .= H.x
H.x .= x
H.g .= gradient!(H.objective, x)

# δ = x - x̄
H.δ .= H.x .- H.x̄

# γ = g - ḡ
H.γ .= H.g .- H.ḡ
function compute_outer_products!(H::HessianDFP)
outer!(H.γγ, H.γ, H.γ)
outer!(H.δδ, H.δ, H.δ)
end

# γQγ = γᵀQγ
γQγ = dot(H.γ, H.Q, H.γ)
function update!(H::HessianDFP, x::AbstractVector)
update!(H, x, gradient!(objective(H), x))

# δγ = δᵀγ
δγ = H.δ ⋅ H.γ
γQγ = compute_γQγ(H)
δγ = compute_δγ(H)

# DFP
# Q = Q - ... + ...
# H.Q .-= H.Q * H.γ * H.γ' * H.Q / (H.γ' * H.Q * H.γ) .-
# H.δ * H.δ' ./ δγ

if δγ ≠ 0 && γQγ ≠ 0
outer!(H.γγ, H.γ, H.γ)
outer!(H.δδ, H.δ, H.δ)
if !iszero(δγ) && !iszero(γQγ)
compute_outer_products!(H)

mul!(H.T1, H.γγ, H.Q)
mul!(H.T2, H.Q, H.T1)
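For comparison, the DFP update of the inverse Hessian approximation being assembled here is, per the commented-out expression above:

```latex
Q \leftarrow Q
    - \frac{Q \gamma \gamma^{\mathsf{T}} Q}{\gamma^{\mathsf{T}} Q \gamma}
    + \frac{\delta \delta^{\mathsf{T}}}{\delta^{\mathsf{T}} \gamma}.
```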