diff --git a/docs/src/tikz/optimizer.tex b/docs/src/tikz/optimizer.tex
new file mode 100644
index 00000000..5d32dbd4
--- /dev/null
+++ b/docs/src/tikz/optimizer.tex
@@ -0,0 +1,29 @@
+\documentclass[tikz,border=10pt]{standalone}
+\usetikzlibrary{positioning, mindmap}
+
+\definecolor{morange}{RGB}{255,127,14}
+\definecolor{mblue}{RGB}{31,119,180}
+\definecolor{mred}{RGB}{214,39,40}
+\definecolor{mpurple}{RGB}{148,103,189}
+\definecolor{mgreen}{RGB}{44,160,44}
+
+\begin{document}
+\begin{tikzpicture}
+    \path[mindmap,concept color=black,text=white]
+    node[concept] {\texttt{Optimizer <: \\NonlinearSolver}}
+    [clockwise from=0]
+    % note that `sibling angle' can only be defined in
+    % `level 1 concept/.append style={}'
+    child[concept color=mgreen] {node[concept] {\texttt{hessian::\\Hessian}}}
+    % note that the `concept color' is passed to the `child'(!)
+    child[concept color=mblue] {
+        node[concept] {\texttt{objective::\\Multivariate-\\Objective}}
+        [clockwise from=-30]
+        child { node[concept] {\texttt{g::Gradient}} }
+    }
+    child[concept color=mred] {node[concept] {\texttt{algorithm::\\Nonlinear-\\Method}} }
+    child[concept color=morange] {node[concept] {\texttt{state::\\Optimization-\\Algorithm}} }
+    child[concept color=mpurple] {node[concept] {\texttt{config::\\Options}}}
+    child[concept color=morange!50!mpurple] {node[concept] {\texttt{result::\\Optimizer-\\Result}} [clockwise from = -30] child[concept color=morange!50!mblue] {node[concept] {\texttt{status::\\Optimizer-\\Status}}}};
+\end{tikzpicture}
+\end{document}
\ No newline at end of file
diff --git a/docs/src/tikz/solver.tex b/docs/src/tikz/solver.tex
new file mode 100644
index 00000000..c67be652
--- /dev/null
+++ b/docs/src/tikz/solver.tex
@@ -0,0 +1,24 @@
+\documentclass[tikz,border=10pt]{standalone}
+\usetikzlibrary{positioning, mindmap}
+
+\definecolor{morange}{RGB}{255,127,14}
+\definecolor{mblue}{RGB}{31,119,180}
+\definecolor{mred}{RGB}{214,39,40}
+\definecolor{mpurple}{RGB}{148,103,189}
+\definecolor{mgreen}{RGB}{44,160,44}
+
+\begin{document}
+\begin{tikzpicture}
+    \path[mindmap,concept color=black,text=white]
+    node[concept] {\texttt{NewtonSolver <: \\NonlinearSolver}}
+    [clockwise from=0]
+    % note that `sibling angle' can only be defined in
+    % `level 1 concept/.append style={}'
+    % note that the `concept color' is passed to the `child'(!)
+    child[concept color=mblue] {node[concept] {\texttt{nonlinear-\\system::\\Nonlinear-\\System}} [clockwise from=-30] child[concept color=mgreen] { node[concept] {\texttt{j::Jacobian}}}}
+    child[concept color=morange] {node[concept] {\texttt{cache::\\NewtonSolver-\\Cache}} }
+    child[concept color=mpurple] {node[concept] {\texttt{config::\\Options}}}
+    child[concept color=morange!50!mgreen] {node[concept] {\texttt{linesearch::\\Linesearch-\\State}}}
+    child[concept color=morange!50!mblue] {node[concept] {\texttt{status::\\Optimizer-\\Status}}};
+\end{tikzpicture}
+\end{document}
\ No newline at end of file
diff --git a/src/SimpleSolvers.jl b/src/SimpleSolvers.jl
index 64f1d536..48203bcc 100644
--- a/src/SimpleSolvers.jl
+++ b/src/SimpleSolvers.jl
@@ -152,9 +152,10 @@ module SimpleSolvers
     include("optimization/optimizer_status.jl")
     include("optimization/optimizer_result.jl")
-    include("optimization/optimizer.jl")
+    include("optimization/iterative_hessians.jl")
     include("optimization/hessian_bfgs.jl")
     include("optimization/hessian_dfp.jl")
+    include("optimization/optimizer.jl")
     include("optimization/newton_optimizer_cache.jl")
     include("optimization/newton_optimizer_linesearch_objective.jl")
     include("optimization/newton_optimizer_state.jl")
diff --git a/src/base/hessian.jl b/src/base/hessian.jl
index 50e70292..d0f37fe6 100644
--- a/src/base/hessian.jl
+++ b/src/base/hessian.jl
@@ -80,11 +80,6 @@ function check_hessian(H::AbstractMatrix; digits::Integer = 5)
     println()
 end

-# function print_hessian(H::AbstractMatrix)
-#     display(H)
-#     println()
-# end
-
 """
     update!(hessian, x)

@@ -208,7 +203,7 @@ end

 Base.inv(H::HessianAutodiff) = inv(H.H)

-Base.:\(H::HessianAutodiff, b) = H.H \ b
+Base.:\(H::HessianAutodiff, b) = solve(LU(), H.H, b)

 LinearAlgebra.ldiv!(x, H::HessianAutodiff, b) = x .= H \ b

diff --git a/src/linesearch/bierlaire_quadratic.jl b/src/linesearch/bierlaire_quadratic.jl
index 26a54ab1..8d3ef6d3 100644
--- a/src/linesearch/bierlaire_quadratic.jl
+++ b/src/linesearch/bierlaire_quadratic.jl
@@ -4,16 +4,69 @@
 A constant that determines the *precision* in [`BierlaireQuadraticState`](@ref).
 The constant recommended in [bierlaire2015optimization](@cite) is `1E-3`.
 Note that this constant may also depend on whether we deal with optimizers or solvers.
+
+!!! warning
+    We have deactivated the use of this constant for the moment and are only using `default_precision(T)` in `BierlaireQuadratic`. This is because solvers and optimizers should rely on different choices of this constant.
 """
-const DEFAULT_BIERLAIRE_ε::Float64 = eps(Float32)
+const DEFAULT_BIERLAIRE_ε::Float64 = 2eps(Float32)

 """
     DEFAULT_BIERLAIRE_ξ

 A constant on basis of which the `b` in [`BierlaireQuadraticState`](@ref) is perturbed in order "to avoid stalling" (see [bierlaire2015optimization; Chapter 11.2.1](@cite); in this reference the author recommends ``10^{-7}`` as a value). Its value is $(DEFAULT_BIERLAIRE_ξ).
+
+!!! warning
+    We have deactivated the use of this constant for the moment and are only using `default_precision(T)` in `BierlaireQuadratic`. This is because solvers and optimizers should rely on different choices of this constant.
+"""
+const DEFAULT_BIERLAIRE_ξ::Float64 = 2eps(Float32)
+
+"""
+    default_precision(T)
+
+Compute the default precision used for [`BierlaireQuadraticState`](@ref).
+Compare this to the [`default_tolerance`](@ref) used in [`Options`](@ref).
+
+# Examples
+
+```jldoctest; setup = :(using SimpleSolvers: default_precision)
+default_precision(Float64)
+
+# output
+
+2.220446049250313e-16
+```
+
+```jldoctest; setup = :(using SimpleSolvers: default_precision)
+default_precision(Float32)
+
+# output
+
+1.1920929f-6
+```
+
+```jldoctest; setup = :(using SimpleSolvers: default_precision)
+default_precision(Float16)
+
+# output
+
+ERROR: No default precision defined for Float16.
+[...]
+```
 """
-const DEFAULT_BIERLAIRE_ξ::Float64 = eps(Float32)
+default_precision
+
+function default_precision(::Type{Float32})
+    10eps(Float32)
+end
+
+function default_precision(::Type{Float64})
+    eps(Float64)
+end
+
+function default_precision(::Type{T}) where {T <: AbstractFloat}
+    error("No default precision defined for $(T).")
+end

 """
     BierlaireQuadraticState <: LinesearchState
@@ -30,8 +83,8 @@ struct BierlaireQuadraticState{T} <: LinesearchState{T}
     ξ::T

     function BierlaireQuadraticState(T₁::DataType=Float64;
-                                     ε::T = DEFAULT_BIERLAIRE_ε,
-                                     ξ::T = DEFAULT_BIERLAIRE_ξ,
+                                     ε::T = default_precision(T₁), # DEFAULT_BIERLAIRE_ε,
+                                     ξ::T = default_precision(T₁), # DEFAULT_BIERLAIRE_ξ,
                                      options_kwargs...) where {T}
         config₁ = Options(T₁; options_kwargs...)
         new{T₁}(config₁, T₁(ε), T₁(ξ))
diff --git a/src/linesearch/custom_quadratic.jl b/src/linesearch/custom_quadratic.jl
index dcf7a539..cb66ca6d 100644
--- a/src/linesearch/custom_quadratic.jl
+++ b/src/linesearch/custom_quadratic.jl
@@ -62,6 +62,7 @@ function (ls::QuadraticState2{T})(obj::AbstractUnivariateObjective{T}, number_of
     # compute minimum αₜ of p(α); i.e. p'(α) = 0.
     αₜ = -p₁ / (2p₂) + a

+    !(l2norm(αₜ - x₀) < ls.ε) || return αₜ
     ls(obj, number_of_iterations + 1, αₜ, s * ls.s_reduction)
 end
diff --git a/src/optimization/hessian_bfgs.jl b/src/optimization/hessian_bfgs.jl
index 26fdc4de..466c3b1c 100644
--- a/src/optimization/hessian_bfgs.jl
+++ b/src/optimization/hessian_bfgs.jl
@@ -1,7 +1,26 @@
 """
     HessianBFGS <: Hessian
+
+A `struct` derived from [`Hessian`](@ref) to be used for an [`Optimizer`](@ref).
+
+# Fields
+- `objective::`[`MultivariateObjective`](@ref): the objective whose gradient is used for the update,
+- `x̄`: previous solution,
+- `x`: current solution,
+- `δ`: difference between current and previous solution (the *descent direction*),
+- `ḡ`: previous gradient,
+- `g`: current gradient,
+- `γ`: difference between current and previous gradient,
+- `Q`: approximation of the inverse Hessian `H⁻¹`,
+- `T1`: temporary storage used when updating `Q`,
+- `T2`: temporary storage used when updating `Q`,
+- `T3`: temporary storage used when updating `Q`,
+- `δγ`: the outer product of `δ` and `γ`. Note that this is different from the output of [`compute_δγ`](@ref), which is the inner product of `γ` and `δ`.
+- `δδ`: the outer product of `δ` with itself.
+
+Also compare those fields with the ones of [`NewtonOptimizerCache`](@ref).
 """
-struct HessianBFGS{T,VT,MT,OBJ} <: Hessian{T}
+struct HessianBFGS{T,VT,MT,OBJ} <: IterativeHessian{T}
     objective::OBJ

     x̄::VT    # previous solution
@@ -37,49 +56,68 @@ HessianBFGS(F::Callable, x::AbstractVector) = HessianBFGS(MultivariateObjective(
 Hessian(::BFGS, ForOBJ::Union{Callable, MultivariateObjective}, x::AbstractVector) = HessianBFGS(ForOBJ, x)

+@doc raw"""
+    initialize!(H, x)
+
+Initialize an object `H` of type [`HessianBFGS`](@ref).
+
+Note that, unlike most other [`initialize!`](@ref) methods, this one does not write `NaN`s everywhere: ``Q = H^{-1}`` is set to the identity.
+
+# Examples
+
+```jldoctest; setup = :(using SimpleSolvers; using SimpleSolvers: initialize!)
+f(x) = x .^ 2
+x = [1f0, 2f0, 3f0]
+H = HessianBFGS(f, x)
+initialize!(H, x)
+inv(H)
+
+# output
+
+3×3 Matrix{Float32}:
+ 1.0  0.0  0.0
+ 0.0  1.0  0.0
+ 0.0  0.0  1.0
+```
+"""
 function initialize!(H::HessianBFGS, x::AbstractVector)
     H.Q .= Matrix(1.0I, size(H.Q)...)

-    H.x̄ .= eltype(x)(NaN)
-    H.δ .= eltype(x)(NaN)
-    H.ḡ .= eltype(x)(NaN)
-    H.γ .= eltype(x)(NaN)
-
-    H.x .= x
-    H.g .= gradient!(H.objective, x)
+    H.x̄ .= alloc_x(x)
+    H.x .= alloc_x(x)
+    H.δ .= alloc_x(x)
+    H.ḡ .= alloc_g(x)
+    H.g .= alloc_g(x)
+    H.γ .= alloc_g(x)

     H
 end

-function update!(H::HessianBFGS, x::AbstractVector)
-    # copy previous data and compute new gradient
-    H.ḡ .= H.g
-    H.x̄ .= H.x
-    H.x .= x
-    H.g .= gradient!(H.objective, x)
-
-    # δ = x - x̄
-    H.δ .= H.x .- H.x̄
+function compute_outer_products!(H::HessianBFGS)
+    outer!(H.δδ, H.δ, H.δ)
+    outer!(H.δγ, H.δ, H.γ)
+end

-    # γ = g - ḡ
-    H.γ .= H.g .- H.ḡ
+function update!(H::HessianBFGS, x::AbstractVector)
+    update!(H, x, gradient!(objective(H), x))

-    # δγ = δᵀγ
-    δγ = H.δ ⋅ H.γ
+    δγ = compute_δγ(H)

     # BFGS
     # Q = Q - ... + ...
     # H.Q .-= (H.δ * H.γ' * H.Q .+ H.Q * H.γ * H.δ') ./ δγ .-
     #         (1 + dot(H.γ, H.Q, H.γ) ./ δγ) .* (H.δ * H.δ') ./ δγ
-
-    if δγ ≠ 0
-        outer!(H.δγ, H.δ, H.γ)
-        outer!(H.δδ, H.δ, H.δ)
+
+    if !iszero(δγ)
+        compute_outer_products!(H)

         mul!(H.T1, H.δγ, H.Q)
         mul!(H.T2, H.Q, H.δγ')

-        H.T3 .= (1 + dot(H.γ, H.Q, H.γ) ./ δγ) .* H.δδ
+        γQγ = compute_γQγ(H)
+        H.T3 .= (1 + γQγ ./ δγ) .* H.δδ

         H.Q .-= (H.T1 .+ H.T2 .- H.T3) ./ δγ
     end
+
+    H
 end

 Base.inv(H::HessianBFGS) = H.Q
diff --git a/src/optimization/hessian_dfp.jl b/src/optimization/hessian_dfp.jl
index 37b5df9f..74239b22 100644
--- a/src/optimization/hessian_dfp.jl
+++ b/src/optimization/hessian_dfp.jl
@@ -3,7 +3,7 @@

 """
-struct HessianDFP{T,VT,MT,OBJ} <: Hessian{T}
+struct HessianDFP{T,VT,MT,OBJ} <: IterativeHessian{T}
     objective::OBJ

     x̄::VT    # previous solution
@@ -49,36 +49,27 @@ function initialize!(H::HessianDFP, x::AbstractVector)
     H.x .= x
     H.g .= gradient!(H.objective, x)

-    return H
+    H
 end

-function update!(H::HessianDFP, x::AbstractVector)
-    # copy previous data and compute new gradient
-    H.ḡ .= H.g
-    H.x̄ .= H.x
-    H.x .= x
-    H.g .= gradient!(H.objective, x)
-
-    # δ = x - x̄
-    H.δ .= H.x .- H.x̄
-
-    # γ = g - ḡ
-    H.γ .= H.g .- H.ḡ
+function compute_outer_products!(H::HessianDFP)
+    outer!(H.γγ, H.γ, H.γ)
+    outer!(H.δδ, H.δ, H.δ)
+end

-    # γQγ = γᵀQγ
-    γQγ = dot(H.γ, H.Q, H.γ)
+function update!(H::HessianDFP, x::AbstractVector)
+    update!(H, x, gradient!(objective(H), x))

-    # δγ = δᵀγ
-    δγ = H.δ ⋅ H.γ
+    γQγ = compute_γQγ(H)
+    δγ = compute_δγ(H)

     # DFP
     # Q = Q - ... + ...
     # H.Q .-= H.Q * H.γ * H.γ' * H.Q / (H.γ' * H.Q * H.γ) .-
     #         H.δ * H.δ' ./ δγ

-    if δγ ≠ 0 && γQγ ≠ 0
-        outer!(H.γγ, H.γ, H.γ)
-        outer!(H.δδ, H.δ, H.δ)
+    if !iszero(δγ) && !iszero(γQγ)
+        compute_outer_products!(H)

         mul!(H.T1, H.γγ, H.Q)
         mul!(H.T2, H.Q, H.T1)
diff --git a/src/optimization/iterative_hessians.jl b/src/optimization/iterative_hessians.jl
new file mode 100644
index 00000000..708dde92
--- /dev/null
+++ b/src/optimization/iterative_hessians.jl
@@ -0,0 +1,61 @@
+"""
+    IterativeHessian <: Hessian
+
+An abstract type derived from [`Hessian`](@ref).
+Its main purpose is to define a supertype that encompasses [`HessianBFGS`](@ref) and [`HessianDFP`](@ref) for dispatch.
+"""
+abstract type IterativeHessian{T} <: Hessian{T} end
+
+objective(H::IterativeHessian) = H.objective
+solution(H::IterativeHessian) = H.x
+gradient(H::IterativeHessian) = H.g
+direction(H::IterativeHessian) = H.δ
+
+"""
+    update!(H, x, g)
+
+Update the [`IterativeHessian`](@ref) based on `x` and `g`.
+
+Note that this [`update!`](@ref) method performs different operations from [`update!(::NewtonOptimizerCache, ::AbstractVector, ::AbstractVector)`](@ref).
+""" +function update!(H::IterativeHessian, x::AbstractVector, g::AbstractVector) + H.ḡ .= gradient(H) + H.x̄ .= solution(H) + solution(H) .= x + gradient(H) .= g + + # δ = x - x̄ + H.δ .= solution(H) .- H.x̄ + + # γ = g - ḡ + H.γ .= gradient(H) .- H.ḡ + + H +end + +@doc raw""" + compute_δQδ(H) + +Compute `δQδ`, which is used to check whether ``Q = \mathrm{inv}(H)`` in [`HessianDFP`](@ref) is updated (similarly to [`compute_δγ`](@ref)) and used explicitly in [`HessianBFGS`](@ref). +""" +function compute_γQγ(H::IterativeHessian) + dot(H.γ, H.Q, H.γ) +end + +@doc raw""" + compute_δγ(H) + +Compute `δγ` which is used to check whether ``Q = \mathrm{inv}(H)`` is updated. +""" +function compute_δγ(H::IterativeHessian) + direction(H) ⋅ H.γ +end + +@doc raw""" + compute_outpuer_products(H) + +Compute the outer products ``\gamma\gamma^T`` and ``\delta\delta`` for the [`IterativeHessian`](@ref) `H`. + +Note that this is in-place; all the results are stored in `H`. +""" +function compute_outer_products!(H::IterativeHessian) end \ No newline at end of file diff --git a/src/optimization/newton_optimizer_cache.jl b/src/optimization/newton_optimizer_cache.jl index f30370c4..6603fad5 100644 --- a/src/optimization/newton_optimizer_cache.jl +++ b/src/optimization/newton_optimizer_cache.jl @@ -52,6 +52,21 @@ Return the direction of the gradient step (i.e. `δ`) of an instance of [`Newton """ direction(cache::NewtonOptimizerCache) = cache.δ +@doc raw""" + update!(cache, x) + +Update `cache` (an instance of [`NewtonOptimizerCache`](@ref)) based on `x`. + +This does: +```math +\begin{aligned} +\bar{x}^\mathtt{cache} & \gets x, \\ +x^\mathtt{cache} & \gets x, \\ +\delta^\mathtt{cache} & \gets 0, +\end{aligned} +``` +where ``\delta^\mathtt{cache}`` is the [`direction`](@ref) stored in `cache`. +""" function update!(cache::NewtonOptimizerCache, x::AbstractVector) cache.x̄ .= x cache.x .= x diff --git a/src/optimization/optimizer.jl b/src/optimization/optimizer.jl index db9c4072..e94e6941 100644 --- a/src/optimization/optimizer.jl +++ b/src/optimization/optimizer.jl @@ -65,7 +65,7 @@ struct Optimizer{T, OBJ <: MultivariateObjective{T}, HT <: Hessian{T}, RES <: OptimizerResult{T}, - AST <: OptimizationAlgorithm} <: NonlinearSolver + AST <: OptimizationAlgorithm} <: AbstractSolver algorithm::ALG objective::OBJ hessian::HT @@ -244,6 +244,7 @@ Too see the value of `x` after one iteration confer the docstring of [`solver_st function solve!(opt::Optimizer, x::AbstractVector) initialize!(opt, x) + initial_values_for_hessian!(opt) while (iteration_number(opt) == 0 || !meets_stopping_criteria(opt)) increase_iteration_number!(result(opt)) solver_step!(opt, x) @@ -254,4 +255,21 @@ function solve!(opt::Optimizer, x::AbstractVector) print_status(status(opt), config(opt)) x +end + +initial_values_for_hessian!(opt::Optimizer{T, ALG, OBJ, HT}) where {T, ALG, OBJ, HT <: Hessian} = opt + +""" + initial_values_for_hessian!(opt) + +Write initial values into the [`IterativeHessian`](@ref) in order to start optimization. [`Hessian`](@ref)s that are not [`IterativeHessian`](@ref)s do not need this extra step. +Also note the difference to e.g. [`initialize!(::HessianBFGS, ::AbstractVector)`](@ref). 
+""" +function initial_values_for_hessian!(opt::Optimizer{T, ALG, OBJ, HT}) where {T, ALG, OBJ, HT <: IterativeHessian} + z = zero(solution(hessian(opt))) + o = ones(T, length(z)) + H = hessian(opt) + update!(H, z, gradient!(objective(H), z)) + update!(H, o) + opt end \ No newline at end of file diff --git a/test/optimizer_tests.jl b/test/optimizer_tests.jl index d4aee2d5..a5462bfd 100644 --- a/test/optimizer_tests.jl +++ b/test/optimizer_tests.jl @@ -24,7 +24,7 @@ test_obj = MultivariateObjective(F, test_x) @test_throws MethodError solver_step!(test_x, test_optim) for method in (Newton(), BFGS(), DFP()) - for _linesearch in (Static(0.8), Backtracking()) # , Quadratic2(), BierlaireQuadratic(), Bisection()) + for _linesearch in (Static(0.8), Backtracking(), Quadratic2(), BierlaireQuadratic(), Bisection()) for T in (Float64, Float32) n = 1 x = ones(T, n)