From 7139317c7a4d7305a96a325afae4236d2a707f29 Mon Sep 17 00:00:00 2001 From: odunbar Date: Thu, 21 Nov 2024 16:10:20 -0800 Subject: [PATCH 1/8] initial tooling for GP and scalar RF training - no data file... --- examples/catke-ces/Project.toml | 10 ++ examples/catke-ces/emulate_sample.jl | 203 +++++++++++++++++++++++++++ 2 files changed, 213 insertions(+) create mode 100644 examples/catke-ces/Project.toml create mode 100644 examples/catke-ces/emulate_sample.jl diff --git a/examples/catke-ces/Project.toml b/examples/catke-ces/Project.toml new file mode 100644 index 0000000..fa7f2b1 --- /dev/null +++ b/examples/catke-ces/Project.toml @@ -0,0 +1,10 @@ +[deps] +CalibrateEmulateSample = "95e48a1f-0bec-4818-9538-3db4340308e3" +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +EnsembleKalmanProcesses = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" diff --git a/examples/catke-ces/emulate_sample.jl b/examples/catke-ces/emulate_sample.jl new file mode 100644 index 0000000..55637f1 --- /dev/null +++ b/examples/catke-ces/emulate_sample.jl @@ -0,0 +1,203 @@ +using JLD2 +using Distributions # probability distributions and associated functions +using LinearAlgebra +using Random +using JLD2 +using StatsBase + +# CES +using CalibrateEmulateSample.Emulators +using CalibrateEmulateSample.MarkovChainMonteCarlo +using CalibrateEmulateSample.ParameterDistributions +using CalibrateEmulateSample.DataContainers +using CalibrateEmulateSample.EnsembleKalmanProcesses +using CalibrateEmulateSample.EnsembleKalmanProcesses.Localizers +using CalibrateEmulateSample.Utilities + + +function main() + + data_file = joinpath(@__DIR__, "catke_parameters.jld2") + + # contains: + loaded_data = load(data_file) + objectives = loaded_data["objectives"] + @info "objectives size $(size(objectives))" + phys_parameters = loaded_data["parameters"] + @info "parameters size $(size(phys_parameters))" + #observations = loaded_data["observations"] + #@info "observations size $size(observations)" + # noise_covariance = loaded_data["noise_covariance"] + # observations = loaded_data["observations"] + n_iter, n_ens, n_param = size(phys_parameters) + + # (use priors to transform parameters into computational space) + # get from initial ensemble + #prior_mean = mean(parameters[1,:,:], dims=1) + #prior_cov = cov(parameters[1,:,:], dims=1) + #@info "det prior cov: $(det(prior_cov))" + #@info "pos def prior cov?: $(isposdef(prior_cov))" + prior_vec = [ + constrained_gaussian("CᵂwΔ", 4.0, 2.0, 0.0, 8.0), + constrained_gaussian("Cᵂu★", 4.0, 2.0, 0.0, 8.0), + constrained_gaussian("Cʰⁱc", 1.0, 0.5, 0.0, 2.0), + constrained_gaussian("Cʰⁱu", 1.0, 0.5, 0.0, 2.0), + constrained_gaussian("Cʰⁱe", 5.0, 2.5, 0.0, 10.0), + constrained_gaussian("CʰⁱD", 5.0, 2.5, 0.0, 10.0), + constrained_gaussian("Cˢ", 1.0, 0.5, 0.0, 2.0), + constrained_gaussian("Cˡᵒc", 1.0, 0.5, 0.0, 2.0), + constrained_gaussian("Cˡᵒu", 1.0, 0.5, 0.0, 2.0), + constrained_gaussian("Cˡᵒe", 5.0, 2.5, 0.0, 10.0), + constrained_gaussian("CˡᵒD", 5.0, 2.5, 0.0, 10.0), + constrained_gaussian("CRi⁰", 1.0, 0.5, 0.0, 2.0), + constrained_gaussian("CRiᵟ", 1.0, 0.5, 0.0, 2.0), + constrained_gaussian("Cᵘⁿc", 1.0, 0.5, 0.0, 2.0), + constrained_gaussian("Cᵘⁿu", 1.0, 0.5, 0.0, 2.0), + constrained_gaussian("Cᵘⁿe", 5.0, 2.5, 0.0, 10.0), + constrained_gaussian("CᵘⁿD", 5.0, 2.5, 0.0, 10.0), + constrained_gaussian("Cᶜc", 4.0, 2.0, 0.0, 8.0), + constrained_gaussian("Cᶜu", 4.0, 2.0, 0.0, 8.0), + constrained_gaussian("Cᶜe", 4.0, 2.0, 0.0, 8.0), + constrained_gaussian("CᶜD", 4.0, 2.0, 0.0, 8.0), + constrained_gaussian("Cᵉc", 0.5, 0.25, 0.0, 1.0), + constrained_gaussian("Cˢᵖ", 0.5, 0.25, 0.0, 1.0), + ] + prior = combine_distributions(prior_vec) + + # we map in the computational space + parameters = zeros(size(phys_parameters)) + for i =1:size(phys_parameters,1) + parameters[i,:,:] = transform_constrained_to_unconstrained(prior,phys_parameters[1,:,:]')' + end + + + ## CES on compressed data + + io_pair_iter = 10 + iter_train = 1:10:1+io_pair_iter*10 + @info "train on iterations: $(iter_train)" + inputs = reduce(vcat, [parameters[i,:,:] for i in iter_train]) + outputs = reduce(vcat, objectives[iter_train,:])[:,:] + @info "training data input: $(size(inputs)) output: $(size(outputs))" + input_output_pairs = PairedDataContainer(inputs, outputs, data_are_columns=false) + + # estimate the noise in the objective using trailing samples + estimation_iter = 50 + Γ = zeros(1,1) + Γ[1,1] = var(reduce(vcat, objectives[end - estimation_iter:end,:])) + @info "estimated_noise: $Γ" + + truth = [minimum(objectives);] + @info "data: $(truth)" + + @info "Finished inverse problem and data wrangling stage" + #try GP JL + gppackage = Emulators.GPJL() + pred_type = Emulators.YType() + + mlt_methods = ["GP", "Scalar-RF"] + + mlt_method = mlt_methods[2] + + if mlt_method == "GP" + mlt = GaussianProcess( + gppackage; + kernel = nothing, # use default squared exponential kernel + prediction_type = pred_type, + noise_learn = false, + ) + elseif mlt_method == "Scalar-RF" + overrides = Dict( + "verbose" => true, + "n_features_opt" => 20, + "train_fraction" => 0.95, + "n_iteration" => 20, + "cov_sample_multiplier" => 1.0, + "n_ensemble" => 200, #40*n_dimensions, + "n_cross_val_sets" => 3, + ) + + rank = 5 + nugget=1e-6 + kernel_structure = SeparableKernel(LowRankFactor(rank, nugget), OneDimFactor()) + + n_features = 500 + + mlt = ScalarRandomFeatureInterface( + n_features, + n_param, + kernel_structure = kernel_structure, + optimizer_options = deepcopy(overrides), + ) + + end + + + # Fit an emulator to the data + emulator = Emulator( + mlt, + input_output_pairs; + obs_noise_cov = Γ + ) + + # Optimize the GP hyperparameters for better fit + optimize_hyperparameters!(emulator) + + @info "Finished Emulation stage" + + # test the emulator against some other trajectory data + min_iter_test = maximum(iter_train) + 1 + iter_test = min_iter_test:min_iter_test + 10 + + @info "test on iterations: $(iter_test)" + test_inputs = reduce(vcat, [parameters[i,:,:] for i in iter_test]) + test_outputs = reduce(vcat, objectives[iter_test,:])[:,:] + @info "testing data input: $(size(test_inputs)) output: $(size(test_outputs))" + pred_mean_test = zeros(length(iter_test)*n_ens,1) + for i in 1:size(test_inputs,1) + pred_mean_test[i,:] = Emulators.predict(emulator, test_inputs[i:i,:]', transform_to_real = true)[1] + end + test_error = norm(pred_mean_test - test_outputs)/size(test_inputs,1) + @info "average L^2 test_error $(test_error)" + + pred_mean_train = zeros(io_pair_iter*n_ens,1) + for i in 1:size(get_inputs(input_output_data),2) + pred_mean_test[i,:] = Emulators.predict(emulator, get_inputs(input_output_pairs)[:,i:i], transform_to_real = true)[1] + end + train_error = norm(pred_mean_train - get_outputs(input_output_pairs))/size(inputs,1) + @info "average L^2 train_error $(train_error)" + + # determine a good step size + u0 = vec(mean(get_inputs(input_output_pairs)[end,:], dims = 2)) + println("initial parameters: ", u0) + yt_sample = truth + mcmc = MCMCWrapper(RWMHSampling(), yt_sample, prior, emulator, init_params=u0) + + new_step = optimize_stepsize(mcmc; init_stepsize = 1.0, N = 5000, discard_initial = 0) + chain = MarkovChainMonteCarlo.sample(mcmc, 300_000; stepsize = new_step, discard_initial = 2_000) + posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain) + + println("Finished Sampling stage") + # extract some statistics + post_mean = mean(posterior) + post_cov = cov(posterior) + println("mean in of posterior samples (taken in comp space)") + println(post_mean) + println("covariance of posterior samples (taken in comp space)") + println(post_cov) + println("transformed posterior mean from comp to phys space") + println(transform_unconstrained_to_constrained(posterior, post_mean)) + + # map to physical space + posterior_samples = vcat([get_distribution(posterior)[name] for name in get_name(posterior)]...) #samples are columns + transformed_posterior_samples = + mapslices(x -> transform_unconstrained_to_constrained(posterior, x), posterior_samples, dims = 1) + println("mean of posterior samples (taken in phys space)") + println(mean(transformed_posterior_samples, dims = 2)) + println("cov of posterior samples (taken in phys space)") + println(cov(transformed_posterior_samples, dims = 2)) + + +end + +main() From 021f17d2941673ff1a9bd73e689b7692684f51a5 Mon Sep 17 00:00:00 2001 From: odunbar Date: Fri, 22 Nov 2024 09:59:18 -0800 Subject: [PATCH 2/8] full pipeline, running scalar RF, but with quick-n-dirty optimizers, and data that is in transience --- examples/catke-ces/emulate_sample.jl | 40 +++++++++++++++------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/examples/catke-ces/emulate_sample.jl b/examples/catke-ces/emulate_sample.jl index 55637f1..e92d334 100644 --- a/examples/catke-ces/emulate_sample.jl +++ b/examples/catke-ces/emulate_sample.jl @@ -4,6 +4,7 @@ using LinearAlgebra using Random using JLD2 using StatsBase +using Plots # CES using CalibrateEmulateSample.Emulators @@ -15,9 +16,7 @@ using CalibrateEmulateSample.EnsembleKalmanProcesses.Localizers using CalibrateEmulateSample.Utilities -function main() - - data_file = joinpath(@__DIR__, "catke_parameters.jld2") +data_file = joinpath(@__DIR__, "catke_parameters.jld2") # contains: loaded_data = load(data_file) @@ -80,7 +79,7 @@ function main() outputs = reduce(vcat, objectives[iter_train,:])[:,:] @info "training data input: $(size(inputs)) output: $(size(outputs))" input_output_pairs = PairedDataContainer(inputs, outputs, data_are_columns=false) - + @info size(get_inputs(input_output_pairs)) # estimate the noise in the objective using trailing samples estimation_iter = 50 Γ = zeros(1,1) @@ -109,7 +108,7 @@ function main() elseif mlt_method == "Scalar-RF" overrides = Dict( "verbose" => true, - "n_features_opt" => 20, + "n_features_opt" => 50, "train_fraction" => 0.95, "n_iteration" => 20, "cov_sample_multiplier" => 1.0, @@ -117,8 +116,8 @@ function main() "n_cross_val_sets" => 3, ) - rank = 5 - nugget=1e-6 + rank = 3 + nugget=1e-8 kernel_structure = SeparableKernel(LowRankFactor(rank, nugget), OneDimFactor()) n_features = 500 @@ -145,7 +144,16 @@ function main() @info "Finished Emulation stage" + # test the emulator against some other trajectory data + pred_mean_train = zeros(1,size(get_inputs(input_output_pairs),2)) + for i in 1:size(get_inputs(input_output_pairs),2) + pred_mean_train[:,i] = Emulators.predict(emulator, get_inputs(input_output_pairs)[:,i:i], transform_to_real = true)[1] + end + train_error = norm(pred_mean_train - get_outputs(input_output_pairs))/size(get_inputs(input_output_pairs),2) + @info "average L^2 train_error $(train_error)" + + min_iter_test = maximum(iter_train) + 1 iter_test = min_iter_test:min_iter_test + 10 @@ -159,16 +167,10 @@ function main() end test_error = norm(pred_mean_test - test_outputs)/size(test_inputs,1) @info "average L^2 test_error $(test_error)" - - pred_mean_train = zeros(io_pair_iter*n_ens,1) - for i in 1:size(get_inputs(input_output_data),2) - pred_mean_test[i,:] = Emulators.predict(emulator, get_inputs(input_output_pairs)[:,i:i], transform_to_real = true)[1] - end - train_error = norm(pred_mean_train - get_outputs(input_output_pairs))/size(inputs,1) - @info "average L^2 train_error $(train_error)" + # determine a good step size - u0 = vec(mean(get_inputs(input_output_pairs)[end,:], dims = 2)) + u0 = vec(mean(parameters[end,:,:], dims = 1)) println("initial parameters: ", u0) yt_sample = truth mcmc = MCMCWrapper(RWMHSampling(), yt_sample, prior, emulator, init_params=u0) @@ -197,7 +199,9 @@ function main() println("cov of posterior samples (taken in phys space)") println(cov(transformed_posterior_samples, dims = 2)) - -end -main() +# plot some useful +p = plot(prior) +plot!(p, posterior) +vline!(p, mean(phys_parameters[end,:,:],dims=1)) + From 5f811203107259aaf2924f0b13a73eb55f97abbc Mon Sep 17 00:00:00 2001 From: odunbar Date: Mon, 25 Nov 2024 17:34:41 -0800 Subject: [PATCH 3/8] play with params, bugfix training data --- examples/catke-ces/emulate_sample.jl | 81 +++++++++++++++++++--------- 1 file changed, 57 insertions(+), 24 deletions(-) diff --git a/examples/catke-ces/emulate_sample.jl b/examples/catke-ces/emulate_sample.jl index e92d334..9a35055 100644 --- a/examples/catke-ces/emulate_sample.jl +++ b/examples/catke-ces/emulate_sample.jl @@ -66,14 +66,15 @@ data_file = joinpath(@__DIR__, "catke_parameters.jld2") # we map in the computational space parameters = zeros(size(phys_parameters)) for i =1:size(phys_parameters,1) - parameters[i,:,:] = transform_constrained_to_unconstrained(prior,phys_parameters[1,:,:]')' + parameters[i,:,:] = transform_constrained_to_unconstrained(prior, phys_parameters[i,:,:]')' end ## CES on compressed data - io_pair_iter = 10 - iter_train = 1:10:1+io_pair_iter*10 + io_pair_iter = 20 + burnin = 50 + iter_train = burnin+1:io_pair_iter:burnin+1+io_pair_iter*10 @info "train on iterations: $(iter_train)" inputs = reduce(vcat, [parameters[i,:,:] for i in iter_train]) outputs = reduce(vcat, objectives[iter_train,:])[:,:] @@ -100,27 +101,34 @@ data_file = joinpath(@__DIR__, "catke_parameters.jld2") if mlt_method == "GP" mlt = GaussianProcess( - gppackage; - kernel = nothing, # use default squared exponential kernel - prediction_type = pred_type, - noise_learn = false, - ) + gppackage; + kernel = nothing, # use default squared exponential kernel + prediction_type = pred_type, + noise_learn = false, + ) + mlt_untuned = ScalarRandomFeatureInterface( + 1000, + n_param, + kernel_structure = SeparableKernel(LowRankFactor(), OneDimFactor()), + optimizer_options = Dict( "n_features_opt" => 100, "n_iteration" => 0, "n_cross_val_sets" => 1, "cov_sample_multiplier" => 0.01), + ) + elseif mlt_method == "Scalar-RF" overrides = Dict( "verbose" => true, - "n_features_opt" => 50, - "train_fraction" => 0.95, - "n_iteration" => 20, - "cov_sample_multiplier" => 1.0, - "n_ensemble" => 200, #40*n_dimensions, - "n_cross_val_sets" => 3, + "n_features_opt" => 100, + "train_fraction" => 0.9, + "n_iteration" => 10, + "cov_sample_multiplier" => 0.2, + "n_ensemble" => 50, #40*n_dimensions, + "n_cross_val_sets" => 2, ) - rank = 3 - nugget=1e-8 + rank = 5 + nugget=1e-4 kernel_structure = SeparableKernel(LowRankFactor(rank, nugget), OneDimFactor()) - n_features = 500 + n_features = 1000 mlt = ScalarRandomFeatureInterface( n_features, @@ -129,6 +137,13 @@ data_file = joinpath(@__DIR__, "catke_parameters.jld2") optimizer_options = deepcopy(overrides), ) + mlt_untuned = ScalarRandomFeatureInterface( + n_features, + n_param, + kernel_structure = kernel_structure, + optimizer_options = Dict( "n_features_opt" => 100, "n_iteration" => 0, "n_cross_val_sets" => 1, "cov_sample_multiplier" => 0.01), #this is too hacky + ) + end @@ -138,20 +153,30 @@ data_file = joinpath(@__DIR__, "catke_parameters.jld2") input_output_pairs; obs_noise_cov = Γ ) - # Optimize the GP hyperparameters for better fit optimize_hyperparameters!(emulator) + # For comparison, an untuned emulator +emulator_untuned = Emulator( + mlt_untuned, + input_output_pairs; + obs_noise_cov = Γ +) + + @info "Finished Emulation stage" # test the emulator against some other trajectory data pred_mean_train = zeros(1,size(get_inputs(input_output_pairs),2)) + pred_mean_train_untuned = copy(pred_mean_train) for i in 1:size(get_inputs(input_output_pairs),2) pred_mean_train[:,i] = Emulators.predict(emulator, get_inputs(input_output_pairs)[:,i:i], transform_to_real = true)[1] + pred_mean_train_untuned[:,i] = Emulators.predict(emulator_untuned, get_inputs(input_output_pairs)[:,i:i], transform_to_real = true)[1] end train_error = norm(pred_mean_train - get_outputs(input_output_pairs))/size(get_inputs(input_output_pairs),2) - @info "average L^2 train_error $(train_error)" + train_error_untuned = norm(pred_mean_train_untuned - get_outputs(input_output_pairs))/size(get_inputs(input_output_pairs),2) + @info "average L^2 train_error untuned: $(train_error_untuned) and tuned: $(train_error)" min_iter_test = maximum(iter_train) + 1 @@ -162,20 +187,23 @@ data_file = joinpath(@__DIR__, "catke_parameters.jld2") test_outputs = reduce(vcat, objectives[iter_test,:])[:,:] @info "testing data input: $(size(test_inputs)) output: $(size(test_outputs))" pred_mean_test = zeros(length(iter_test)*n_ens,1) + pred_mean_test_untuned = copy(pred_mean_test) for i in 1:size(test_inputs,1) - pred_mean_test[i,:] = Emulators.predict(emulator, test_inputs[i:i,:]', transform_to_real = true)[1] + pred_mean_test[i,:] = Emulators.predict(emulator, test_inputs[i:i,:]', transform_to_real = true)[1] + pred_mean_test_untuned[i,:] = Emulators.predict(emulator_untuned, test_inputs[i:i,:]', transform_to_real = true)[1] end test_error = norm(pred_mean_test - test_outputs)/size(test_inputs,1) - @info "average L^2 test_error $(test_error)" + test_error_untuned = norm(pred_mean_test_untuned - test_outputs)/size(test_inputs,1) + @info "average L^2 test_error untuned: $(test_error_untuned) and tuned: $(test_error)" # determine a good step size u0 = vec(mean(parameters[end,:,:], dims = 1)) println("initial parameters: ", u0) yt_sample = truth - mcmc = MCMCWrapper(RWMHSampling(), yt_sample, prior, emulator, init_params=u0) + mcmc = MCMCWrapper(pCNMHSampling(), yt_sample, prior, emulator, init_params=u0) - new_step = optimize_stepsize(mcmc; init_stepsize = 1.0, N = 5000, discard_initial = 0) + new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 5000, discard_initial = 0) chain = MarkovChainMonteCarlo.sample(mcmc, 300_000; stepsize = new_step, discard_initial = 2_000) posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain) @@ -200,8 +228,13 @@ data_file = joinpath(@__DIR__, "catke_parameters.jld2") println(cov(transformed_posterior_samples, dims = 2)) -# plot some useful +# plot some useful marginals p = plot(prior) plot!(p, posterior) vline!(p, mean(phys_parameters[end,:,:],dims=1)) +# vline!(p, mean(phys_parameters[end,:,:],dims=1)) # where training data ended up. + +savefig(p, joinpath(@__DIR__, "catke_posterior.png")) +savefig(p, joinpath(@__DIR__, "catke_posterior.pdf")) + From fbe350022ed6ec35bb8c8eef5f4fbffa038bf2f7 Mon Sep 17 00:00:00 2001 From: odunbar Date: Mon, 25 Nov 2024 20:07:15 -0800 Subject: [PATCH 4/8] more flexible priors --- examples/catke-ces/emulate_sample.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/catke-ces/emulate_sample.jl b/examples/catke-ces/emulate_sample.jl index 9a35055..644d847 100644 --- a/examples/catke-ces/emulate_sample.jl +++ b/examples/catke-ces/emulate_sample.jl @@ -117,14 +117,14 @@ data_file = joinpath(@__DIR__, "catke_parameters.jld2") overrides = Dict( "verbose" => true, "n_features_opt" => 100, - "train_fraction" => 0.9, + "train_fraction" => 0.8, "n_iteration" => 10, - "cov_sample_multiplier" => 0.2, + "cov_sample_multiplier" => 0.4, "n_ensemble" => 50, #40*n_dimensions, - "n_cross_val_sets" => 2, + "n_cross_val_sets" => 4, ) - rank = 5 + rank = 10 nugget=1e-4 kernel_structure = SeparableKernel(LowRankFactor(rank, nugget), OneDimFactor()) @@ -203,7 +203,7 @@ emulator_untuned = Emulator( yt_sample = truth mcmc = MCMCWrapper(pCNMHSampling(), yt_sample, prior, emulator, init_params=u0) - new_step = optimize_stepsize(mcmc; init_stepsize = 0.1, N = 5000, discard_initial = 0) + new_step = optimize_stepsize(mcmc; init_stepsize = 1e-3, N = 5000, discard_initial = 0) chain = MarkovChainMonteCarlo.sample(mcmc, 300_000; stepsize = new_step, discard_initial = 2_000) posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain) From ee2b5eb2a9d0deac76f51ea1121f50f97b7e1487 Mon Sep 17 00:00:00 2001 From: odunbar Date: Mon, 25 Nov 2024 20:36:05 -0800 Subject: [PATCH 5/8] more data in optimization loss --- examples/catke-ces/emulate_sample.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/catke-ces/emulate_sample.jl b/examples/catke-ces/emulate_sample.jl index 644d847..f55e699 100644 --- a/examples/catke-ces/emulate_sample.jl +++ b/examples/catke-ces/emulate_sample.jl @@ -116,12 +116,12 @@ data_file = joinpath(@__DIR__, "catke_parameters.jld2") elseif mlt_method == "Scalar-RF" overrides = Dict( "verbose" => true, - "n_features_opt" => 100, - "train_fraction" => 0.8, + "n_features_opt" => 200, + "train_fraction" => 0.9, "n_iteration" => 10, "cov_sample_multiplier" => 0.4, "n_ensemble" => 50, #40*n_dimensions, - "n_cross_val_sets" => 4, + "n_cross_val_sets" => 3, ) rank = 10 From 450b06722a64b9e03442a84d99eecaf2c419e13b Mon Sep 17 00:00:00 2001 From: odunbar Date: Mon, 25 Nov 2024 20:38:37 -0800 Subject: [PATCH 6/8] line width increase --- examples/catke-ces/emulate_sample.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/catke-ces/emulate_sample.jl b/examples/catke-ces/emulate_sample.jl index f55e699..e8b4763 100644 --- a/examples/catke-ces/emulate_sample.jl +++ b/examples/catke-ces/emulate_sample.jl @@ -231,7 +231,7 @@ emulator_untuned = Emulator( # plot some useful marginals p = plot(prior) plot!(p, posterior) -vline!(p, mean(phys_parameters[end,:,:],dims=1)) +vline!(p, mean(phys_parameters[end,:,:],dims=1), linewidth=5) # vline!(p, mean(phys_parameters[end,:,:],dims=1)) # where training data ended up. savefig(p, joinpath(@__DIR__, "catke_posterior.png")) From e94bebf7b99061c060eec6deda6a385b11c21f5d Mon Sep 17 00:00:00 2001 From: odunbar Date: Tue, 26 Nov 2024 12:06:09 -0800 Subject: [PATCH 7/8] readable plots with grey & alpha, added untuned RF posterior --- examples/catke-ces/emulate_sample.jl | 30 ++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/examples/catke-ces/emulate_sample.jl b/examples/catke-ces/emulate_sample.jl index e8b4763..a142969 100644 --- a/examples/catke-ces/emulate_sample.jl +++ b/examples/catke-ces/emulate_sample.jl @@ -199,7 +199,7 @@ emulator_untuned = Emulator( # determine a good step size u0 = vec(mean(parameters[end,:,:], dims = 1)) - println("initial parameters: ", u0) + @info "initial parameters: $(u0)" yt_sample = truth mcmc = MCMCWrapper(pCNMHSampling(), yt_sample, prior, emulator, init_params=u0) @@ -207,7 +207,7 @@ emulator_untuned = Emulator( chain = MarkovChainMonteCarlo.sample(mcmc, 300_000; stepsize = new_step, discard_initial = 2_000) posterior = MarkovChainMonteCarlo.get_posterior(mcmc, chain) - println("Finished Sampling stage") + @info "Finished Sampling stage: tuned" # extract some statistics post_mean = mean(posterior) post_cov = cov(posterior) @@ -229,12 +229,34 @@ emulator_untuned = Emulator( # plot some useful marginals -p = plot(prior) -plot!(p, posterior) +p = plot(posterior) vline!(p, mean(phys_parameters[end,:,:],dims=1), linewidth=5) # vline!(p, mean(phys_parameters[end,:,:],dims=1)) # where training data ended up. savefig(p, joinpath(@__DIR__, "catke_posterior.png")) savefig(p, joinpath(@__DIR__, "catke_posterior.pdf")) +plot!(prior, color=:grey) +savefig(p, joinpath(@__DIR__, "catke_prior_posterior.png")) +savefig(p, joinpath(@__DIR__, "catke_prior_posterior.pdf")) + + + +## Untuned: + + +mcmc_untuned = MCMCWrapper(pCNMHSampling(), yt_sample, prior, emulator, init_params=u0) + +new_step_untuned = optimize_stepsize(mcmc_untuned; init_stepsize = 1e-3, N = 5000, discard_initial = 0) +chain_untuned = MarkovChainMonteCarlo.sample(mcmc_untuned, 300_000; stepsize = new_step_untuned, discard_initial = 2_000) +posterior_untuned = MarkovChainMonteCarlo.get_posterior(mcmc_untuned, chain_untuned) + +@info "Finished Sampling stage: untuned" + +p2 = plot(posterior) +plot!(p2, posterior_untuned, color=:grey, alpha=0.3) +vline!(p2, mean(phys_parameters[end,:,:],dims=1), linewidth=5) + +savefig(p2, joinpath(@__DIR__, "catke_tuned-untuned_posterior.png")) +savefig(p2, joinpath(@__DIR__, "catke_tuned-untuned_posterior.pdf")) From e40148fa6b8b6a5eef59829bf8380ba5d504783d Mon Sep 17 00:00:00 2001 From: odunbar Date: Tue, 26 Nov 2024 17:21:23 -0800 Subject: [PATCH 8/8] add training data min max vals to plots --- examples/catke-ces/emulate_sample.jl | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/examples/catke-ces/emulate_sample.jl b/examples/catke-ces/emulate_sample.jl index a142969..a5a2831 100644 --- a/examples/catke-ces/emulate_sample.jl +++ b/examples/catke-ces/emulate_sample.jl @@ -73,8 +73,9 @@ data_file = joinpath(@__DIR__, "catke_parameters.jld2") ## CES on compressed data io_pair_iter = 20 + n_samples = 10 burnin = 50 - iter_train = burnin+1:io_pair_iter:burnin+1+io_pair_iter*10 + iter_train = burnin+1:io_pair_iter:burnin+1+io_pair_iter*n_samples @info "train on iterations: $(iter_train)" inputs = reduce(vcat, [parameters[i,:,:] for i in iter_train]) outputs = reduce(vcat, objectives[iter_train,:])[:,:] @@ -124,7 +125,7 @@ data_file = joinpath(@__DIR__, "catke_parameters.jld2") "n_cross_val_sets" => 3, ) - rank = 10 + rank = 7 nugget=1e-4 kernel_structure = SeparableKernel(LowRankFactor(rank, nugget), OneDimFactor()) @@ -180,7 +181,7 @@ emulator_untuned = Emulator( min_iter_test = maximum(iter_train) + 1 - iter_test = min_iter_test:min_iter_test + 10 + iter_test = min_iter_test:min_iter_test + 20 @info "test on iterations: $(iter_test)" test_inputs = reduce(vcat, [parameters[i,:,:] for i in iter_test]) @@ -230,13 +231,17 @@ emulator_untuned = Emulator( # plot some useful marginals p = plot(posterior) -vline!(p, mean(phys_parameters[end,:,:],dims=1), linewidth=5) +vline!(p, mean(phys_parameters[end,:,:],dims=1), linewidth=5, color=:red) # vline!(p, mean(phys_parameters[end,:,:],dims=1)) # where training data ended up. savefig(p, joinpath(@__DIR__, "catke_posterior.png")) savefig(p, joinpath(@__DIR__, "catke_posterior.pdf")) -plot!(prior, color=:grey) +plot!(p, prior, color=:grey) +minval=minimum(minimum(phys_parameters[iter_train,:,:],dims=1),dims=2)[:] +maxval=maximum(maximum(phys_parameters[iter_train,:,:],dims=1),dims=2)[:] +vline!(p, reshape(minval,1,:), linewidth=5, color=:grey) +vline!(p, reshape(maxval,1,:), linewidth=5, color=:grey) savefig(p, joinpath(@__DIR__, "catke_prior_posterior.png")) savefig(p, joinpath(@__DIR__, "catke_prior_posterior.pdf")) @@ -256,7 +261,8 @@ posterior_untuned = MarkovChainMonteCarlo.get_posterior(mcmc_untuned, chain_untu p2 = plot(posterior) plot!(p2, posterior_untuned, color=:grey, alpha=0.3) -vline!(p2, mean(phys_parameters[end,:,:],dims=1), linewidth=5) +vline!(p2, mean(phys_parameters[end,:,:],dims=1), linewidth=5, color=:red) + savefig(p2, joinpath(@__DIR__, "catke_tuned-untuned_posterior.png")) savefig(p2, joinpath(@__DIR__, "catke_tuned-untuned_posterior.pdf"))