From 3ac63ffa324d278d51c0c8a402692c8131741314 Mon Sep 17 00:00:00 2001 From: Nandini-Jaiswal Date: Sun, 8 Dec 2024 23:56:20 +0530 Subject: [PATCH 1/4] Add `logistic_reg_vi()` function --- src/bayesian/getter.jl | 9 ++ src/bayesian/logistic_regression.jl | 118 +++++++++++++++--- test/numerical/bayesian/LogisticRegression.jl | 9 ++ 3 files changed, 120 insertions(+), 16 deletions(-) diff --git a/src/bayesian/getter.jl b/src/bayesian/getter.jl index b02bddf..9f0fc20 100644 --- a/src/bayesian/getter.jl +++ b/src/bayesian/getter.jl @@ -33,6 +33,15 @@ function predict(container::BayesianRegressionMCMC{:LogisticRegression}, newdata return vec(mean(container.link.link_function.(z), dims=2)) end +function predict(container::BayesianRegressionVI{:LogisticRegression}, newdata::DataFrame, number_of_samples::Int64 = 1000) + X = modelmatrix(container.formula, newdata) + + W = rand(CRRao_rng, container.dist, number_of_samples) + W = W[union(container.symbol_to_range[:β]...), :] + z = X * W + return vec(mean(container.link.link_function.(z), dims=2)) +end + function predict(container::BayesianRegressionMCMC{:NegativeBinomialRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) X = modelmatrix(container.formula, newdata) diff --git a/src/bayesian/logistic_regression.jl b/src/bayesian/logistic_regression.jl index d2f122f..0853509 100644 --- a/src/bayesian/logistic_regression.jl +++ b/src/bayesian/logistic_regression.jl @@ -1,4 +1,4 @@ -function logistic_reg(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, turingModel::Function, sim_size::Int64) +function logistic_reg_mcmc(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, turingModel::Function, sim_size::Int64) formula = apply_schema(formula, schema(formula, data),RegressionModel) y, X = modelcols(formula, data) @@ -9,9 +9,31 @@ function logistic_reg(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, tu return BayesianRegressionMCMC(:LogisticRegression, chain, formula, Link) end +function logistic_reg_vi(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, turingModel::Function, max_iter::Int64) + formula = apply_schema(formula, schema(formula, data),RegressionModel) + y, X = modelcols(formula, data) + + if max_iter < 500 + @warn "Max iterations should generally be atleast 500." + end + model = turingModel(X, y) + dist = vi(model, ADVI(100, max_iter)) + _, symbol_to_range = bijector(model, Val(true)) + return BayesianRegressionVI(:LinearRegression, dist, formula, symbol_to_range) +end + """ ```julia -fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Ridge, h::Float64 = 0.1, level::Float64 = 0.95, sim_size::Int64 = 1000) +fit( + formula::FormulaTerm, + data::DataFrame, + modelClass::LogisticRegression, + Link::CRRaoLink, prior::Prior_Ridge, + use_vi::Bool = false, + h::Float64 = 0.1, + level::Float64 = 0.95, + sim_size::Int64 = use_vi ? 20000 : 1000 +) ``` Fit a Bayesian Logistic Regression model on the input data with a Ridge prior with the provided `Link` function. @@ -185,9 +207,10 @@ function fit( modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Ridge, + use_vi::Bool = false, h::Float64 = 0.1, level::Float64 = 0.95, - sim_size::Int64 = 1000 + sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LogisticRegression(X, y) = begin p = size(X, 2) @@ -210,12 +233,26 @@ function fit( end end - return logistic_reg(formula, data, Link, LogisticRegression, sim_size) + if use_vi + return logistic_reg_vi(formula, data, Link, LogisticRegression, sim_size) + else + return logistic_reg_mcmc(formula, data, Link, LogisticRegression, sim_size) + end end """ ```julia -fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Laplace, h::Float64 = 0.1, level::Float64 = 0.95, sim_size::Int64 = 1000) +fit( + formula::FormulaTerm, + data::DataFrame, + modelClass::LogisticRegression, + Link::CRRaoLink, + prior::Prior_Laplace, + use_vi::Bool = false, + h::Float64 = 0.1, + level::Float64 = 0.95, + sim_size::Int64 = use_vi ? 20000 : 1000 +) ``` Fit a Bayesian Logistic Regression model on the input data with a Laplace prior with the provided `Link` function. @@ -384,9 +421,10 @@ function fit( modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Laplace, + use_vi::Bool = false, h::Float64 = 0.1, level::Float64 = 0.95, - sim_size::Int64 = 1000 + sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LogisticRegression(X, y) = begin p = size(X, 2) @@ -409,12 +447,26 @@ function fit( end end - return logistic_reg(formula, data, Link, LogisticRegression, sim_size) + if use_vi + return logistic_reg_vi(formula, data, Link, LogisticRegression, sim_size) + else + return logistic_reg_mcmc(formula, data, Link, LogisticRegression, sim_size) + end end """ ```julia -fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Cauchy, h::Float64 = 0.1, level::Float64 = 0.95, sim_size::Int64 = 1000) +fit( + formula::FormulaTerm, + data::DataFrame, + modelClass::LogisticRegression, + Link::CRRaoLink, + prior::Prior_Cauchy, + use_vi::Bool = false, + h::Float64 = 0.1, + level::Float64 = 0.95, + sim_size::Int64 = use_vi ? 20000 : 1000 +) ``` Fit a Bayesian Logistic Regression model on the input data with a Cauchy prior with the provided `Link` function. @@ -578,9 +630,10 @@ function fit( modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Cauchy, + use_vi::Bool = false, h::Float64 = 0.1, level::Float64 = 0.95, - sim_size::Int64 = 1000 + sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LogisticRegression(X, y) = begin p = size(X, 2) @@ -603,12 +656,26 @@ function fit( end end - return logistic_reg(formula, data, Link, LogisticRegression, sim_size) + if use_vi + return logistic_reg_vi(formula, data, Link, LogisticRegression, sim_size) + else + return logistic_reg_mcmc(formula, data, Link, LogisticRegression, sim_size) + end end """ ```julia -fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_TDist, h::Float64 = 1.0, level::Float64 = 0.95, sim_size::Int64 = 1000) +fit( + formula::FormulaTerm, + data::DataFrame, + modelClass::LogisticRegression, + Link::CRRaoLink, + prior::Prior_TDist, + use_vi::Bool = false, + h::Float64 = 3.0, + level::Float64 = 0.95, + sim_size::Int64 = use_vi ? 20000 : 1000 +) ``` Fit a Bayesian Logistic Regression model on the input data with a T-Dist prior with the provided `Link` function. @@ -797,9 +864,10 @@ function fit( modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_TDist, + use_vi::Bool = false, h::Float64 = 3.0, level::Float64 = 0.95, - sim_size::Int64 = 1000 + sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LogisticRegression(X, y) = begin p = size(X, 2) @@ -823,14 +891,27 @@ function fit( end end - return logistic_reg(formula, data, Link, LogisticRegression, sim_size) + if use_vi + return logistic_reg_vi(formula, data, Link, LogisticRegression, sim_size) + else + return logistic_reg_mcmc(formula, data, Link, LogisticRegression, sim_size) + end end """ ```julia -fit(formula::FormulaTerm,data::DataFrame,modelClass::LogisticRegression,Link::CRRaoLink,prior::Prior_HorseShoe,level::Float64 = 0.95,sim_size::Int64 = 1000) +fit( + formula::FormulaTerm, + data::DataFrame, + modelClass::LogisticRegression, + Link::CRRaoLink, + use_vi::Bool = false, + prior::Prior_HorseShoe, + level::Float64 = 0.95, + sim_size::Int64 = use_vi ? 20000 : 1000 +) ``` Fit a Bayesian Logistic Regression model on the input data with a HorseShoe prior with the provided `Link` function. @@ -1033,9 +1114,10 @@ function fit( data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, + use_vi::Bool = false, prior::Prior_HorseShoe, level::Float64 = 0.95, - sim_size::Int64 = 1000 + sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LogisticRegression(X, y) = begin p = size(X, 2) @@ -1069,5 +1151,9 @@ function fit( end end - return logistic_reg(formula, data, Link, LogisticRegression, sim_size) + if use_vi + return logistic_reg_vi(formula, data, Link, LogisticRegression, sim_size) + else + return logistic_reg_mcmc(formula, data, Link, LogisticRegression, sim_size) + end end \ No newline at end of file diff --git a/test/numerical/bayesian/LogisticRegression.jl b/test/numerical/bayesian/LogisticRegression.jl index 470d66b..1e42cc6 100644 --- a/test/numerical/bayesian/LogisticRegression.jl +++ b/test/numerical/bayesian/LogisticRegression.jl @@ -49,10 +49,19 @@ tests = [ ] for (prior, prior_testcases) in tests + # MCMC for (link, test_mean) in prior_testcases CRRao.set_rng(StableRNG(123)) model = fit(@formula(Vote ~ Age + Race + Income + Educate), turnout, LogisticRegression(), link, prior) prediction = predict(model, turnout) @test mean(prediction) - 2 * std(prediction) <= test_mean && test_mean <= mean(prediction) + 2 * std(prediction) end + + # VI + for (link, test_mean) in prior_testcases + CRRao.set_rng(StableRNG(123)) + model = fit(@formula(Vote ~ Age + Race + Income + Educate), turnout, LogisticRegression(), link, prior, true) + prediction = predict(model, turnout) + @test mean(prediction) - 2 * std(prediction) <= test_mean && test_mean <= mean(prediction) + 2 * std(prediction) + end end \ No newline at end of file From 0fca4c8991e03fb5984f8899ad306f51e5db42c3 Mon Sep 17 00:00:00 2001 From: Nandini-Jaiswal Date: Wed, 25 Dec 2024 09:44:32 +0530 Subject: [PATCH 2/4] Change `getters` --- src/bayesian/getter.jl | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/bayesian/getter.jl b/src/bayesian/getter.jl index 6b2d544..86a5b50 100644 --- a/src/bayesian/getter.jl +++ b/src/bayesian/getter.jl @@ -12,16 +12,7 @@ function predict(container::BayesianRegression{:LogisticRegression}, newdata::Da return vec(mean(container.link.link_function.(z), dims=2)) end -function predict(container::BayesianRegressionVI{:LogisticRegression}, newdata::DataFrame, number_of_samples::Int64 = 1000) - X = modelmatrix(container.formula, newdata) - - W = rand(CRRao_rng, container.dist, number_of_samples) - W = W[union(container.symbol_to_range[:β]...), :] - z = X * W - return vec(mean(container.link.link_function.(z), dims=2)) -end - -function predict(container::BayesianRegressionMCMC{:NegativeBinomialRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) +function predict(container::BayesianRegression{:NegativeBinomialRegression}, newdata::DataFrame, prediction_chain_start::Int64 = 200) X = modelmatrix(container.formula, newdata) W = container.samples[:, prediction_chain_start:end] z = X * W From 312a3a1d6629da014e2dcfb091a12fe80a7c917b Mon Sep 17 00:00:00 2001 From: Nandini-Jaiswal Date: Sat, 28 Dec 2024 18:04:47 +0530 Subject: [PATCH 3/4] Add docs --- docs/src/api/bayesian_regression.md | 10 +- src/bayesian/logistic_regression.jl | 162 +++++++----------- test/numerical/bayesian/LogisticRegression.jl | 10 +- 3 files changed, 75 insertions(+), 107 deletions(-) diff --git a/docs/src/api/bayesian_regression.md b/docs/src/api/bayesian_regression.md index c9e9ad2..385b539 100644 --- a/docs/src/api/bayesian_regression.md +++ b/docs/src/api/bayesian_regression.md @@ -46,24 +46,24 @@ fit(formula::FormulaTerm,data::DataFrame,modelClass::LinearRegression,prior::Pri ### Logistic Regression with Ridge Prior ```@docs -fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Ridge, h::Float64 = 0.1, level::Float64 = 0.95, sim_size::Int64 = 1000) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Ridge, algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.1, level::Float64 = 0.95) ``` ### Logistic Regression with Laplace Prior ```@docs -fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Laplace, h::Float64 = 0.1, level::Float64 = 0.95, sim_size::Int64 = 1000) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Laplace, algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.1, level::Float64 = 0.95) ``` ### Logistic Regression with Cauchy Prior ```@docs -fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Cauchy, h::Float64 = 0.1, level::Float64 = 0.95, sim_size::Int64 = 1000) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Cauchy, algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.1, level::Float64 = 0.95) ``` ### Logistic Regression with T-Distributed Prior ```@docs -fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_TDist, h::Float64 = 1.0, level::Float64 = 0.95, sim_size::Int64 = 1000) +fit(formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_TDist, algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 1.0, level::Float64 = 0.95) ``` ### Logistic Regression with Horse Shoe Prior ```@docs -fit(formula::FormulaTerm,data::DataFrame,modelClass::LogisticRegression,Link::CRRaoLink,prior::Prior_HorseShoe,level::Float64 = 0.95,sim_size::Int64 = 1000) +fit(formula::FormulaTerm,data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_HorseShoe, algorithm::BayesianAlgorithm = MCMC(), level::Float64 = 0.95) ``` ## Negative Binomial Regression diff --git a/src/bayesian/logistic_regression.jl b/src/bayesian/logistic_regression.jl index 9c68bf1..76d1ca7 100644 --- a/src/bayesian/logistic_regression.jl +++ b/src/bayesian/logistic_regression.jl @@ -1,11 +1,11 @@ -function logistic_reg_mcmc(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, turingModel::Function, sim_size::Int64) +function logistic_reg(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, turingModel::Function, algorithm::MCMC) formula = apply_schema(formula, schema(formula, data),RegressionModel) y, X = modelcols(formula, data) - if sim_size < 500 + if algorithm.sim_size < 500 @warn "Simulation size should generally be atleast 500." end - chain = sample(CRRao_rng, turingModel(X, y), NUTS(), sim_size) + chain = sample(CRRao_rng, turingModel(X, y), NUTS(), algorithm.sim_size) params = get_params(chain[:,:,:]) samples = params.β if isa(samples, Tuple) @@ -16,17 +16,18 @@ function logistic_reg_mcmc(formula::FormulaTerm, data::DataFrame, Link::CRRaoLin return BayesianRegression(:LogisticRegression, samples, formula, Link) end -function logistic_reg_vi(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, turingModel::Function, max_iter::Int64) +function logistic_reg(formula::FormulaTerm, data::DataFrame, Link::CRRaoLink, turingModel::Function, algorithm::VI) formula = apply_schema(formula, schema(formula, data),RegressionModel) y, X = modelcols(formula, data) - if max_iter < 500 + if algorithm.vi_max_iters < 500 @warn "Max iterations should generally be atleast 500." end model = turingModel(X, y) - dist = vi(model, ADVI(100, max_iter)) + dist = vi(model, ADVI(algorithm.vi_samples_per_step, algorithm.vi_max_iters)) _, symbol_to_range = bijector(model, Val(true)) - return BayesianRegressionVI(:LinearRegression, dist, formula, symbol_to_range) + samples = samples[union(symbol_to_range[:β]...), :] + return BayesianRegression(:LogisticRegression, samples, formula, Link) end """ @@ -35,11 +36,11 @@ fit( formula::FormulaTerm, data::DataFrame, modelClass::LogisticRegression, - Link::CRRaoLink, prior::Prior_Ridge, - use_vi::Bool = false, + Link::CRRaoLink, + prior::Prior_Ridge, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.1, level::Float64 = 0.95, - sim_size::Int64 = use_vi ? 20000 : 1000 ) ``` @@ -214,10 +215,9 @@ function fit( modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Ridge, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.1, level::Float64 = 0.95, - sim_size::Int64 = use_vi ? 20000 : 1000 ) @model LogisticRegression(X, y) = begin p = size(X, 2) @@ -239,12 +239,7 @@ function fit( y[i] ~ Bernoulli(prob[i]) end end - - if use_vi - return logistic_reg_vi(formula, data, Link, LogisticRegression, sim_size) - else - return logistic_reg_mcmc(formula, data, Link, LogisticRegression, sim_size) - end + return logistic_reg(formula, data, Link, LogisticRegression, algorithm) end """ @@ -255,10 +250,9 @@ fit( modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Laplace, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.1, level::Float64 = 0.95, - sim_size::Int64 = use_vi ? 20000 : 1000 ) ``` @@ -428,10 +422,9 @@ function fit( modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Laplace, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.1, - level::Float64 = 0.95, - sim_size::Int64 = use_vi ? 20000 : 1000 + level::Float64 = 0.95 ) @model LogisticRegression(X, y) = begin p = size(X, 2) @@ -454,11 +447,7 @@ function fit( end end - if use_vi - return logistic_reg_vi(formula, data, Link, LogisticRegression, sim_size) - else - return logistic_reg_mcmc(formula, data, Link, LogisticRegression, sim_size) - end + return logistic_reg(formula, data, Link, LogisticRegression, algorithm) end """ @@ -469,10 +458,9 @@ fit( modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Cauchy, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.1, level::Float64 = 0.95, - sim_size::Int64 = use_vi ? 20000 : 1000 ) ``` @@ -637,10 +625,9 @@ function fit( modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_Cauchy, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 0.1, - level::Float64 = 0.95, - sim_size::Int64 = use_vi ? 20000 : 1000 + level::Float64 = 0.95 ) @model LogisticRegression(X, y) = begin p = size(X, 2) @@ -662,12 +649,7 @@ function fit( y[i] ~ Bernoulli(prob[i]) end end - - if use_vi - return logistic_reg_vi(formula, data, Link, LogisticRegression, sim_size) - else - return logistic_reg_mcmc(formula, data, Link, LogisticRegression, sim_size) - end + return logistic_reg(formula, data, Link, LogisticRegression, algorithm) end """ @@ -678,10 +660,9 @@ fit( modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_TDist, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 3.0, level::Float64 = 0.95, - sim_size::Int64 = use_vi ? 20000 : 1000 ) ``` @@ -871,10 +852,9 @@ function fit( modelClass::LogisticRegression, Link::CRRaoLink, prior::Prior_TDist, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), h::Float64 = 3.0, - level::Float64 = 0.95, - sim_size::Int64 = use_vi ? 20000 : 1000 + level::Float64 = 0.95 ) @model LogisticRegression(X, y) = begin p = size(X, 2) @@ -897,12 +877,7 @@ function fit( y[i] ~ Bernoulli(prob[i]) end end - - if use_vi - return logistic_reg_vi(formula, data, Link, LogisticRegression, sim_size) - else - return logistic_reg_mcmc(formula, data, Link, LogisticRegression, sim_size) - end + return logistic_reg(formula, data, Link, LogisticRegression, algorithm) end @@ -914,10 +889,9 @@ fit( data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, - use_vi::Bool = false, + algorithm::BayesianAlgorithm = MCMC(), prior::Prior_HorseShoe, level::Float64 = 0.95, - sim_size::Int64 = use_vi ? 20000 : 1000 ) ``` @@ -1117,50 +1091,44 @@ Quantiles ``` """ function fit( - formula::FormulaTerm, - data::DataFrame, - modelClass::LogisticRegression, - Link::CRRaoLink, - use_vi::Bool = false, - prior::Prior_HorseShoe, - level::Float64 = 0.95, - sim_size::Int64 = use_vi ? 20000 : 1000 + formula::FormulaTerm, + data::DataFrame, + modelClass::LogisticRegression, + Link::CRRaoLink, + algorithm::BayesianAlgorithm = MCMC(), + prior::Prior_HorseShoe, + level::Float64 = 0.95 ) - @model LogisticRegression(X, y) = begin - p = size(X, 2) - n = size(X, 1) - #priors - #v ~ InverseGamma(h, h) - #α ~ TDist(1) - #β ~ filldist(Uniform(-v, v), p) - - halfcauchy = Truncated(TDist(1), 0, Inf) - - τ ~ halfcauchy ## Global Shrinkage - λ ~ filldist(halfcauchy, p) ## Local Shrinkage - σ ~ halfcauchy - #α ~ Normal(0, τ * σ) - β0 = repeat([0], p) ## prior mean - β ~ MvNormal(β0, λ * τ *σ) - - - #z = α .+ X * β - z = X * β - - ## Link Function - - #prob = Link.link.(z) - prob = Link.link_function.(z) - - #likelihood - for i = 1:n - y[i] ~ Bernoulli(prob[i]) - end - end - - if use_vi - return logistic_reg_vi(formula, data, Link, LogisticRegression, sim_size) - else - return logistic_reg_mcmc(formula, data, Link, LogisticRegression, sim_size) - end + @model LogisticRegression(X, y) = begin + p = size(X, 2) + n = size(X, 1) + #priors + #v ~ InverseGamma(h, h) + #α ~ TDist(1) + #β ~ filldist(Uniform(-v, v), p) + + halfcauchy = Truncated(TDist(1), 0, Inf) + + τ ~ halfcauchy ## Global Shrinkage + λ ~ filldist(halfcauchy, p) ## Local Shrinkage + σ ~ halfcauchy + #α ~ Normal(0, τ * σ) + β0 = repeat([0], p) ## prior mean + β ~ MvNormal(β0, λ * τ *σ) + + + #z = α .+ X * β + z = X * β + + ## Link Function + + #prob = Link.link.(z) + prob = Link.link_function.(z) + + #likelihood + for i = 1:n + y[i] ~ Bernoulli(prob[i]) + end + end + return logistic_reg(formula, data, Link, LogisticRegression, algorithm) end \ No newline at end of file diff --git a/test/numerical/bayesian/LogisticRegression.jl b/test/numerical/bayesian/LogisticRegression.jl index 1e42cc6..afbf8cb 100644 --- a/test/numerical/bayesian/LogisticRegression.jl +++ b/test/numerical/bayesian/LogisticRegression.jl @@ -53,15 +53,15 @@ for (prior, prior_testcases) in tests for (link, test_mean) in prior_testcases CRRao.set_rng(StableRNG(123)) model = fit(@formula(Vote ~ Age + Race + Income + Educate), turnout, LogisticRegression(), link, prior) - prediction = predict(model, turnout) - @test mean(prediction) - 2 * std(prediction) <= test_mean && test_mean <= mean(prediction) + 2 * std(prediction) + mcmc_prediction = predict(model, turnout) + @test mean(mcmc_prediction) - 2 * std(mcmc_prediction) <= test_mean && test_mean <= mean(mcmc_prediction) + 2 * std(mcmc_prediction) end # VI for (link, test_mean) in prior_testcases CRRao.set_rng(StableRNG(123)) - model = fit(@formula(Vote ~ Age + Race + Income + Educate), turnout, LogisticRegression(), link, prior, true) - prediction = predict(model, turnout) - @test mean(prediction) - 2 * std(prediction) <= test_mean && test_mean <= mean(prediction) + 2 * std(prediction) + model = fit(@formula(Vote ~ Age + Race + Income + Educate), turnout, LogisticRegression(), link, prior, VI()) + vi_prediction = predict(model, turnout) + @test mean(vi_prediction) - 2 * std(vi_prediction) <= test_mean && test_mean <= mean(vi_prediction) + 2 * std(vi_prediction) end end \ No newline at end of file From 80f0622abd97e36dd4fbab9e80848e7421a3d0bf Mon Sep 17 00:00:00 2001 From: Nandini-Jaiswal Date: Sat, 28 Dec 2024 18:35:04 +0530 Subject: [PATCH 4/4] Fix `argument` position --- src/bayesian/logistic_regression.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bayesian/logistic_regression.jl b/src/bayesian/logistic_regression.jl index 76d1ca7..e289b61 100644 --- a/src/bayesian/logistic_regression.jl +++ b/src/bayesian/logistic_regression.jl @@ -889,8 +889,8 @@ fit( data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, - algorithm::BayesianAlgorithm = MCMC(), prior::Prior_HorseShoe, + algorithm::BayesianAlgorithm = MCMC(), level::Float64 = 0.95, ) ``` @@ -1095,8 +1095,8 @@ function fit( data::DataFrame, modelClass::LogisticRegression, Link::CRRaoLink, - algorithm::BayesianAlgorithm = MCMC(), prior::Prior_HorseShoe, + algorithm::BayesianAlgorithm = MCMC(), level::Float64 = 0.95 ) @model LogisticRegression(X, y) = begin