From f54df718d208e80c5400aba96fd35e2edf4e0a96 Mon Sep 17 00:00:00 2001 From: Siddhant Chaudhary Date: Sun, 3 Jul 2022 21:01:15 +0530 Subject: [PATCH 1/2] Added `ndims` parameter to frequentist model; this represents the dimensions of the dataset on which the model was trained. --- src/fitmodel.jl | 5 +++-- src/frequentist/linear_regression.jl | 7 ++++++- src/frequentist/logistic_regression.jl | 7 ++++++- src/frequentist/negativebinomial_regression.jl | 7 ++++++- src/frequentist/poisson_regression.jl | 7 ++++++- 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/fitmodel.jl b/src/fitmodel.jl index 33632ee..9313c98 100644 --- a/src/fitmodel.jl +++ b/src/fitmodel.jl @@ -3,16 +3,17 @@ Type to represent frequentist regression models returned by `fitmodel` functions """ struct FrequentistRegression{RegressionType} model + ndims end """ ```julia -FrequentistRegression(::Symbol, model) +FrequentistRegression(::Symbol, model, ndims) ``` Constructor for `FrequentistRegression`. `model` can be any regression model. Used by `fitmodel` functions to return a frequentist regression model containers. """ -FrequentistRegression(RegressionType::Symbol, model) = FrequentistRegression{RegressionType}(model) +FrequentistRegression(RegressionType::Symbol, model, ndims) = FrequentistRegression{RegressionType}(model, ndims) """ Type to represent bayesian regression models returned by `fitmodel` functions. This type is used internally by the package to represent all bayesian regression models. diff --git a/src/frequentist/linear_regression.jl b/src/frequentist/linear_regression.jl index 5e3583b..0418892 100644 --- a/src/frequentist/linear_regression.jl +++ b/src/frequentist/linear_regression.jl @@ -52,6 +52,11 @@ julia> plot(cooksdistance(container)); """ function fitmodel(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegression) formula = apply_schema(formula, schema(formula, data)) + y, X = modelcols(formula, data) + fm_frame = ModelFrame(formula,data) + X = modelmatrix(fm_frame) + model = lm(formula, data) - return FrequentistRegression(:LinearRegression, model) + ndims = (size(X, 1), size(X, 2)) + return FrequentistRegression(:LinearRegression, model, ndims) end diff --git a/src/frequentist/logistic_regression.jl b/src/frequentist/logistic_regression.jl index cd7277b..94b5081 100644 --- a/src/frequentist/logistic_regression.jl +++ b/src/frequentist/logistic_regression.jl @@ -26,8 +26,13 @@ end function logistic_reg(formula::FormulaTerm, data::DataFrame, Link::GLM.Link) formula = apply_schema(formula, schema(formula, data)) + y, X = modelcols(formula, data) + fm_frame=ModelFrame(formula,data) + X = modelmatrix(fm_frame) + model = glm(formula, data, Binomial(), Link) - return FrequentistRegression(:LogisticRegression, model) + ndims = (size(X, 1), size(X, 2)) + return FrequentistRegression(:LogisticRegression, model, ndims) end """ diff --git a/src/frequentist/negativebinomial_regression.jl b/src/frequentist/negativebinomial_regression.jl index 9ebddbf..5ecf52d 100644 --- a/src/frequentist/negativebinomial_regression.jl +++ b/src/frequentist/negativebinomial_regression.jl @@ -17,8 +17,13 @@ end function negativebinomial_reg(formula::FormulaTerm, data::DataFrame, Link::GLM.Link) formula = apply_schema(formula, schema(formula, data)) + y, X = modelcols(formula, data) + fm_frame = ModelFrame(formula,data) + X = modelmatrix(fm_frame) + model = glm(formula, data, NegativeBinomial(), Link) - return FrequentistRegression(:NegativeBinomialRegression, model) + ndims = (size(X, 1), size(X, 2)) + return FrequentistRegression(:NegativeBinomialRegression, model, ndims) end """ diff --git a/src/frequentist/poisson_regression.jl b/src/frequentist/poisson_regression.jl index 72148ec..2922f7e 100644 --- a/src/frequentist/poisson_regression.jl +++ b/src/frequentist/poisson_regression.jl @@ -10,8 +10,13 @@ end function poisson_reg(formula::FormulaTerm, data::DataFrame, Link::GLM.Link) formula = apply_schema(formula, schema(formula, data)) + y, X = modelcols(formula, data) + fm_frame = ModelFrame(formula,data) + X = modelmatrix(fm_frame) + model = glm(formula, data, Poisson(), Link) - return FrequentistRegression(:PoissonRegression, model) + ndims = (size(X, 1), size(X, 2)) + return FrequentistRegression(:PoissonRegression, model, ndims) end """ From 843cf3cd775b97f047237e6f35d5732b0363a031 Mon Sep 17 00:00:00 2001 From: Siddhant Chaudhary Date: Sun, 3 Jul 2022 21:25:43 +0530 Subject: [PATCH 2/2] Modifying code for `aic` and `bic` functions (instead of using GLM's versions, we give our own implementation). Also fixed the number of parameters (`ndims[2]`) for frequentist linear regression. --- src/frequentist/getter.jl | 7 +++++-- src/frequentist/linear_regression.jl | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/frequentist/getter.jl b/src/frequentist/getter.jl index c5cec89..62dd0d8 100644 --- a/src/frequentist/getter.jl +++ b/src/frequentist/getter.jl @@ -15,11 +15,14 @@ function loglikelihood(container::FrequentistRegression) end function aic(container::FrequentistRegression) - return StatsBase.aic(container.model) + # container.ndims[2] is the number of parameters + return (2 * container.ndims[2] - 2 * loglikelihood(container)) end function bic(container::FrequentistRegression) - return StatsBase.bic(container.model) + # container.ndims[1] is the number of data points + # container.ndims[2] is the number of parameters + return (log(container.ndims[1]) * container.ndims[2] - 2 * loglikelihood(container)) end function sigma(container::FrequentistRegression) diff --git a/src/frequentist/linear_regression.jl b/src/frequentist/linear_regression.jl index 0418892..d6d67cd 100644 --- a/src/frequentist/linear_regression.jl +++ b/src/frequentist/linear_regression.jl @@ -57,6 +57,6 @@ function fitmodel(formula::FormulaTerm, data::DataFrame, modelClass::LinearRegre X = modelmatrix(fm_frame) model = lm(formula, data) - ndims = (size(X, 1), size(X, 2)) + ndims = (size(X, 1), size(X, 2) + 1) return FrequentistRegression(:LinearRegression, model, ndims) end