diff --git a/docs/src/man/glm.md b/docs/src/man/glm.md index 799817b..5854d68 100644 --- a/docs/src/man/glm.md +++ b/docs/src/man/glm.md @@ -1,6 +1,6 @@ # [Generalized Linear Models in Survey](@id manual) -The `svyglm()` function in the Julia Survey package is used to fit generalized linear models (GLMs) to survey data. It incorporates survey design information, such as sampling weights, stratification, and clustering, to produce valid estimates and standard errors that account for the type of survey design. +The `glm()` function in the Julia Survey package is used to fit generalized linear models (GLMs) to survey data. It incorporates survey design information, such as sampling weights, stratification, and clustering, to produce valid estimates and standard errors that account for the type of survey design. As of June 2023, the [GLM.jl documentation](https://juliastats.org/GLM.jl/stable/) lists the supported distribution families and their link functions as: ```txt @@ -17,7 +17,7 @@ Refer to the GLM.jl documentation for more information about the GLM package. ## Fitting a GLM to a Survey Design object -You can fit a GLM to a Survey Design object the same way you would fit it to a regular data frame. The only difference is that you need to specify the survey design object as the second argument to the svyglm() function. +You can fit a GLM to a Survey Design object the same way you would fit it to a regular data frame. The only difference is that you need to specify the survey design object as the second argument to the glm() function. ```julia using Survey @@ -33,14 +33,14 @@ dstrat = SurveyDesign(apistrat, strata = :stype, weights = :pw) dclus1 = SurveyDesign(apiclus1, clusters = :dnum, weights = :pw) ``` -Once you have the survey design object, you can fit a GLM using the svyglm() function. Specify the formula for the model and the distribution family. +Once you have the survey design object, you can fit a GLM using the glm() function. Specify the formula for the model and the distribution family. -The svyglm() function supports all distribution families supported by GLM.jl, i.e. Bernoulli, Binomial, Gamma, Geometric, InverseGaussian, NegativeBinomial, Normal, and Poisson. +The glm() function supports all distribution families supported by GLM.jl, i.e. Bernoulli, Binomial, Gamma, Geometric, InverseGaussian, NegativeBinomial, Normal, and Poisson. For example, to fit a GLM with a Bernoulli distribution and a Logit link function to the `srs` survey design object we created above: ```julia formula = @formula(api00 ~ api99) -my_glm = svyglm(formula, srs, family = Normal()) +my_glm = glm(formula, srs, family = Normal()) # View the coefficients and standard errors my_glm.Coefficients diff --git a/src/Survey.jl b/src/Survey.jl index 9656d51..58bae5f 100644 --- a/src/Survey.jl +++ b/src/Survey.jl @@ -42,6 +42,6 @@ export boxplot export bootweights export ratio export jackknifeweights, variance -export @formula, glm, svyglm +export @formula, glm, glm end diff --git a/src/reg.jl b/src/reg.jl index 98764dc..ad80d94 100644 --- a/src/reg.jl +++ b/src/reg.jl @@ -1,5 +1,5 @@ """ -svyglm(formula::FormulaTerm, design::ReplicateDesign, args...; kwargs...) +glm(formula::FormulaTerm, design::ReplicateDesign, args...; kwargs...) Perform generalized linear modeling (GLM) using the survey design with replicates. @@ -17,11 +17,11 @@ A `DataFrame` containing the estimates for model coefficients and their standard apisrs = load_data("apisrs") srs = SurveyDesign(apisrs) bsrs = bootweights(srs, replicates = 2000) -result = svyglm(@formula(api00 ~ api99), bsrs, Normal()) +result = glm(@formula(api00 ~ api99), bsrs, Normal()) ``` ```jldoctest; setup = :(using Survey, StatsBase, GLM; apisrs = load_data("apisrs"); srs = SurveyDesign(apisrs); bsrs = bootweights(srs, replicates = 2000);) -julia> svyglm(@formula(api00 ~ api99), bsrs, Normal()) +julia> glm(@formula(api00 ~ api99), bsrs, Normal()) 2×2 DataFrame Row │ estimator SE │ Float64 Float64 @@ -31,18 +31,18 @@ julia> svyglm(@formula(api00 ~ api99), bsrs, Normal()) ```` """ -function svyglm(formula::FormulaTerm, design::ReplicateDesign, args...; kwargs...) +function glm(formula::FormulaTerm, design::ReplicateDesign, args...; kwargs...) rhs_symbols = typeof(formula.rhs) == Term ? Symbol.(formula.rhs) : collect(Symbol.(formula.rhs)) lhs_symbols = Symbol.(formula.lhs) columns = vcat(rhs_symbols, lhs_symbols) - function inner_svyglm(df::DataFrame, columns, weights_column, args...; kwargs...) + function inner_glm(df::DataFrame, columns, weights_column, args...; kwargs...) matrix = hcat(ones(nrow(df)), Matrix(df[!, columns[1:(length(columns)-1)]])) model = glm(matrix, (df[!, columns[end]]), args...; wts=df[!, weights_column], kwargs...) return coef(model) end # Compute standard error of coefficients - stderr(columns, inner_svyglm, design, args...; kwargs...) + stderr(columns, inner_glm, design, args...; kwargs...) end \ No newline at end of file diff --git a/test/reg.jl b/test/reg.jl index d7cff51..3459320 100644 --- a/test/reg.jl +++ b/test/reg.jl @@ -1,7 +1,7 @@ @testset "GLM in bootstrap apisrs" begin rename!(bsrs.data, Symbol("sch.wide") => :sch_wide) bsrs.data.sch_wide = ifelse.(bsrs.data.sch_wide .== "Yes", 1, 0) - model = svyglm(@formula(sch_wide ~ meals + ell), bsrs, Binomial()) + model = glm(@formula(sch_wide ~ meals + ell), bsrs, Binomial()) @test model.estimator[1] ≈ 1.523050676 rtol=STAT_TOL @test model.estimator[2] ≈ 0.009754261 rtol=STAT_TOL @@ -15,10 +15,10 @@ # data(api) # srs <- svydesign(id=~1, weights=~pw, data=apisrs) # bsrs <- as.svrepdesign(srs, type="subbootstrap", replicates=10000) - # model = svyglm(sch.wide ~ meals + ell, bsrs, family=binomial()) + # model = glm(sch.wide ~ meals + ell, bsrs, family=binomial()) # summary(model) - model = svyglm(@formula(api00 ~ api99), bsrs, Gamma(),InverseLink()) + model = glm(@formula(api00 ~ api99), bsrs, Gamma(),InverseLink()) @test model.estimator[1] ≈ 2.915479e-03 rtol=STAT_TOL @test model.estimator[2] ≈ -2.137187e-06 rtol=STAT_TOL @@ -29,10 +29,10 @@ # data(api) # srs <- svydesign(id=~1, weights=~pw, data=apisrs) # bsrs <- as.svrepdesign(srs, type="subbootstrap", replicates=10000) - # model = svyglm(api00 ~ api99, bsrs, family = Gamma(link = "inverse")) + # model = glm(api00 ~ api99, bsrs, family = Gamma(link = "inverse")) # summary(model) - model = svyglm(@formula(api00 ~ api99), bsrs, Normal()) + model = glm(@formula(api00 ~ api99), bsrs, Normal()) @test model.estimator[1] ≈ 63.2830726 rtol=STAT_TOL @test model.estimator[2] ≈ 0.9497618 rtol=STAT_TOL @@ -43,11 +43,11 @@ # data(api) # srs <- svydesign(id=~1, weights=~pw, data=apisrs) # bsrs <- as.svrepdesign(srs, type="subbootstrap", replicates=10000) - # model = svyglm(api00 ~ api99, bsrs, family = gaussian()) + # model = glm(api00 ~ api99, bsrs, family = gaussian()) # summary(model) rename!(bsrs.data, Symbol("api.stu") => :api_stu) - model = svyglm(@formula(api_stu ~ meals + ell), bsrs, Poisson()) + model = glm(@formula(api_stu ~ meals + ell), bsrs, Poisson()) @test model.estimator[1] ≈ 6.229602881 rtol=STAT_TOL @test model.estimator[2] ≈ -0.002038345 rtol=STAT_TOL @@ -61,14 +61,14 @@ # data(api) # srs <- svydesign(id=~1, weights=~pw, data=apisrs) # bsrs <- as.svrepdesign(srs, type="subbootstrap", replicates=10000) - # model = svyglm(api.stu ~ meals + ell, bsrs, family = poisson()) + # model = glm(api.stu ~ meals + ell, bsrs, family = poisson()) # summary(model) end @testset "GLM in jackknife apisrs" begin rename!(jsrs.data, Symbol("sch.wide") => :sch_wide) jsrs.data.sch_wide = ifelse.(jsrs.data.sch_wide .== "Yes", 1, 0) - model = svyglm(@formula(sch_wide ~ meals + ell), jsrs, Binomial()) + model = glm(@formula(sch_wide ~ meals + ell), jsrs, Binomial()) @test model.estimator[1] ≈ 1.523051 rtol=STAT_TOL @test model.estimator[2] ≈ 0.009754 rtol=1e-4 # This is a tiny bit off with 1e-5 @@ -82,10 +82,10 @@ end # data(api) # srs <- svydesign(id=~1, weights=~pw, data=apisrs) # jsrs <- as.svrepdesign(srs, type="JK1", replicates=10000) - # model = svyglm(sch.wide ~ meals + ell, jsrs, family=binomial()) + # model = glm(sch.wide ~ meals + ell, jsrs, family=binomial()) # summary(model) - model = svyglm(@formula(api00 ~ api99), jsrs, Gamma(), InverseLink()) + model = glm(@formula(api00 ~ api99), jsrs, Gamma(), InverseLink()) @test model.estimator[1] ≈ 2.915479e-03 rtol=STAT_TOL @test model.estimator[2] ≈ -2.137187e-06 rtol=STAT_TOL @@ -96,10 +96,10 @@ end # data(api) # srs <- svydesign(id=~1, weights=~pw, data=apisrs) # jsrs <- as.svrepdesign(srs, type="JK1", replicates=10000) - # model = svyglm(api00 ~ api99, jsrs, family = Gamma(link = "inverse")) + # model = glm(api00 ~ api99, jsrs, family = Gamma(link = "inverse")) # summary(model) - model = svyglm(@formula(api00 ~ api99), jsrs, Normal()) + model = glm(@formula(api00 ~ api99), jsrs, Normal()) @test model.estimator[1] ≈ 63.2830726 rtol=STAT_TOL @test model.estimator[2] ≈ 0.9497618 rtol=STAT_TOL @@ -110,11 +110,11 @@ end # data(api) # srs <- svydesign(id=~1, weights=~pw, data=apisrs) # jsrs <- as.svrepdesign(srs, type="JK1", replicates=10000) - # model = svyglm(api00 ~ api99, jsrs, family = gaussian()) + # model = glm(api00 ~ api99, jsrs, family = gaussian()) # summary(model) rename!(jsrs.data, Symbol("api.stu") => :api_stu) - model = svyglm(@formula(api_stu ~ meals + ell), jsrs, Poisson()) + model = glm(@formula(api_stu ~ meals + ell), jsrs, Poisson()) @test model.estimator[1] ≈ 6.229602881 rtol=STAT_TOL @test model.estimator[2] ≈ -0.002038345 rtol=STAT_TOL @@ -128,14 +128,14 @@ end # data(api) # srs <- svydesign(id=~1, weights=~pw, data=apisrs) # jsrs <- as.svrepdesign(srs, type="JK1", replicates=10000) - # model = svyglm(api.stu ~ meals + ell, jsrs, family = poisson()) + # model = glm(api.stu ~ meals + ell, jsrs, family = poisson()) # summary(model) end @testset "GLM in bootstrap apiclus1" begin rename!(dclus1_boot.data, Symbol("sch.wide") => :sch_wide) dclus1_boot.data.sch_wide = ifelse.(dclus1_boot.data.sch_wide .== "Yes", 1, 0) - model = svyglm(@formula(sch_wide ~ meals + ell), dclus1_boot, Binomial()) + model = glm(@formula(sch_wide ~ meals + ell), dclus1_boot, Binomial()) @test model.estimator[1] ≈ 1.89955691 rtol=STAT_TOL @test model.estimator[2] ≈ -0.01911468 rtol=STAT_TOL @@ -149,10 +149,10 @@ end # data(api) # srs <- svydesign(id=~dnum, weights=~pw, data=apiclus1) # dclus1_boot <- as.svrepdesign(srs, type="subbootstrap", replicates=10000) - # model = svyglm(sch.wide ~ meals + ell, dclus1_boot, family=binomial()) + # model = glm(sch.wide ~ meals + ell, dclus1_boot, family=binomial()) # summary(model) - model = svyglm(@formula(api00 ~ api99), dclus1_boot, Gamma(),InverseLink()) + model = glm(@formula(api00 ~ api99), dclus1_boot, Gamma(),InverseLink()) @test model.estimator[1] ≈ 2.914844e-03 rtol=STAT_TOL @test model.estimator[2] ≈ -2.180775e-06 rtol=STAT_TOL @@ -163,10 +163,10 @@ end # data(api) # srs <- svydesign(id=~dnum, weights=~pw, data=apiclus1) # dclus1_boot <- as.svrepdesign(srs, type="subbootstrap", replicates=10000) - # model = svyglm(api00 ~ api99, dclus1_boot, family = Gamma(link = "inverse")) + # model = glm(api00 ~ api99, dclus1_boot, family = Gamma(link = "inverse")) # summary(model) - model = svyglm(@formula(api00 ~ api99), dclus1_boot, Normal()) + model = glm(@formula(api00 ~ api99), dclus1_boot, Normal()) @test model.estimator[1] ≈ 95.28483 rtol=STAT_TOL @test model.estimator[2] ≈ 0.90429 rtol=STAT_TOL @@ -177,11 +177,11 @@ end # data(api) # srs <- svydesign(id=~dnum, weights=~pw, data=apiclus1) # dclus1_boot <- as.svrepdesign(srs, type="subbootstrap", replicates=10000) - # model = svyglm(api00 ~ api99, dclus1_boot, family = gaussian()) + # model = glm(api00 ~ api99, dclus1_boot, family = gaussian()) # summary(model) rename!(dclus1_boot.data, Symbol("api.stu") => :api_stu) - model = svyglm(@formula(api_stu ~ meals + ell), dclus1_boot, Poisson()) + model = glm(@formula(api_stu ~ meals + ell), dclus1_boot, Poisson()) @test model.estimator[1] ≈ 6.2961803529 rtol=STAT_TOL @test model.estimator[2] ≈ -0.0026906166 rtol=STAT_TOL @@ -195,14 +195,14 @@ end # data(api) # srs <- svydesign(id=~dnum, weights=~pw, data=apiclus1) # dclus1_boot <- as.svrepdesign(srs, type="subbootstrap", replicates=10000) - # model = svyglm(api.stu ~ meals + ell, dclus1_boot, family = poisson()) + # model = glm(api.stu ~ meals + ell, dclus1_boot, family = poisson()) # summary(model) end @testset "GLM in bootstrap apistrat" begin rename!(bstrat.data, Symbol("sch.wide") => :sch_wide) bstrat.data.sch_wide = ifelse.(bstrat.data.sch_wide .== "Yes", 1, 0) - model = svyglm(@formula(sch_wide ~ meals + ell), bstrat, Binomial()) + model = glm(@formula(sch_wide ~ meals + ell), bstrat, Binomial()) @test model.estimator[1] ≈ 1.560408424 rtol=STAT_TOL @test model.estimator[2] ≈ 0.003524761 rtol=STAT_TOL @@ -216,10 +216,10 @@ end # data(api) # srs <- svydesign(id=~1, strata=~dnum, weights=~pw, data=apistrat) # bstrat <- as.svrepdesign(srs, type="subbootstrap", replicates=10000) - # model = svyglm(sch.wide ~ meals + ell, bstrat, family=binomial()) + # model = glm(sch.wide ~ meals + ell, bstrat, family=binomial()) # summary(model) - model = svyglm(@formula(api00 ~ api99), bstrat, Gamma(),InverseLink()) + model = glm(@formula(api00 ~ api99), bstrat, Gamma(),InverseLink()) @test model.estimator[1] ≈ 2.873104e-03 rtol=STAT_TOL @test model.estimator[2] ≈ -2.088791e-06 rtol=STAT_TOL @@ -230,6 +230,6 @@ end # data(api) # srs <- svydesign(id=~1, strata=~dnum, weights=~pw, data=apistrat) # bstrat <- as.svrepdesign(srs, type="subbootstrap", replicates=10000) - # model = svyglm(api00 ~ api99, bstrat, family = Gamma(link = "inverse")) + # model = glm(api00 ~ api99, bstrat, family = Gamma(link = "inverse")) # summary(model) end \ No newline at end of file