From d9f017ae067ea23c3641b44b51eaaac05af29efb Mon Sep 17 00:00:00 2001 From: EngPeterAtef Date: Tue, 19 Mar 2024 23:56:46 +0200 Subject: [PATCH 1/3] Refactor SurveyDesign constructor to SurveyDesign! This commit updates the SurveyDesign constructor to SurveyDesign! across multiple files including SurveyDesign.jl, by.jl, and runtests.jl. The change reflects a move towards a more explicit indication of in-place modifications or significant changes that the constructor might perform on the data it receives. This naming convention aligns with Julia's standard practice of using the bang symbol (!) to denote functions that modify their arguments or have important side effects. Additionally, minor formatting adjustments were made in runtests.jl to improve code readability and consistency with Julia's style guidelines. --- src/SurveyDesign.jl | 4 ++-- src/by.jl | 2 +- test/runtests.jl | 24 ++++++++++++------------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/SurveyDesign.jl b/src/SurveyDesign.jl index ac699c0..4b350c7 100644 --- a/src/SurveyDesign.jl +++ b/src/SurveyDesign.jl @@ -52,7 +52,7 @@ struct SurveyDesign <: AbstractSurveyDesign allprobs::Symbol # Right now only singlestage approx supported pps::Bool # TODO functionality # Single stage clusters sample, like apiclus1 - function SurveyDesign( + function SurveyDesign!( data::AbstractDataFrame; clusters::Union{Nothing,Symbol,Vector{Symbol}} = nothing, strata::Union{Nothing,Symbol} = nothing, @@ -323,7 +323,7 @@ struct ReplicateDesign{ReplicateType} <: AbstractSurveyDesign rename!(data, [replicate_weights[index] => "replicate_"*string(index) for index in 1:length(replicate_weights)]) # call the SurveyDesign constructor - base_design = SurveyDesign( + base_design = SurveyDesign!( data; clusters=clusters, strata=strata, diff --git a/src/by.jl b/src/by.jl index abf7667..bae560b 100644 --- a/src/by.jl +++ b/src/by.jl @@ -1,5 +1,5 @@ function subset(group, design::SurveyDesign) - return 
SurveyDesign(DataFrame(group);clusters = design.cluster, strata = design.strata, popsize = design.popsize, weights = design.weights) + return SurveyDesign!(DataFrame(group);clusters = design.cluster, strata = design.strata, popsize = design.popsize, weights = design.weights) end function subset(group, design::ReplicateDesign) diff --git a/test/runtests.jl b/test/runtests.jl index 77aa464..c63fc41 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,23 +7,23 @@ using DataFrames const STAT_TOL = 1e-5 const SE_TOL = 1e-1 TOTAL_REPLICATES = 4000 -REPLICATES_VECTOR = [Symbol("replicate_"*string(i)) for i in 1:TOTAL_REPLICATES] +REPLICATES_VECTOR = [Symbol("replicate_" * string(i)) for i in 1:TOTAL_REPLICATES] REPLICATES_REGEX = r"r*_\d" # Simple random sample apisrs = load_data("apisrs") # Load API dataset -srs = SurveyDesign(apisrs, weights = :pw) -unitrange = UnitRange((length(names(apisrs)) + 1):(TOTAL_REPLICATES + length(names(apisrs)))) +srs = SurveyDesign!(apisrs, weights=:pw) +unitrange = UnitRange((length(names(apisrs))+1):(TOTAL_REPLICATES+length(names(apisrs)))) bsrs = srs |> bootweights # Create bootstrap replicate design jsrs = srs |> jackknifeweights # Create jackknife replicate design -bsrs_direct = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_VECTOR, weights = :pw) # using ReplicateDesign constructor -bsrs_unitrange = ReplicateDesign{BootstrapReplicates}(bsrs.data, unitrange, weights = :pw) # using ReplicateDesign constructor -bsrs_regex = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_REGEX, weights = :pw) # using ReplicateDesign constructor +bsrs_direct = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_VECTOR, weights=:pw) # using ReplicateDesign constructor +bsrs_unitrange = ReplicateDesign{BootstrapReplicates}(bsrs.data, unitrange, weights=:pw) # using ReplicateDesign constructor +bsrs_regex = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_REGEX, weights=:pw) # using ReplicateDesign constructor # 
Stratified sample apistrat = load_data("apistrat") # Load API dataset -dstrat = SurveyDesign(apistrat, strata = :stype, weights = :pw) # Create SurveyDesign -unitrange = UnitRange((length(names(apistrat)) + 1):(TOTAL_REPLICATES + length(names(apistrat)))) +dstrat = SurveyDesign!(apistrat, strata=:stype, weights=:pw) # Create SurveyDesign +unitrange = UnitRange((length(names(apistrat))+1):(TOTAL_REPLICATES+length(names(apistrat)))) bstrat = dstrat |> bootweights # Create replicate design bstrat_direct = ReplicateDesign{BootstrapReplicates}(bstrat.data, REPLICATES_VECTOR, strata=:stype, weights=:pw) # using ReplicateDesign constructor bstrat_unitrange = ReplicateDesign{BootstrapReplicates}(bstrat.data, unitrange, strata=:stype, weights=:pw) # using ReplicateDesign constructor @@ -32,8 +32,8 @@ bstrat_regex = ReplicateDesign{BootstrapReplicates}(bstrat.data, REPLICATES_REGE # One-stage cluster sample apiclus1 = load_data("apiclus1") # Load API dataset apiclus1[!, :pw] = fill(757 / 15, (size(apiclus1, 1),)) # Correct api mistake for pw column -dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) # Create SurveyDesign -unitrange = UnitRange((length(names(apiclus1)) + 1):(TOTAL_REPLICATES + length(names(apiclus1)))) +dclus1 = SurveyDesign!(apiclus1; clusters=:dnum, weights=:pw) # Create SurveyDesign +unitrange = UnitRange((length(names(apiclus1))+1):(TOTAL_REPLICATES+length(names(apiclus1)))) dclus1_boot = dclus1 |> bootweights # Create replicate design dclus1_boot_direct = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, REPLICATES_VECTOR, clusters=:dnum, weights=:pw) # using ReplicateDesign constructor dclus1_boot_unitrange = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, unitrange, clusters=:dnum, weights=:pw) # using ReplicateDesign constructor @@ -41,14 +41,14 @@ dclus1_boot_regex = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, REPLI # Two-stage cluster sample apiclus2 = load_data("apiclus2") # Load API dataset -dclus2 = 
SurveyDesign(apiclus2; clusters = :dnum, weights = :pw) # Create SurveyDesign +dclus2 = SurveyDesign!(apiclus2; clusters=:dnum, weights=:pw) # Create SurveyDesign dclus2_boot = dclus2 |> bootweights # Create replicate design # NHANES nhanes = load_data("nhanes") nhanes.seq1 = collect(1.0:5.0:42955.0) nhanes.seq2 = collect(1.0:9.0:77319.0) # [9k for k in 0:8590.0] -dnhanes = SurveyDesign(nhanes; clusters = :SDMVPSU, strata = :SDMVSTRA, weights = :WTMEC2YR) +dnhanes = SurveyDesign!(nhanes; clusters=:SDMVPSU, strata=:SDMVSTRA, weights=:WTMEC2YR) dnhanes_boot = dnhanes |> bootweights @testset "Survey.jl" begin From 3e804b9ef9cf7e35610321234c1ae67a9991f94b Mon Sep 17 00:00:00 2001 From: EngPeterAtef Date: Wed, 20 Mar 2024 00:10:59 +0200 Subject: [PATCH 2/3] Remove deprecated constructor syntax in SurveyDesign. By mistake I changed the names of classes, not functions --- test/runtests.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index c63fc41..b897801 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -12,7 +12,7 @@ REPLICATES_REGEX = r"r*_\d" # Simple random sample apisrs = load_data("apisrs") # Load API dataset -srs = SurveyDesign!(apisrs, weights=:pw) +srs = SurveyDesign(apisrs, weights=:pw) unitrange = UnitRange((length(names(apisrs))+1):(TOTAL_REPLICATES+length(names(apisrs)))) bsrs = srs |> bootweights # Create bootstrap replicate design jsrs = srs |> jackknifeweights # Create jackknife replicate design @@ -22,7 +22,7 @@ bsrs_regex = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_REGEX, w # Stratified sample apistrat = load_data("apistrat") # Load API dataset -dstrat = SurveyDesign!(apistrat, strata=:stype, weights=:pw) # Create SurveyDesign +dstrat = SurveyDesign(apistrat, strata=:stype, weights=:pw) # Create SurveyDesign unitrange = UnitRange((length(names(apistrat))+1):(TOTAL_REPLICATES+length(names(apistrat)))) bstrat = dstrat |> bootweights # Create replicate design
bstrat_direct = ReplicateDesign{BootstrapReplicates}(bstrat.data, REPLICATES_VECTOR, strata=:stype, weights=:pw) # using ReplicateDesign constructor @@ -32,7 +32,7 @@ bstrat_regex = ReplicateDesign{BootstrapReplicates}(bstrat.data, REPLICATES_REGE # One-stage cluster sample apiclus1 = load_data("apiclus1") # Load API dataset apiclus1[!, :pw] = fill(757 / 15, (size(apiclus1, 1),)) # Correct api mistake for pw column -dclus1 = SurveyDesign!(apiclus1; clusters=:dnum, weights=:pw) # Create SurveyDesign +dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw) # Create SurveyDesign unitrange = UnitRange((length(names(apiclus1))+1):(TOTAL_REPLICATES+length(names(apiclus1)))) dclus1_boot = dclus1 |> bootweights # Create replicate design dclus1_boot_direct = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, REPLICATES_VECTOR, clusters=:dnum, weights=:pw) # using ReplicateDesign constructor @@ -41,14 +41,14 @@ dclus1_boot_regex = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, REPLI # Two-stage cluster sample apiclus2 = load_data("apiclus2") # Load API dataset -dclus2 = SurveyDesign!(apiclus2; clusters=:dnum, weights=:pw) # Create SurveyDesign +dclus2 = SurveyDesign(apiclus2; clusters=:dnum, weights=:pw) # Create SurveyDesign dclus2_boot = dclus2 |> bootweights # Create replicate design # NHANES nhanes = load_data("nhanes") nhanes.seq1 = collect(1.0:5.0:42955.0) nhanes.seq2 = collect(1.0:9.0:77319.0) # [9k for k in 0:8590.0] -dnhanes = SurveyDesign!(nhanes; clusters=:SDMVPSU, strata=:SDMVSTRA, weights=:WTMEC2YR) +dnhanes = SurveyDesign(nhanes; clusters=:SDMVPSU, strata=:SDMVSTRA, weights=:WTMEC2YR) dnhanes_boot = dnhanes |> bootweights @testset "Survey.jl" begin From 278cf201f0e70481e57c9ce95e3eb88febe70779 Mon Sep 17 00:00:00 2001 From: EngPeterAtef Date: Wed, 20 Mar 2024 01:35:58 +0200 Subject: [PATCH 3/3] Use SurveyDesign! for mutable operations This commit updates the usage of `SurveyDesign` to `SurveyDesign!` in various instances within the test suite. 
The change reflects the need for mutable operations on the survey design objects, aligning with the Julia convention of using the bang symbol (!) to indicate functions that modify their arguments in place. This adjustment ensures that the test suite correctly utilizes the API for creating and modifying survey designs, particularly in the context of applying bootstrap and jackknife weights, as well as handling different sampling strategies such as simple random sampling, stratified sampling, and cluster sampling. The update is critical for maintaining the integrity and accuracy of the survey analysis tests. --- test/runtests.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index b897801..c63fc41 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -12,7 +12,7 @@ REPLICATES_REGEX = r"r*_\d" # Simple random sample apisrs = load_data("apisrs") # Load API dataset -srs = SurveyDesign(apisrs, weights=:pw) +srs = SurveyDesign!(apisrs, weights=:pw) unitrange = UnitRange((length(names(apisrs))+1):(TOTAL_REPLICATES+length(names(apisrs)))) bsrs = srs |> bootweights # Create bootstrap replicate design jsrs = srs |> jackknifeweights # Create jackknife replicate design @@ -22,7 +22,7 @@ bsrs_regex = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_REGEX, w # Stratified sample apistrat = load_data("apistrat") # Load API dataset -dstrat = SurveyDesign(apistrat, strata=:stype, weights=:pw) # Create SurveyDesign +dstrat = SurveyDesign!(apistrat, strata=:stype, weights=:pw) # Create SurveyDesign unitrange = UnitRange((length(names(apistrat))+1):(TOTAL_REPLICATES+length(names(apistrat)))) bstrat = dstrat |> bootweights # Create replicate design bstrat_direct = ReplicateDesign{BootstrapReplicates}(bstrat.data, REPLICATES_VECTOR, strata=:stype, weights=:pw) # using ReplicateDesign constructor @@ -32,7 +32,7 @@ bstrat_regex = ReplicateDesign{BootstrapReplicates}(bstrat.data, REPLICATES_REGE # One-stage cluster 
sample apiclus1 = load_data("apiclus1") # Load API dataset apiclus1[!, :pw] = fill(757 / 15, (size(apiclus1, 1),)) # Correct api mistake for pw column -dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw) # Create SurveyDesign +dclus1 = SurveyDesign!(apiclus1; clusters=:dnum, weights=:pw) # Create SurveyDesign unitrange = UnitRange((length(names(apiclus1))+1):(TOTAL_REPLICATES+length(names(apiclus1)))) dclus1_boot = dclus1 |> bootweights # Create replicate design dclus1_boot_direct = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, REPLICATES_VECTOR, clusters=:dnum, weights=:pw) # using ReplicateDesign constructor @@ -41,14 +41,14 @@ dclus1_boot_regex = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, REPLI # Two-stage cluster sample apiclus2 = load_data("apiclus2") # Load API dataset -dclus2 = SurveyDesign(apiclus2; clusters=:dnum, weights=:pw) # Create SurveyDesign +dclus2 = SurveyDesign!(apiclus2; clusters=:dnum, weights=:pw) # Create SurveyDesign dclus2_boot = dclus2 |> bootweights # Create replicate design # NHANES nhanes = load_data("nhanes") nhanes.seq1 = collect(1.0:5.0:42955.0) nhanes.seq2 = collect(1.0:9.0:77319.0) # [9k for k in 0:8590.0] -dnhanes = SurveyDesign(nhanes; clusters=:SDMVPSU, strata=:SDMVSTRA, weights=:WTMEC2YR) +dnhanes = SurveyDesign!(nhanes; clusters=:SDMVPSU, strata=:SDMVSTRA, weights=:WTMEC2YR) dnhanes_boot = dnhanes |> bootweights @testset "Survey.jl" begin