Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor SurveyDesign constructor to SurveyDesign! #318

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/SurveyDesign.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
- `weights::Union{Nothing, Symbol}=nothing`: the sampling weights.
- `popsize::Union{Nothing, Symbol}=nothing`: the (expected) survey population size.

```jldoctest

Check failure on line 30 in src/SurveyDesign.jl

View workflow job for this annotation

GitHub Actions / build

doctest failure in ~/work/Survey.jl/Survey.jl/src/SurveyDesign.jl:30-43

```jldoctest
julia> apiclus1 = load_data("apiclus1");

julia> dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw)
SurveyDesign:
data: 183×44 DataFrame
strata: none
cluster: dnum
[637, 637, 637 … 448]
popsize: [507.7049, 507.7049, 507.7049 … 507.7049]
sampsize: [15, 15, 15 … 15]
weights: [33.847, 33.847, 33.847 … 33.847]
allprobs: [0.0295, 0.0295, 0.0295 … 0.0295]
```

Subexpression:
dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw)

Evaluated output:
ERROR: MethodError: no method matching SurveyDesign(::DataFrames.DataFrame; clusters::Symbol, weights::Symbol)
Stacktrace:
[1] top-level scope @ none:1

Expected output:
SurveyDesign:
data: 183×44 DataFrame
strata: none
cluster: dnum
[637, 637, 637 … 448]
popsize: [507.7049, 507.7049, 507.7049 … 507.7049]
sampsize: [15, 15, 15 … 15]
weights: [33.847, 33.847, 33.847 … 33.847]
allprobs: [0.0295, 0.0295, 0.0295 … 0.0295]

diff =
Warning: Diff output requires color.
SurveyDesign:
data: 183×44 DataFrame
strata: none
cluster: dnum
[637, 637, 637 … 448]
popsize: [507.7049, 507.7049, 507.7049 … 507.7049]
sampsize: [15, 15, 15 … 15]
weights: [33.847, 33.847, 33.847 … 33.847]
allprobs: [0.0295, 0.0295, 0.0295 … 0.0295]
ERROR: MethodError: no method matching SurveyDesign(::DataFrames.DataFrame; clusters::Symbol, weights::Symbol)
Stacktrace:
[1] top-level scope @ none:1
julia> apiclus1 = load_data("apiclus1");

julia> dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw)
Expand All @@ -52,7 +52,7 @@
allprobs::Symbol # Right now only singlestage approx supported
pps::Bool # TODO functionality
# Single stage clusters sample, like apiclus1
function SurveyDesign(
function SurveyDesign!(
data::AbstractDataFrame;
clusters::Union{Nothing,Symbol,Vector{Symbol}} = nothing,
strata::Union{Nothing,Symbol} = nothing,
Expand Down Expand Up @@ -323,7 +323,7 @@
rename!(data, [replicate_weights[index] => "replicate_"*string(index) for index in 1:length(replicate_weights)])

# call the SurveyDesign constructor
base_design = SurveyDesign(
base_design = SurveyDesign!(
data;
clusters=clusters,
strata=strata,
Expand Down
2 changes: 1 addition & 1 deletion src/by.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
function subset(group, design::SurveyDesign)
return SurveyDesign(DataFrame(group);clusters = design.cluster, strata = design.strata, popsize = design.popsize, weights = design.weights)
return SurveyDesign!(DataFrame(group);clusters = design.cluster, strata = design.strata, popsize = design.popsize, weights = design.weights)
end

function subset(group, design::ReplicateDesign)
Expand Down
24 changes: 12 additions & 12 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,23 @@ using DataFrames
const STAT_TOL = 1e-5
const SE_TOL = 1e-1
TOTAL_REPLICATES = 4000
REPLICATES_VECTOR = [Symbol("replicate_"*string(i)) for i in 1:TOTAL_REPLICATES]
REPLICATES_VECTOR = [Symbol("replicate_" * string(i)) for i in 1:TOTAL_REPLICATES]
REPLICATES_REGEX = r"r*_\d"

# Simple random sample
apisrs = load_data("apisrs") # Load API dataset
srs = SurveyDesign(apisrs, weights = :pw)
unitrange = UnitRange((length(names(apisrs)) + 1):(TOTAL_REPLICATES + length(names(apisrs))))
srs = SurveyDesign(apisrs, weights=:pw)
unitrange = UnitRange((length(names(apisrs))+1):(TOTAL_REPLICATES+length(names(apisrs))))
bsrs = srs |> bootweights # Create bootstrap replicate design
jsrs = srs |> jackknifeweights # Create jackknife replicate design
bsrs_direct = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_VECTOR, weights = :pw) # using ReplicateDesign constructor
bsrs_unitrange = ReplicateDesign{BootstrapReplicates}(bsrs.data, unitrange, weights = :pw) # using ReplicateDesign constructor
bsrs_regex = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_REGEX, weights = :pw) # using ReplicateDesign constructor
bsrs_direct = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_VECTOR, weights=:pw) # using ReplicateDesign constructor
bsrs_unitrange = ReplicateDesign{BootstrapReplicates}(bsrs.data, unitrange, weights=:pw) # using ReplicateDesign constructor
bsrs_regex = ReplicateDesign{BootstrapReplicates}(bsrs.data, REPLICATES_REGEX, weights=:pw) # using ReplicateDesign constructor

# Stratified sample
apistrat = load_data("apistrat") # Load API dataset
dstrat = SurveyDesign(apistrat, strata = :stype, weights = :pw) # Create SurveyDesign
unitrange = UnitRange((length(names(apistrat)) + 1):(TOTAL_REPLICATES + length(names(apistrat))))
dstrat = SurveyDesign(apistrat, strata=:stype, weights=:pw) # Create SurveyDesign
unitrange = UnitRange((length(names(apistrat))+1):(TOTAL_REPLICATES+length(names(apistrat))))
bstrat = dstrat |> bootweights # Create replicate design
bstrat_direct = ReplicateDesign{BootstrapReplicates}(bstrat.data, REPLICATES_VECTOR, strata=:stype, weights=:pw) # using ReplicateDesign constructor
bstrat_unitrange = ReplicateDesign{BootstrapReplicates}(bstrat.data, unitrange, strata=:stype, weights=:pw) # using ReplicateDesign constructor
Expand All @@ -32,23 +32,23 @@ bstrat_regex = ReplicateDesign{BootstrapReplicates}(bstrat.data, REPLICATES_REGE
# One-stage cluster sample
apiclus1 = load_data("apiclus1") # Load API dataset
apiclus1[!, :pw] = fill(757 / 15, (size(apiclus1, 1),)) # Correct api mistake for pw column
dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw) # Create SurveyDesign
unitrange = UnitRange((length(names(apiclus1)) + 1):(TOTAL_REPLICATES + length(names(apiclus1))))
dclus1 = SurveyDesign(apiclus1; clusters=:dnum, weights=:pw) # Create SurveyDesign
unitrange = UnitRange((length(names(apiclus1))+1):(TOTAL_REPLICATES+length(names(apiclus1))))
dclus1_boot = dclus1 |> bootweights # Create replicate design
dclus1_boot_direct = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, REPLICATES_VECTOR, clusters=:dnum, weights=:pw) # using ReplicateDesign constructor
dclus1_boot_unitrange = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, unitrange, clusters=:dnum, weights=:pw) # using ReplicateDesign constructor
dclus1_boot_regex = ReplicateDesign{BootstrapReplicates}(dclus1_boot.data, REPLICATES_REGEX, clusters=:dnum, weights=:pw) # using ReplicateDesign constructor

# Two-stage cluster sample
apiclus2 = load_data("apiclus2") # Load API dataset
dclus2 = SurveyDesign(apiclus2; clusters = :dnum, weights = :pw) # Create SurveyDesign
dclus2 = SurveyDesign(apiclus2; clusters=:dnum, weights=:pw) # Create SurveyDesign
dclus2_boot = dclus2 |> bootweights # Create replicate design

# NHANES
nhanes = load_data("nhanes")
nhanes.seq1 = collect(1.0:5.0:42955.0)
nhanes.seq2 = collect(1.0:9.0:77319.0) # [9k for k in 0:8590.0]
dnhanes = SurveyDesign(nhanes; clusters = :SDMVPSU, strata = :SDMVSTRA, weights = :WTMEC2YR)
dnhanes = SurveyDesign(nhanes; clusters=:SDMVPSU, strata=:SDMVSTRA, weights=:WTMEC2YR)
dnhanes_boot = dnhanes |> bootweights

@testset "Survey.jl" begin
Expand Down
Loading