Skip to content

Commit

Permalink
Updated Unit-Tests to be compatible with CRAN checks for MacOs
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhard-da committed Sep 19, 2023
1 parent af70894 commit bdb7e75
Show file tree
Hide file tree
Showing 6 changed files with 253 additions and 51 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: cellKey
Type: Package
Date: 2023-03-09
Date: 2023-03-13
Title: Consistent Perturbation of Statistical Frequency- And Magnitude Tables
Version: 1.0.0
Version: 1.0.1
Authors@R: c(
person(
given="Bernhard", family="Meindl",
Expand Down
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# cellKey 1.0.1
- Updated Unit-Tests to be compatible with CRAN checks for MacOs

# cellKey 1.0.0
- first version on CRAN
- updated due to changes in Package `ptable`
Expand Down
12 changes: 10 additions & 2 deletions tests/testthat/test_ck_generate_rkeys.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,14 @@ ck1 <- ck_generate_rkeys(dat = x, nr_digits = 8, seed = NULL)
ck2 <- ck_generate_rkeys(dat = x, nr_digits = 10, seed = 5)

test_that("recordkeys are correctly computed", {
expect_equal(digest::sha1(ck1), "a291a306de5db64e3fbcb94478982733a17ede20")
expect_equal(digest::sha1(ck2), "5677f580e6ed9fa59ab670b0e7badc67a37bf14c")
expect_identical(length(ck1), 4580L)
expect_identical(ck1[1], 0.97131759)
expect_identical(ck1[4580], 0.88019983)
expect_identical(round(mean(ck1), digits = 3), 0.501)


expect_identical(length(ck2), 4580L)
expect_identical(ck2[1], 0.2002144526)
expect_identical(ck2[4580], 0.1878750164)
expect_identical(round(mean(ck2), digits = 3), 0.508)
})
150 changes: 129 additions & 21 deletions tests/testthat/test_countvars.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
context("Testing Frequency Tables")

set.seed(120, sample.kind = "Reject")
dat <- ck_create_testdata()

Expand All @@ -11,22 +12,29 @@ dim_sex <- hier_create(root = "Total", nodes = c("male", "female"))
dim_age <- hier_create(root = "Total", paste0("age_group", 1:6))
dims <- list(sex = dim_sex, age = dim_age)
test_that("dims-hash is ok", {
expect_identical(digest::sha1(dim_sex), "fea2001f35be84e90b30f6773af75f03c11fbf7a")
expect_identical(digest::sha1(dim_age), "a7648dc3f484720911f0de0e6ac563b69fd20c42")
expect_identical(digest::sha1(dims), "62748837ca3246a33081dd35f50d06334caa3119")
expect_identical(class(dims), "list")
expect_identical(nrow(dim_sex), 3L)
expect_identical(max(dim_sex$level), 2)
expect_identical(nrow(dim_age), 7L)
expect_identical(max(dim_age$level), 2)
})

## test generation of destatis rkeys
rk1 <- ck_generate_rkeys(dat = dat, nr_digits = 5)
rk2 <- ck_generate_rkeys(dat = dat, nr_digits = 5)
test_that("check rkey generation and seed is ok", {
expect_identical(rk1, rk2)
expect_identical(digest::sha1(rk1), "4de74ed6170e2142ef552ee6722921db8d091d0c")
expect_identical(round(mean(rk1), digits = 3), 0.501)
})
dat$rec_key <- rk1
test_that("checking dimension and structure of generated testdata is ok", {
expect_identical(digest::sha1(dat), "fb66a8be3e9044c8fecdb13c6fab5fe9ec456c25")
expect_true(is.data.table(dat))
expect_identical(round(mean(dat$sampling_weight), digits = 3), 59.719)
expect_identical(round(mean(dat$household_weights), digits = 3), 21.834)

expect_identical(nrow(dat), 4580L)
expect_identical(ncol(dat), 16L)
expect_identical(sum(dat$sex == "male"), 2296L)
})

## perturbation parameters for count variables
Expand Down Expand Up @@ -54,8 +62,13 @@ test_that("ck_params_cnts() is ok", {
test_that("checking perturbation parameters for counts", {
expect_is(params_cnts, "ck_params")
expect_equal(params_cnts$type, "cnts")
expect_is(params_cnts$params$ptable, "data.table")
expect_identical(digest::sha1(params_cnts), "cedf56d7064f15e55da506b1922c4cac6035765f")
dt <- params_cnts$params$ptable
expect_is(dt, "data.table")
expect_identical(dim(dt), c(66L, 7L))
expect_identical(round(mean(dt$p), digits = 3), 0.136)
expect_identical(round(mean(dt$lb), digits = 3), 0.562)
expect_identical(round(mean(dt$ub), digits = 3), 0.698)
expect_identical(round(mean(dt$v), digits = 3), 1.045)
})

countvars <- NULL
Expand All @@ -79,9 +92,11 @@ expect_message(tab$perturb("total"), "Variable 'total' was already perturbed!")
res_freqtab <- tab$freqtab("total")
test_that("check ck_define_table() with already existing rec-keys", {
expect_is(tab, "cellkey_obj")
expect_identical(dim(res_freqtab), c(21L, 7L))
expect_identical(res_freqtab$uwc[3], 1143)
expect_identical(res_freqtab$puwc[3], 1147)
expect_identical(digest::sha1(res_freqtab), "05e71f630a385f7e428ce1fec21b5f6026bb921a")
expect_identical(round(mean(res_freqtab$pwc), digits = 3), 52096.24)
expect_identical(round(mean(res_freqtab$puwc), digits = 3), 872.333)
})

dat$rec_key <- NULL
Expand All @@ -99,23 +114,71 @@ expect_message(tab$perturb("total"), "Variable 'total' was already perturbed!")

test_that("ck_define_table() with new record keys is ok", {
expect_is(tab, "cellkey_obj")
expect_identical(digest::sha1(tab$freqtab("total")), "05e71f630a385f7e428ce1fec21b5f6026bb921a")
dt <- tab$freqtab("total")
expect_identical(dt$uwc[3], 1143)
expect_identical(dt$puwc[3], 1147)
expect_identical(round(mean(dt$pwc), digits = 3), 52096.24)
expect_identical(round(mean(dt$puwc), digits = 3), 872.333)
})

freqtab <- tab$freqtab("total")
test_that("weighted version of ck_perturb() is ok", {
expect_identical(digest::sha1(freqtab), "05e71f630a385f7e428ce1fec21b5f6026bb921a")
expect_identical(digest::sha1(tab$mod_cnts()), "ee05433bb69fbf66094cf14e5c50320b3060eab3")
expect_identical(freqtab$uwc[3], 1143)
expect_identical(freqtab$puwc[3], 1147)
expect_identical(round(mean(freqtab$pwc), digits = 3), 52096.24)
expect_identical(round(mean(freqtab$puwc), digits = 3), 872.333)

dt <- tab$mod_cnts()
expect_identical(dim(dt), c(21L, 6L))
expect_identical(round(mean(dt$ckey), digits = 3), 0.456)
expect_identical(round(mean(dt$pert), digits = 3), -0.048)
expect_identical(range(dt$row_nr), c(40, 65))
})

mm <- tab$measures_cnts("Total")
test_that("ck_cnt_measures() [exclude_zeros = TRUE] is ok", {
expect_identical(digest::sha1(mm), "89f4aba98334930446c2fb97b0812b7ece98ef6d")
expect_identical(range(as.numeric(mm$overview$noise)), c(1, 7))
expect_identical(range(as.numeric(mm$overview$cnt)), c(1, 9))
expect_identical(round(mean(as.numeric(mm$overview$pct)), digits = 3), 0.143)

expect_identical(range(as.numeric(mm$measures$d1)), c(0, 4))
expect_identical(range(as.numeric(mm$measures$d2)), c(0, 0.429))
expect_identical(range(as.numeric(mm$measures$d3)), c(0, 0.517))

expect_identical(range(mm$cumdistr_d1$cnt), c(9L, 21L))
expect_identical(round(range(mm$cumdistr_d1$pct), digits = 3), c(0.429, 1))

expect_identical(range(mm$cumdistr_d2$cnt), c(19L, 21L))
expect_identical(round(range(mm$cumdistr_d2$pct), digits = 3), c(0.905, 1))

expect_identical(range(mm$cumdistr_d3$cnt), c(12L, 21L))
expect_identical(round(range(mm$cumdistr_d3$pct), digits = 3), c(0.571, 1))
expect_identical(mm$false_nonzero, 0L)
expect_identical(mm$false_zero, 0L)
expect_identical(mm$exclude_zeros, TRUE)
})

mm <- tab$measures_cnts("Total", exclude_zeros = FALSE)
test_that("ck_cnt_measures() [exclude_zeros = FALSE] is ok", {
expect_identical(digest::sha1(mm), "9fb4ffe32ebc420d8ccecb6f3dab6e1431396fa0")
expect_identical(range(as.numeric(mm$overview$noise)), c(1, 7))
expect_identical(range(as.numeric(mm$overview$cnt)), c(1, 9))
expect_identical(round(mean(as.numeric(mm$overview$pct)), digits = 3), 0.143)

expect_identical(range(as.numeric(mm$measures$d1)), c(0, 4))
expect_identical(range(as.numeric(mm$measures$d2)), c(0, 0.429))
expect_identical(range(as.numeric(mm$measures$d3)), c(0, 0.517))

expect_identical(range(mm$cumdistr_d1$cnt), c(9L, 21L))
expect_identical(round(range(mm$cumdistr_d1$pct), digits = 3), c(0.429, 1))

expect_identical(range(mm$cumdistr_d2$cnt), c(19L, 21L))
expect_identical(round(range(mm$cumdistr_d2$pct), digits = 3), c(0.905, 1))

expect_identical(range(mm$cumdistr_d3$cnt), c(12L, 21L))
expect_identical(round(range(mm$cumdistr_d3$pct), digits = 3), c(0.571, 1))
expect_identical(mm$false_nonzero, 0L)
expect_identical(mm$false_zero, 0L)
expect_identical(mm$exclude_zeros, FALSE)
})

# no weights
Expand All @@ -133,8 +196,17 @@ tab$perturb("total")
freqtab <- tab$freqtab("total")

test_that("checking unweighted version of perturb()", {
expect_identical(digest::sha1(freqtab), "31831ab589f3bc7bdd20c4b6fd1ea1916e3edfef")
expect_identical(digest::sha1(tab$mod_cnts()), "ee05433bb69fbf66094cf14e5c50320b3060eab3")
expect_identical(dim(freqtab), c(21L, 7L))
expect_identical(freqtab$uwc[3], 1143)
expect_identical(freqtab$puwc[3], 1147)
expect_identical(freqtab$pwc, freqtab$puwc)
expect_identical(round(mean(freqtab$pwc), digits = 3), 872.333)

dt <- tab$mod_cnts()
expect_identical(dim(dt), c(21L, 6L))
expect_identical(round(mean(dt$ckey), digits = 3), 0.456)
expect_identical(round(mean(dt$pert), digits = 3), -0.048)
expect_identical(range(dt$row_nr), c(40, 65))
})

context("Testing multiple countvars")
Expand All @@ -153,16 +225,52 @@ tab <- ck_setup(
tab$params_cnts_set(params_cnts, v = NULL)
tab$perturb(c("total", "cnt_males", "cnt_highincome"))
test_that("check tabulation of cnt_males is ok", {
expect_identical(digest::sha1(tab$freqtab("cnt_males")), "a7260c1f65a089d8849084a7e84c946be997e986")
expect_identical(digest::sha1(tab$measures_cnts("cnt_males")), "b692361c523b0d37aa38bf252a360414cd941e2b")
dt <- tab$freqtab("cnt_males")
expect_identical(dt$uwc[3], 571)
expect_identical(dt$puwc[3], 571)
expect_identical(round(mean(dt$pwc), digits = 3), 25757.65)
expect_identical(round(mean(dt$puwc), digits = 3), 437.048)

expect_identical(round(range(dt$puwc), digits = 3), c(0, 2297))
expect_identical(round(range(dt$pwc), digits = 3), c(0, 135387.941))

mm <- tab$measures_cnts("cnt_males")
expect_identical(round(range(mm$overview$pct), digits = 3), c(0.095, 0.810))
expect_identical(round(range(mm$measures$d1), digits = 3), c(0, 4))
expect_identical(round(range(mm$measures$d2), digits = 3), c(0, 0.048))
expect_identical(round(range(mm$measures$d3), digits = 3), c(0, 0.221))
})

test_that("check tabulation of cnt_highincome is ok", {
expect_identical(digest::sha1(tab$freqtab("cnt_highincome")), "72c684edc0cebef0343bce02d0075050d5286a5c")
expect_identical(digest::sha1(tab$measures_cnts("cnt_highincome")), "639f20e406ecfdd44c84e0ae04674321c2a602b1")
dt <- tab$freqtab("cnt_highincome")
expect_identical(dt$uwc[3], 123)
expect_identical(dt$puwc[3], 123)
expect_identical(round(mean(dt$pwc), digits = 3), 5063.68)
expect_identical(round(mean(dt$puwc), digits = 3), 84.286)

expect_identical(round(range(dt$puwc), digits = 3), c(0, 444))
expect_identical(round(range(dt$pwc), digits = 3), c(0, 26671.928))

mm <- tab$measures_cnts("cnt_highincome")
expect_identical(round(range(mm$overview$pct), digits = 3), c(0.048, 0.667))
expect_identical(round(range(mm$measures$d1), digits = 3), c(0, 3))
expect_identical(round(range(mm$measures$d2), digits = 3), c(0, 0.158))
expect_identical(round(range(mm$measures$d3), digits = 3), c(0, 0.359))
})

test_that("check tabulation of multiple count variables is ok", {
tt <- tab$freqtab(c("total", "cnt_males", "cnt_highincome"))
expect_identical(digest::sha1(tt), "1984413d2bb4ebff2d268b243b300cd8494b55fb")
dt <- tab$freqtab(c("total", "cnt_males", "cnt_highincome"))
expect_identical(dim(dt), c(63L, 7L))
expect_identical(dt$uwc[3], 1143)
expect_identical(dt$puwc[3], 1148)
expect_identical(round(mean(dt$pwc), digits = 3), 27640.21)
expect_identical(round(mean(dt$puwc), digits = 3), 464.587)
expect_identical(round(range(dt$puwc), digits = 3), c(0, 4582))
expect_identical(round(range(dt$pwc), digits = 3), c(0, 273633.438))

mm <- tab$measures_cnts("cnt_highincome")
expect_identical(round(range(mm$overview$pct), digits = 3), c(0.048, 0.667))
expect_identical(round(range(mm$measures$d1), digits = 3), c(0, 3))
expect_identical(round(range(mm$measures$d2), digits = 3), c(0, 0.158))
expect_identical(round(range(mm$measures$d3), digits = 3), c(0, 0.359))
})
54 changes: 47 additions & 7 deletions tests/testthat/test_numvars.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,13 @@ x$rkey <- ck_generate_rkeys(dat = x, nr_digits = 8)

test_that("checking dimension and structure of generated testdata is ok", {
expect_true(is.data.table(x))
expect_identical(digest::sha1(x), "4c804693f7d573e9dfa24bcc312ba2b61b490ada")
expect_identical(digest::sha1(dims), "62748837ca3246a33081dd35f50d06334caa3119")
expect_identical(dim(x), c(4580L, 21L))
expect_identical(round(mean(x$rkey), digits = 3), 0.5)
expect_identical(round(mean(x$household_weights), digits = 3), 21.834)
expect_identical(range(x$mixed), c(-20L, 10L))
expect_identical(sum(x$cnt_males), 2296)
expect_identical(sum(x$cnt_females), 2284)
expect_identical(sum(x$cnt_highincome), 445)
})

## perturbation parameters for count variables
Expand Down Expand Up @@ -85,7 +90,27 @@ p2 <- ck_params_nums(
test_that("checking perturbation parameters", {
expect_is(p1, "ck_params")
expect_equal(p1$type, "params_m_flex")
expect_identical(digest::sha1(p1), "53299f7eab1d919060e12311955ed3eb02f0b38b")

pp <- p1$params

expect_identical(pp$type, "top_contr")
expect_identical(pp$top_k, 3)
expect_identical(dim(pp$ptab), c(96L, 7L))
expect_identical(round(mean(pp$ptab$p), digits = 3), 0.042)

expect_identical(pp$mu_c, 2.5)
expect_identical(pp$m_fixed_sq, NA)
expect_identical(pp$zs, 0)
expect_identical(pp$E, 1.34)
expect_identical(pp$mult_params$fp, 1000)
expect_identical(pp$mult_params$p_small, 0.2)
expect_identical(pp$mult_params$p_large, 0.03)
expect_identical(pp$mult_params$epsilon, c(1, 0.5, 0.3))

expect_identical(pp$same_key, FALSE)
expect_identical(pp$use_zero_rkeys, TRUE)
expect_identical(pp$even_odd, FALSE)
expect_identical(pp$separation, FALSE)
})

# set up problem
Expand Down Expand Up @@ -114,8 +139,23 @@ expect_message(tab$perturb("income"), "Numeric variable 'income' was perturbed."
expect_message(tab$perturb("savings"), "Numeric variable 'savings' was perturbed.")

test_that("variable was correctly perturbed", {
expect_equal(digest::sha1(tab$numtab("income", mean_before_sum = FALSE)), "29eec69ec43987831d951b7e6ecd4d423b27f5e2")
expect_equal(digest::sha1(tab$numtab("savings", mean_before_sum = FALSE)), "77abfc53132ec03b5a523764eefb52e00dca897f")
expect_equal(digest::sha1(tab$numtab("income", mean_before_sum = TRUE)), "98f4cdbe6b4b362aa0b9ab31337fc39c4d807c1e")
expect_equal(digest::sha1(tab$numtab("savings", mean_before_sum = TRUE)), "39a0fa3ddfd8b45778549711d9f9f58c843b0912")
dt <- tab$numtab("income", mean_before_sum = FALSE)
expect_identical(dim(dt), c(21L, 6L))
expect_equal(round(mean(dt$pws), digits = 3), 13181967.1)
expect_equal(round(mean(dt$ws), digits = 3), 13184145.52)

dt <- tab$numtab("savings", mean_before_sum = FALSE)
expect_identical(dim(dt), c(21L, 6L))
expect_equal(round(mean(dt$pws), digits = 3), 1306306.6)
expect_equal(round(mean(dt$ws), digits = 3), 1306303.048)

dt <- tab$numtab("income", mean_before_sum = TRUE)
expect_identical(dim(dt), c(21L, 6L))
expect_equal(round(mean(dt$pws), digits = 3), 13179895.45)
expect_equal(round(mean(dt$ws), digits = 3), 13184145.52)

dt <- tab$numtab("savings", mean_before_sum = TRUE)
expect_identical(dim(dt), c(21L, 6L))
expect_equal(round(mean(dt$pws), digits = 3), 1306310.272)
expect_equal(round(mean(dt$ws), digits = 3), 1306303.048)
})
Loading

0 comments on commit bdb7e75

Please sign in to comment.