Skip to content

Commit

Permalink
tbl_svysummary() can now report design effects (#1487)
Browse files Browse the repository at this point in the history
* `tbl_svysummary()` can now report design effects

fix #1486

* updates to testing file

* snap update

* increment version number

* Update DESCRIPTION

* snapshot update

---------

Co-authored-by: Daniel Sjoberg <danield.sjoberg@gmail.com>
  • Loading branch information
larmarange and ddsjoberg authored Apr 15, 2023
1 parent 287f6b1 commit e6ab4e6
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 739 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: gtsummary
Title: Presentation-Ready Data Summary and Analytic Result
Tables
Version: 1.7.0.9007
Version: 1.7.0.9008
Authors@R:
c(person(given = "Daniel D.",
family = "Sjoberg",
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# gtsummary (development version)

* The `tbl_svysummary()` function may now report the design effect, e.g. `tbl_svysummary(statistic = ~"{deff}")`. (#1486)

* Bug fix when a subset of CIs are requested in `add_ci(include=)`. (#1484)

* Added French translations for new marginal effects tidiers housed in {broom.helpers}. (#1417)
Expand All @@ -20,6 +22,8 @@

* `add_ci.tbl_svysummary()` now takes properly into account the `percent` argument (#1470)

* `tbl_svysummary()` can now report design effects (#1486)

# gtsummary 1.7.0

### Breaking Changes
Expand Down
53 changes: 34 additions & 19 deletions R/tbl_svysummary.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#' \item `{N}` denominator, or cohort size
#' \item `{p}` percentage
#' \item `{p.std.error}` standard error of the sample proportion computed with [survey::svymean()]
#' \item `{deff}` design effect of the sample proportion computed with [survey::svymean()]
#' \item `{n_unweighted}` unweighted frequency
#' \item `{N_unweighted}` unweighted denominator
#' \item `{p_unweighted}` unweighted formatted percentage
Expand All @@ -34,6 +35,7 @@
#' \item `{median}` median
#' \item `{mean}` mean
#' \item `{mean.std.error}` standard error of the sample mean computed with [survey::svymean()]
#' \item `{deff}` design effect of the sample mean computed with [survey::svymean()]
#' \item `{sd}` standard deviation
#' \item `{var}` variance
#' \item `{min}` minimum
Expand Down Expand Up @@ -356,18 +358,19 @@ summarize_categorical_survey <- function(data, variable, by,

if (is.null(by)) {
if (percent %in% c("column", "cell")) {
svy_p <- survey::svymean(c_form(right = variable), data, na.rm = TRUE) %>%
svy_p <- survey::svymean(c_form(right = variable), data, na.rm = TRUE, deff = TRUE) %>%
as_tibble(rownames = "var_level") %>%
mutate(
variable_levels = str_sub(.data$var_level, stringr::str_length(variable) + 1)
) %>%
select(p = "mean", p.std.error = "SE", "variable_levels")
select(p = "mean", p.std.error = "SE", "deff", "variable_levels")
} else {
# this will have p=1 for all and p.std.error=0 for all
svy_p <- tibble(
variable_levels = levels(data$variables[[variable]]),
p = 1,
p.std.error = 0
p.std.error = 0,
deff = NaN
)
}
svy_table <-
Expand All @@ -377,37 +380,39 @@ summarize_categorical_survey <- function(data, variable, by,
left_join(svy_p, by = c("variable_levels"))
} else {
if (percent == "column") {
svy_p <- survey::svyby(c_form(right = variable), c_form(right = by), data, survey::svymean, na.rm = TRUE) %>%
svy_p <- survey::svyby(c_form(right = variable), c_form(right = by), data, survey::svymean, na.rm = TRUE, deff = TRUE) %>%
as_tibble() %>%
tidyr::pivot_longer(!one_of(by)) %>%
mutate(
stat = if_else(
str_starts(.data$name, paste0("se.", variable)) | str_starts(.data$name, paste0("se.`", variable, "`")),
"p.std.error",
"p"
stat = case_when(
str_starts(.data$name, paste0("se.", variable)) | str_starts(.data$name, paste0("se.`", variable, "`")) ~ "p.std.error",
str_starts(.data$name, paste0("DEff.", variable)) | str_starts(.data$name, paste0("DEff.`", variable, "`")) ~ "deff",
TRUE ~ "p"
),
name = stringr::str_remove_all(.data$name, "se\\.") %>%
stringr::str_remove_all("DEff\\.") %>%
str_remove_all(variable) %>%
str_remove_all("`")
) %>%
tidyr::pivot_wider(names_from = "stat", values_from = "value") %>%
set_names(c("by", "variable_levels", "p", "p.std.error"))
set_names(c("by", "variable_levels", "p", "p.std.error", "deff"))
} else if (percent == "row") {
svy_p <- survey::svyby(c_form(right = by), c_form(right = variable), data, survey::svymean, na.rm = TRUE) %>%
svy_p <- survey::svyby(c_form(right = by), c_form(right = variable), data, survey::svymean, na.rm = TRUE, deff = TRUE) %>%
as_tibble() %>%
tidyr::pivot_longer(!one_of(variable)) %>%
mutate(
stat = if_else(
str_starts(.data$name, paste0("se.", by)) | str_starts(.data$name, paste0("se.`", by, "`")),
"p.std.error",
"p"
stat = case_when(
str_starts(.data$name, paste0("se.", by)) | str_starts(.data$name, paste0("se.`", by, "`")) ~ "p.std.error",
str_starts(.data$name, paste0("DEff.", by)) | str_starts(.data$name, paste0("DEff.`", by, "`")) ~ "deff",
TRUE ~ "p"
),
name = stringr::str_remove_all(.data$name, "se\\.") %>%
stringr::str_remove_all("DEff\\.") %>%
str_remove_all(by) %>%
str_remove_all("`")
) %>%
tidyr::pivot_wider(names_from = "stat", values_from = "value") %>%
set_names(c("variable_levels", "by", "p", "p.std.error"))
set_names(c("variable_levels", "by", "p", "p.std.error", "deff"))
} else if (percent == "cell") {
inttemp <- expand.grid(
by = levels(data$variables[[by]]),
Expand All @@ -417,10 +422,10 @@ summarize_categorical_survey <- function(data, variable, by,
var_level = paste0("interaction(", .env$by, ", ", variable, ")", .data$by, ".", .data$variable_levels)
)

svy_p <- survey::svymean(c_inter(by, variable), data, na.rm = TRUE) %>%
svy_p <- survey::svymean(c_inter(by, variable), data, na.rm = TRUE, deff = TRUE) %>%
as_tibble(rownames = "var_level") %>%
dplyr::left_join(inttemp, by = "var_level") %>%
select(p = "mean", p.std.error = "SE", "by", "variable_levels")
select(p = "mean", p.std.error = "SE", "by", "deff", "variable_levels")
}

svy_table <-
Expand All @@ -447,7 +452,8 @@ summarize_categorical_survey <- function(data, variable, by,
mutate(
N = sum(.data$n),
p = if_else(.data$N == 0, NA_real_, .data$p), # re-introducing NA where relevant
p.std.error = if_else(.data$N == 0, NA_real_, .data$p.std.error)
p.std.error = if_else(.data$N == 0, NA_real_, .data$p.std.error),
deff = if_else(.data$N == 0, NA_real_, .data$deff)
) %>%
ungroup()

Expand Down Expand Up @@ -557,6 +563,9 @@ compute_survey_stat <- function(data, variable, by, f) {
if (f == "mean.std.error") {
fun <- svymean.std.error
}
if (f == "deff") {
fun <- svymean.deff
}
if (f == "median") {
fun <- svyquantile_version
args$quantiles <- .5
Expand Down Expand Up @@ -646,7 +655,7 @@ df_stats_fun_survey <- function(summary_type, variable, dichotomous_value, sort,
sort = "alphanumeric", percent = "column",
stat_display = "{n}"
) %>%
select(-"stat_display", -"p.std.error") %>%
select(-"stat_display", -"p.std.error", -"deff") %>%
rename(
p_miss = "p",
N_obs = "N",
Expand Down Expand Up @@ -730,6 +739,12 @@ svymean.std.error <- function(x, design, na.rm = FALSE, ...) {
survey::svymean(x = x, design = design, na.rm = na.rm, ...) %>% survey::SE()
}

# mean design effects
svymean.deff <- function(x, design, na.rm = FALSE, ...) {
survey::svymean(x = x, design = design, na.rm = na.rm, deff = TRUE) %>%
survey::deff()
}

# function chooses which quantile function to sue based on the survey pkg version
svyquantile_version <- function(...) {
fn <-
Expand Down
2 changes: 2 additions & 0 deletions R/utils-tbl_summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -543,8 +543,10 @@ stat_label_match <- function(stat_display, iqr = TRUE, range = TRUE) {
"{N_miss}", "N missing",
"{N_nonmiss}", "N",
"{N_obs}", "No. obs.",
"{mean.std.error}", "SE",
"{p.std.error}%", "SE(%)",
"{p.std.error}", "SE(%)",
"{deff}", "Design effect",
"{N_unweighted}", "N (unweighted)",
"{n_unweighted}", "n (unweighted)",
"{N_obs_unweighted}", "Total N (unweighted)",
Expand Down
2 changes: 2 additions & 0 deletions man/tbl_svysummary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit e6ab4e6

Please sign in to comment.