Skip to content

Commit

Permalink
tbl_uvregression() to accept survey objects (#740)
Browse files Browse the repository at this point in the history
* added fn to remove survey columns

* updated to accept survey objects

* doc updates

* adding unit test

* doc update

* Increment version number

* doc updates
  • Loading branch information
ddsjoberg authored Jan 16, 2021
1 parent e52d94b commit d85aff2
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 39 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: gtsummary
Title: Presentation-Ready Data Summary and Analytic Result
Tables
Version: 1.3.6.9001
Version: 1.3.6.9002
Authors@R:
c(person(given = "Daniel D.",
family = "Sjoberg",
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# gtsummary (development version)

* `tbl_uvregression()` now accepts both data frames and survey design objects as input. (#742)

* Added function `add_vif()` to include variance inflation factors in `tbl_regression()` output. (#717)

# gtsummary 1.3.6
Expand Down
35 changes: 16 additions & 19 deletions R/tbl_svysummary.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,32 +91,15 @@
tbl_svysummary <- function(data, by = NULL, label = NULL, statistic = NULL,
digits = NULL, type = NULL, value = NULL,
missing = NULL, missing_text = NULL, sort = NULL,
percent = NULL, include = NULL) {
percent = NULL, include = everything()) {
# checking for survey package ------------------------------------------------
assert_package("survey", "tbl_svysummary()")

# test if data is a survey object
if (!is_survey(data)) stop("'data' should be a survey object (see svydesign()).", call. = FALSE)

# eval -----------------------------------------------------------------------
include <- select(data$variables, {{ include }}) %>% names()

# default selection for include
if (length(include) == 0) {
# look at data$call
if (is.null(data$call)) {
include <- names(data$variables)
} else {
exclude <- c(
all.vars(data$call$id),
all.vars(data$call$probs),
all.vars(data$call$strata),
all.vars(data$call$fpc),
all.vars(data$call$weights)
)
include <- setdiff(names(data$variables), exclude)
}
}
include <- select(.remove_survey_cols(data), {{ include }}) %>% names()

# setting defaults from gtsummary theme --------------------------------------
label <- label %||%
Expand Down Expand Up @@ -576,3 +559,17 @@ c_form <- function(left = NULL, right = 1) {
right <- paste(right, collapse = "+")
stats::as.formula(paste(left, "~", right))
}


# this function removes the weight/survey columns from x$variables
# used when columns must be selected from the survey object, and we dont want users
# to select the weighting columns
.remove_survey_cols <- function(x) {
if (is.data.frame(x)) return(x)
x$variables %>%
select(-any_of(c(all.vars(x$call$id),
all.vars(x$call$probs),
all.vars(x$call$strata),
all.vars(x$call$fpc),
all.vars(x$call$weights))))
}
43 changes: 28 additions & 15 deletions R/tbl_uvregression.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
#'
#' @param data Data frame to be used in univariate regression modeling. Data
#' frame includes the outcome variable(s) and the independent variables.
#' @param method Regression method (e.g. [lm], [glm], [survival::coxph], and more).
#' Survey design objects are also accepted.
#' @param method Regression method (e.g. [lm], [glm], [survival::coxph],
#' [survey::svyglm], and more).
#' @param y Model outcome (e.g. `y = recurrence` or `y = Surv(time, recur)`).
#' All other column in `data` will be regressed on `y`.
#' Specify one and only one of `y` or `x`
Expand Down Expand Up @@ -106,6 +108,12 @@ tbl_uvregression <- function(data, method, y = NULL, x = NULL, method.args = NUL
)
}

# checking input -------------------------------------------------------------
# data is a data frame
if (!is.data.frame(data) && !is_survey(data)) {
stop("`data` argument must be a data frame or survey object.", call. = FALSE)
}

# setting defaults -----------------------------------------------------------
pvalue_fun <-
pvalue_fun %||%
Expand Down Expand Up @@ -138,14 +146,18 @@ tbl_uvregression <- function(data, method, y = NULL, x = NULL, method.args = NUL
y <- rlang::enexpr(y)
x <-
tryCatch({
.select_to_varnames(select = !!x, data = data, arg_name = "x")
.select_to_varnames(select = !!x,
data = switch(is.data.frame(data), data) %||% .remove_survey_cols(data),
arg_name = "x")
}, error = function(e) {
rlang::expr_text(x)
})

y <-
tryCatch({
.select_to_varnames(select = !!y, data = data, arg_name = "y")
.select_to_varnames(select = !!y,
data = switch(is.data.frame(data), data) %||% .remove_survey_cols(data),
arg_name = "y")
}, error = function(e) {
rlang::expr_text(y)
})
Expand All @@ -163,19 +175,19 @@ tbl_uvregression <- function(data, method, y = NULL, x = NULL, method.args = NUL
include <-
.select_to_varnames(
select = {{ include }},
data = data,
data = switch(is.data.frame(data), data) %||% .remove_survey_cols(data),
arg_name = "include"
)
exclude <-
.select_to_varnames(
select = {{ exclude }},
data = data,
data = switch(is.data.frame(data), data) %||% .remove_survey_cols(data),
arg_name = "exclude"
)
show_single_row <-
.select_to_varnames(
select = {{ show_single_row }},
data = data,
data = switch(is.data.frame(data), data) %||% .remove_survey_cols(data),
arg_name = "show_single_row"
)

Expand Down Expand Up @@ -205,7 +217,7 @@ tbl_uvregression <- function(data, method, y = NULL, x = NULL, method.args = NUL
label <-
.formula_list_to_named_list(
x = label,
data = data,
data = switch(is.data.frame(data), data) %||% .remove_survey_cols(data),
arg_name = "label"
)

Expand All @@ -214,12 +226,6 @@ tbl_uvregression <- function(data, method, y = NULL, x = NULL, method.args = NUL
stop("Each `label` specified must be a string of length 1.", call. = FALSE)
}

# data -----------------------------------------------------------------------
# data is a data frame
if (!is.data.frame(data)) {
stop("`data` argument must be a data frame.", call. = FALSE)
}

# will return call, and all object passed to in table1 call
# the object func_inputs is a list of every object passed to the function
tbl_uvregression_inputs <- as.list(environment())
Expand All @@ -229,7 +235,7 @@ tbl_uvregression <- function(data, method, y = NULL, x = NULL, method.args = NUL

# get all vars not specified -------------------------------------------------
all_vars <-
names(data) %>%
names(switch(is.data.frame(data), data) %||% .remove_survey_cols(data)) %>%
# removing x or y variable
setdiff(paste(c(y, x), "~ 1") %>% stats::as.formula() %>% all.vars()) %>%
# removing any other variables listed in the formula
Expand Down Expand Up @@ -263,10 +269,17 @@ tbl_uvregression <- function(data, method, y = NULL, x = NULL, method.args = NUL
formula_chr = glue(formula),
model = map(
.data$formula_chr,
~list(method, formula = as.formula(.x), data = data) %>%
function(.x) {
call_list1 <- # defining formula and data call (or formula and design)
switch(is.data.frame(data),
list(method, formula = as.formula(.x), data = data)) %||%
list(method, formula = as.formula(.x), design = data)

call_list1 %>%
c(as.list(method.args)[-1]) %>%
as.call() %>%
eval()
}
),
# removing backticks
y = switch(is.null(.env$y), all_vars) %||% y,
Expand Down
2 changes: 1 addition & 1 deletion inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ saddlepoint
sig
survfit
svychisq
svyglm
svyranktest
svysummary
svyttest
Expand All @@ -68,4 +69,3 @@ tidyselect's
tidyverse
unhide
uvregression
varnames
2 changes: 1 addition & 1 deletion man/tbl_svysummary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions man/tbl_uvregression.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions tests/testthat/test-tbl_uvregression.R
Original file line number Diff line number Diff line change
Expand Up @@ -277,3 +277,25 @@ test_that("tbl_uvregression throw error with odd variable names in `data=`", {
trial %>% dplyr::rename(`age person` = age) %>% tbl_uvregression(method = lm, y = `age person`)
)
})

test_that("tbl_uvregression works with survey object", {
svy <- survey::svydesign(ids = ~1, data = trial, weights = ~1)

expect_error(
tbl_uvreg <-
svy %>%
tbl_uvregression(
y = response,
method = survey::svyglm,
method.args = list(family = binomial),
hide_n = TRUE,
include = c(response, age, marker, grade)
),
NA
)

expect_equal(
tbl_uvreg$tbls$age$model_obj %>% broom::tidy(),
survey::svyglm(response ~ age, design = svy, family = binomial) %>% broom::tidy()
)
})

0 comments on commit d85aff2

Please sign in to comment.