Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add z argument to prop_test #353

Merged
merged 8 commits into from
Dec 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
- Added `rep_slice_sample()`, a light wrapper around `rep_sample_n()`, that
more closely resembles `dplyr::slice_sample()` (the function that supersedes)
`dplyr::sample_n()` (#325)
- Added a `success` argument to `prop_test()` (#343)
- Added a `success`, `correct`, and `z` argument to `prop_test()` (#343, #347)
- Implemented the standardized proportion $z$ statistic for one categorical variable

# infer 0.5.3
Expand Down
9 changes: 7 additions & 2 deletions R/calculate.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ calculate <- function(x,
} else if (
!(stat %in% c("Chisq", "prop", "count")) &
!(stat %in% c("t", "z")
& (attr(x, "theory_type") %in% c("One sample t", "One sample prop z")))) {
& (attr(x, "theory_type") %in%
c("One sample t", "One sample prop z", "Two sample props z")))) {
# From `hypothesize()` to `calculate()`
# Catch-all if generate was not called
# warning_glue(
Expand Down Expand Up @@ -459,6 +460,10 @@ calc_impl.z <- function(type, x, order, ...) {
explanatory_variable(x),
levels = c(order[1], order[2])
)

if (!"replicate" %in% colnames(x)) {
x$replicate <- 1L
}

aggregated <- x %>%
dplyr::group_by(replicate, explan) %>%
Expand All @@ -478,7 +483,7 @@ calc_impl.z <- function(type, x, order, ...) {
denom = sqrt(p_hat * (1 - p_hat) / n1 + p_hat * (1 - p_hat) / n2),
stat = diff_prop / denom
) %>%
dplyr::select(-total_suc, -n1, -n2)
dplyr::select(stat)

df_out
} else if (attr(x, "theory_type") == "One sample prop z") {
Expand Down
47 changes: 47 additions & 0 deletions R/wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,14 @@ check_conf_level <- function(conf_level) {
#' a string. Only used when testing the null that a single
#' proportion equals a given value, or that two proportions are equal;
#' ignored otherwise.
#' @param correct A logical indicating whether Yates' continuity correction
#' should be applied where possible. If `z = TRUE`, the `correct` argument will
#' be overwritten as `FALSE`. Otherwise defaults to `correct = TRUE`.
#' @param z A logical value for whether to report the statistic as a standard
#' normal deviate or a Pearson's chi-square statistic. \eqn{z^2} is distributed
#' chi-square with 1 degree of freedom, though note that the user will likely
#' need to turn off Yates' continuity correction by setting `correct = FALSE`
#' to see this connection.
#' @param ... Additional arguments for [prop.test()][stats::prop.test()].
#'
#' @examples
Expand All @@ -421,6 +429,14 @@ check_conf_level <- function(conf_level) {
#' prop_test(gss,
#' college ~ NULL,
#' p = .2)
#'
#' # report as a z-statistic rather than chi-square
#' # and specify the success level of the response
#' prop_test(gss,
#' college ~ NULL,
#' success = "degree",
#' p = .2,
#' z = TRUE)
#'
#' @export
prop_test <- function(x, formula,
Expand All @@ -432,12 +448,15 @@ prop_test <- function(x, formula,
conf_int = TRUE,
conf_level = 0.95,
success = NULL,
correct = NULL,
z = FALSE,
...) {
# Parse response and explanatory variables
response <- enquo(response)
explanatory <- enquo(explanatory)
x <- parse_variables(x = x, formula = formula,
response = response, explanatory = explanatory)
correct <- if (z) {FALSE} else if (is.null(correct)) {TRUE} else {correct}

if (!(class(response_variable(x)) %in% c("logical", "character", "factor"))) {
stop_glue(
Expand Down Expand Up @@ -492,6 +511,7 @@ prop_test <- function(x, formula,
alternative = alternative,
conf.level = conf_level,
p = p,
correct = correct,
...)
} else { # one sample
response_tbl <- response_variable(x) %>%
Expand All @@ -510,6 +530,7 @@ prop_test <- function(x, formula,
alternative = alternative,
conf.level = conf_level,
p = p,
correct = correct,
...)

}
Expand Down Expand Up @@ -539,7 +560,33 @@ prop_test <- function(x, formula,
chisq_df = parameter,
p_value = p.value)
}

if (z) {
results <- calculate_z(x, results, success, p, order)
}

results
}

calculate_z <- function(x, results, success, p, order) {
exp <- if (has_explanatory(x)) {attr(x, "explanatory")} else {"NULL"}

form <- as.formula(paste0(attr(x, "response"), " ~ ", exp))

stat <- x %>%
specify(formula = form, success = success) %>%
hypothesize(
null = if (has_explanatory(x)) {"independence"} else {"point"},
p = if (is.null(p) && !has_explanatory(x)) {.5} else {p}
) %>%
calculate(
stat = "z",
order = if (has_explanatory(x)) {order} else {NULL}
) %>%
dplyr::pull()

results$statistic <- stat
results$chisq_df <- NULL

results
}
20 changes: 20 additions & 0 deletions man/prop_test.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions tests/testthat/test-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ test_that("one sample prop_test works", {

test_that("prop_test output dimensionality is correct", {
infer_1_sample <- prop_test(df, resp ~ NULL, p = .5)
infer_1_sample_z <- prop_test(df, resp ~ NULL, p = .5, z = TRUE)
infer_2_sample <- prop_test(df, resp ~ exp, order = c("a", "b"))
infer_2_sample_no_int <- prop_test(df, resp ~ exp, order = c("a", "b"),
conf_int = FALSE)
Expand All @@ -338,7 +339,16 @@ test_that("prop_test output dimensionality is correct", {
infer_3_sample <- prop_test(df, resp ~ exp, order = c("a", "b"))

expect_length(infer_1_sample, 4)
expect_length(infer_1_sample, length(infer_1_sample_z) + 1)
expect_length(infer_2_sample, 6)
expect_length(infer_2_sample_no_int, 4)
expect_length(infer_3_sample, 3)
})

test_that("prop_test z argument works as expected", {
chi_res <- prop_test(df, resp ~ NULL, p = .5, correct = FALSE)

z_res <- prop_test(df, resp ~ NULL, p = .5, z = TRUE)

expect_equal(unname(chi_res$statistic), z_res$statistic^2, tolerance = eps)
})