tidymodels · echasnovski · Dec 26, 2020 · Dec 20, 2020 · Dec 22, 2020 · Dec 22, 2020
diff --git a/NEWS.md b/NEWS.md
@@ -4,7 +4,7 @@
 - Added `rep_slice_sample()`, a light wrapper around `rep_sample_n()`, that
 more closely resembles `dplyr::slice_sample()` (the function that supersedes)
 `dplyr::sample_n()` (#325)
-- Added a `success` argument to `prop_test()` (#343)
+- Added a `success`, `correct`, and `z` argument to `prop_test()` (#343, #347)
 - Implemented the standardized proportion $z$ statistic for one categorical variable
 
 # infer 0.5.3

diff --git a/R/calculate.R b/R/calculate.R
@@ -95,7 +95,8 @@ calculate <- function(x,
     } else if (
       !(stat %in% c("Chisq", "prop", "count")) &
       !(stat %in% c("t", "z") 
-        & (attr(x, "theory_type") %in% c("One sample t", "One sample prop z")))) {
+        & (attr(x, "theory_type") %in% 
+           c("One sample t", "One sample prop z", "Two sample props z")))) {
       # From `hypothesize()` to `calculate()`
       # Catch-all if generate was not called
       # warning_glue(
@@ -459,6 +460,10 @@ calc_impl.z <- function(type, x, order, ...) {
       explanatory_variable(x),
       levels = c(order[1], order[2])
     )
+
+    if (!"replicate" %in% colnames(x)) {
+      x$replicate <- 1L
+    }
 
     aggregated <- x %>%
       dplyr::group_by(replicate, explan) %>%
@@ -478,7 +483,7 @@ calc_impl.z <- function(type, x, order, ...) {
         denom = sqrt(p_hat * (1 - p_hat) / n1 + p_hat * (1 - p_hat) / n2),
         stat = diff_prop / denom
       ) %>%
-      dplyr::select(-total_suc, -n1, -n2)
+      dplyr::select(stat)
 
     df_out
   } else if (attr(x, "theory_type") == "One sample prop z") {

diff --git a/R/wrappers.R b/R/wrappers.R
@@ -407,6 +407,14 @@ check_conf_level <- function(conf_level) {
 #'   a string. Only used when testing the null that a single
 #'   proportion equals a given value, or that two proportions are equal;
 #'   ignored otherwise.
+#' @param correct A logical indicating whether Yates' continuity correction 
+#'   should be applied where possible. If `z = TRUE`, the `correct` argument will 
+#'   be overwritten as `FALSE`. Otherwise defaults to `correct = TRUE`.
+#' @param z A logical value for whether to report the statistic as a standard
+#'   normal deviate or a Pearson's chi-square statistic. \eqn{z^2}  is distributed
+#'   chi-square with 1 degree of freedom, though note that the user will likely
+#'   need to turn off Yates' continuity correction by setting `correct = FALSE`
+#'   to see this connection.
 #' @param ... Additional arguments for [prop.test()][stats::prop.test()].
 #'
 #' @examples
@@ -421,6 +429,14 @@ check_conf_level <- function(conf_level) {
 #' prop_test(gss,
 #'           college ~ NULL,
 #'           p = .2)
+#' 
+#' # report as a z-statistic rather than chi-square
+#' # and specify the success level of the response
+#' prop_test(gss,
+#'           college ~ NULL,
+#'           success = "degree",
+#'           p = .2,
+#'           z = TRUE)
 #'
 #' @export
 prop_test <- function(x, formula,
@@ -432,12 +448,15 @@ prop_test <- function(x, formula,
                       conf_int = TRUE,
                       conf_level = 0.95,
                       success = NULL,
+                      correct = NULL,
+                      z = FALSE,
                       ...) {
   # Parse response and explanatory variables
   response    <- enquo(response)
   explanatory <- enquo(explanatory)
   x <- parse_variables(x = x, formula = formula,
                        response = response, explanatory = explanatory)
+  correct <- if (z) {FALSE} else if (is.null(correct)) {TRUE} else {correct}
 
   if (!(class(response_variable(x)) %in% c("logical", "character", "factor"))) {
     stop_glue(
@@ -492,6 +511,7 @@ prop_test <- function(x, formula,
                                alternative = alternative,
                                conf.level = conf_level,
                                p = p,
+                               correct = correct,
                                ...)
   } else { # one sample
     response_tbl <- response_variable(x) %>%
@@ -510,6 +530,7 @@ prop_test <- function(x, formula,
                                alternative = alternative,
                                conf.level = conf_level,
                                p = p,
+                               correct = correct,
                                ...)
 
   }
@@ -539,7 +560,33 @@ prop_test <- function(x, formula,
                     chisq_df = parameter,
                     p_value = p.value)
   }
+
+  if (z) {
+    results <- calculate_z(x, results, success, p, order)
+  }
 
   results
 }
 
+calculate_z <- function(x, results, success, p, order) {
+  exp <- if (has_explanatory(x)) {attr(x, "explanatory")} else {"NULL"}
+
+  form <- as.formula(paste0(attr(x, "response"), " ~ ", exp))
+
+  stat <- x %>%
+    specify(formula = form, success = success) %>%
+    hypothesize(
+      null = if (has_explanatory(x)) {"independence"} else {"point"},
+      p = if (is.null(p) && !has_explanatory(x)) {.5} else {p}
+    ) %>%
+    calculate(
+      stat = "z",
+      order = if (has_explanatory(x)) {order} else {NULL}
+    ) %>%
+    dplyr::pull()
+
+  results$statistic <- stat
+  results$chisq_df <- NULL
+
+  results
+}
diff --git a/man/prop_test.Rd b/man/prop_test.Rd
diff --git a/tests/testthat/test-wrappers.R b/tests/testthat/test-wrappers.R
@@ -328,6 +328,7 @@ test_that("one sample prop_test works", {
 
 test_that("prop_test output dimensionality is correct", {
   infer_1_sample <- prop_test(df, resp ~ NULL, p = .5)
+  infer_1_sample_z <- prop_test(df, resp ~ NULL, p = .5, z = TRUE)
   infer_2_sample <- prop_test(df, resp ~ exp, order = c("a", "b"))
   infer_2_sample_no_int <- prop_test(df, resp ~ exp, order = c("a", "b"), 
                                      conf_int = FALSE)
@@ -338,7 +339,16 @@ test_that("prop_test output dimensionality is correct", {
   infer_3_sample <- prop_test(df, resp ~ exp, order = c("a", "b"))
 
   expect_length(infer_1_sample, 4)
+  expect_length(infer_1_sample, length(infer_1_sample_z) + 1)
   expect_length(infer_2_sample, 6)
   expect_length(infer_2_sample_no_int, 4)
   expect_length(infer_3_sample, 3)
 })
+
+test_that("prop_test z argument works as expected", {
+  chi_res <- prop_test(df, resp ~ NULL, p = .5, correct = FALSE)
+
+  z_res <- prop_test(df, resp ~ NULL, p = .5, z = TRUE)
+
+  expect_equal(unname(chi_res$statistic), z_res$statistic^2, tolerance = eps)
+})