merge develop into prop_test

need the new z functionality from #351
tidymodels · Dec 22, 2020 · 4f6befa · 4f6befa
1 parent 871a62c
commit 4f6befa
Show file tree

Hide file tree

Showing 9 changed files with 67 additions and 20 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -5,6 +5,7 @@
 more closely resembles `dplyr::slice_sample()` (the function that supersedes)
 `dplyr::sample_n()` (#325)
 - Added a `success` argument to `prop_test()` (#343)
+- Implemented the standardized proportion $z$ statistic for one categorical variable
 
 # infer 0.5.3
 

diff --git a/R/calculate.R b/R/calculate.R
@@ -92,8 +92,10 @@ calculate <- function(x,
         "implemented) for `stat` = \"{stat}\". Are you missing ",
         "a `generate()` step?"
       )
-    } else if (!(stat %in% c("Chisq", "prop", "count")) &
-      !(stat == "t" & (attr(x, "theory_type") == "One sample t"))) {
+    } else if (
+      !(stat %in% c("Chisq", "prop", "count")) &
+      !(stat %in% c("t", "z") 
+        & (attr(x, "theory_type") %in% c("One sample t", "One sample prop z")))) {
       # From `hypothesize()` to `calculate()`
       # Catch-all if generate was not called
       # warning_glue(
@@ -484,16 +486,14 @@ calc_impl.z <- function(type, x, order, ...) {
 
     # When `hypothesize()` has been called
     success <- attr(x, "success")
-
-    p0 <- attr(x, "params")[1]
-    num_rows <- nrow(x) / length(unique(x$replicate))
-
     col <- attr(x, "response")
-    # if (is.null(success)) {
-    #   success <- quo(get_par_levels(x)[1])
-    # }
-    # Error given instead
-
+    p0 <- unname(attr(x, "params")[1])
+    if (!is_generated(x)) {
+      num_rows <- nrow(x)
+    } else {
+      num_rows <- nrow(x) / length(unique(x$replicate))
+    }
+
     df_out <- x %>%
       dplyr::summarize(
         stat = (

diff --git a/R/hypothesize.R b/R/hypothesize.R
@@ -75,7 +75,8 @@ hypothesize <- function(x, null, p = NULL, mu = NULL, med = NULL, sigma = NULL)
       }
     }
   )
-  append_infer_class(tibble::as_tibble(x))
+  res <- append_infer_class(tibble::as_tibble(x))
+  copy_attrs(res, x, "params")
 }
 
 is_hypothesized <- function(x){

diff --git a/tests/testthat/helper-data.R b/tests/testthat/helper-data.R
@@ -1,5 +1,10 @@
 set.seed(4242)
 
+expect_doppelganger <- function(title, fig, path = NULL, ...) {
+  testthat::skip_if_not_installed("vdiffr")
+  vdiffr::expect_doppelganger(title, fig, path = path, ...)
+}
+
 eps <- if (capabilities("long.double")) {sqrt(.Machine$double.eps)} else {0.01}
 
 gss_tbl <- tibble::as_tibble(gss) %>%

diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R
@@ -568,3 +568,17 @@ test_that("calc_impl.ratio_of_props works", {
     tolerance = eps
   )
 })
+
+test_that("calc_impl.z works for one sample proportions", {
+  infer_obs_stat <- gss %>%
+    specify(response = sex, success = "female") %>%
+    hypothesize(null = "point", p = .5) %>%
+    calculate(stat = "z") %>%
+    dplyr::pull()
+
+  base_obs_stat <- 
+    (mean(gss$sex == "female") - .5) / 
+    sqrt(.5^2 / nrow(gss))
+
+  expect_equal(infer_obs_stat, base_obs_stat, tolerance = eps)
+})
diff --git a/tests/testthat/test-shade_confidence_interval.R b/tests/testthat/test-shade_confidence_interval.R
@@ -1,8 +1,5 @@
 context("shade_confidence_interval")
 
-library(vdiffr)
-
-
 # shade_confidence_interval -----------------------------------------------
 test_that("shade_confidence_interval works", {
   skip_if(getRversion() > "4.0.2")

diff --git a/tests/testthat/test-shade_p_value.R b/tests/testthat/test-shade_p_value.R
@@ -1,8 +1,5 @@
 context("shade_p_value")
 
-library(vdiffr)
-
-
 # shade_p_value -----------------------------------------------------------
 test_that("shade_p_value works", {
   skip_if(getRversion() > "4.0.2")

diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R
@@ -1,7 +1,6 @@
 context("visualize")
 
 library(dplyr)
-library(vdiffr)
 
 set.seed(42)
 

diff --git a/vignettes/observed_stat_examples.Rmd b/vignettes/observed_stat_examples.Rmd
@@ -229,7 +229,40 @@ null_distn <- gss %>%
 
 ### One categorical variable (standardized proportion $z$)
 
-While the standardized proportion $z$ statistic has not yet been implemented in the randomization-based framework, the package supplies a wrapper around `prop.test` to allow for tests of a single proportion on tidy data.
+Calculating the observed statistic,
+
+```{r}
+p_hat <- gss %>%
+  specify(response = sex, success = "female") %>%
+  hypothesize(null = "point", p = .5) %>%
+  calculate(stat = "z")
+```
+
+Then, generating the null distribution,
+
+```{r}
+null_distn <- gss %>%
+  specify(response = sex, success = "female") %>%
+  hypothesize(null = "point", p = .5) %>%
+  generate(reps = 1000, type = "simulate") %>%
+  calculate(stat = "z")
+```
+
+Visualizing the observed statistic alongside the null distribution,
+
+```{r}
+visualize(null_distn) +
+  shade_p_value(obs_stat = p_hat, direction = "two-sided")
+```
+
+Calculating the p-value from the null distribution and observed statistic,
+
+```{r}
+null_distn %>%
+  get_p_value(obs_stat = p_hat, direction = "two-sided")
+```
+
+The package also supplies a wrapper around `prop.test` for tests of a single proportion on tidy data.
 
 ```{r prop_test_1_grp}
 prop_test(gss,