New function ggadjust_pvalue #522

kassambara · Dec 4, 2022 · 2b3b7bc · 2b3b7bc
1 parent 766a19e
commit 2b3b7bc
Show file tree

Hide file tree

Showing 8 changed files with 294 additions and 2 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -26,7 +26,7 @@ Imports:
     tidyr (>= 1.2.1.9001),
     purrr,
     dplyr (>= 0.7.1),
-    cowplot,
+    cowplot (>= 1.1.1),
     ggsignif,
     scales,
     gridExtra,
@@ -75,6 +75,7 @@ Collate:
     'get_legend.R'
     'get_palette.R'
     'ggadd.R'
+    'ggadjust_pvalue.R'
     'ggarrange.R'
     'ggballoonplot.R'
     'ggpar.R'
@@ -135,7 +136,7 @@ Collate:
     'utils-geom-signif.R'
     'utils-pipe.R'
     'utils-tidyr.R'
-Remotes:  
+Remotes: 
     tidyverse/tidyr,
     slowkow/ggrepel,
     kassambara/rstatix
diff --git a/NAMESPACE b/NAMESPACE
@@ -35,6 +35,7 @@ export(get_legend)
 export(get_palette)
 export(get_summary_stats)
 export(ggadd)
+export(ggadjust_pvalue)
 export(ggarrange)
 export(ggballoonplot)
 export(ggbarplot)

diff --git a/R/geom_pwc.R b/R/geom_pwc.R
@@ -106,12 +106,28 @@ NULL
 #'  aesthetics, used to set an aesthetic to a fixed value, like \code{color =
 #'  "red"} or \code{size = 3}. They may also be parameters to the paired
 #'  geom/stat.
+#' @details
+#' \bold{Notes on adjusted p-values and facet}. When using the ggplot facet functions, the p-values are computed and adjusted by panel, without taking into account the other panels. This is by design in ggplot2.
+#'
+#' In this case, when there is only one computed p-value by panel, then using `label = "p"` or `label = "p.adj"` will give the same results using `geom_pwc()`. Again, p-value computation and adjustment in a given facet panel is done independently to the other panels.
+#'
+#' One might want to adjust the p-values of all the facet panels together. There are two solutions for that:
+#'
+#' \itemize{
+#' \item Using \code{\link{ggadjust_pvalue}(p)} after creating the plot \code{p}
+#' \item or adding the adjusted p-value manually using \code{\link{stat_pvalue_manual}()}. Read more at:
+#' \itemize{
+#'   \item \href{https://www.datanovia.com/en/blog/how-to-add-p-values-to-ggplot-facets/}{How to Add P-values to GGPLOT Facets}
+#'   \item \href{https://www.datanovia.com/en/blog/add-p-values-to-ggplot-facets-with-different-scales/}{Add P-values to GGPLOT Facets with Different Scales}
+#' }
+#' }
 #'@inheritParams ggplot2::layer
 #' @examples
 #' df <- ToothGrowth
 #' df$dose <- factor(df$dose)
 #'
 #'@rdname geom_pwc
+#' @seealso \code{\link{ggadjust_pvalue}}
 #'@examples
 #' # Data preparation
 #' #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

diff --git a/R/ggadjust_pvalue.R b/R/ggadjust_pvalue.R
@@ -0,0 +1,113 @@
+#' @include utilities.R geom_pwc.R
+NULL
+
+#'Adjust p-values Displayed on a GGPlot
+#'@description Adjust p-values produced by \code{\link{geom_pwc}()} on a ggplot.
+#'  This is mainly useful when using facet, where p-values are generally
+#'  computed and adjusted by panel without taking into account the other panels.
+#'  In this case, one might want to adjust after the p-values of all panels together.
+#'@inheritParams geom_pwc
+#'@param p a ggplot
+#'@param layer An integer indicating the statistical layer rank in the ggplot
+#'  (in the order added to the plot).
+#'@param output character. Possible values are one of \code{c("plot",
+#'  "stat_test")}. Default is "plot".
+#'@examples
+#' # Data preparation
+#' #:::::::::::::::::::::::::::::::::::::::
+#' df <- ToothGrowth
+#' df$dose <- as.factor(df$dose)
+#' # Add a random grouping variable
+#' df$group <- factor(rep(c("grp1", "grp2"), 30))
+#' head(df, 3)
+#'
+#' # Boxplot: Two groups by panel
+#' #:::::::::::::::::::::::::::::::::::::::
+#' # Create a box plot
+#' bxp <- ggboxplot(
+#'   df, x = "supp", y = "len", fill = "#00AFBB",
+#'   facet.by = "dose"
+#' )
+#' # Make facet and add p-values
+#' bxp <- bxp + geom_pwc(method = "t_test")
+#' bxp
+#' # Adjust all p-values together after
+#' ggadjust_pvalue(
+#'   bxp, p.adjust.method = "bonferroni",
+#'   label = "{p.adj.format}{p.adj.signif}", hide.ns = TRUE
+#' )
+#'
+#'
+#' # Boxplot: Three groups by panel
+#' #:::::::::::::::::::::::::::::::::::::::
+#' # Create a box plot
+#' bxp <- ggboxplot(
+#'   df, x = "dose", y = "len", fill = "#00AFBB",
+#'   facet.by = "supp"
+#' )
+#' # Make facet and add p-values
+#' bxp <- bxp + geom_pwc(method = "t_test")
+#' bxp
+#' # Adjust all p-values together after
+#' ggadjust_pvalue(
+#'   bxp, p.adjust.method = "bonferroni",
+#'   label = "{p.adj.format}{p.adj.signif}"
+#' )
+#'@export
+ggadjust_pvalue <- function(p, layer = NULL, p.adjust.method = "holm", label = "p.adj",
+                            hide.ns = NULL, symnum.args = list(), output = c("plot", "stat_test")){
+  output <- match.arg(output)
+  .build <- ggplot_build(p)
+  .build_data <- .build$data
+
+  # Pairwise comparison--------------------------
+  # Find layer containing statistical test data
+  key_columns <- c("group1", "group2", "p")
+  if(is.null(layer)){
+    for(i in 1:length(.build_data)){
+      if(all( key_columns %in% colnames(.build_data[[i]]))){
+        layer <- i
+        break
+      }
+    }
+  }
+  if(is.null(layer)){
+    stop("Can't find any layer containing statiscal tests")
+  }
+
+  stat_test <- .build$data[[layer]]
+  sy <- fortify_signif_symbols_encoding(symnum.args)
+  if(all(is.na(stat_test$p))){
+    warning(
+      "p-values can't be adjusted for the specified stat method.\n",
+      "The result of the method doesn't contain the p column.\n",
+      "Note that, tests such as tukey_hsd or games_howell_test handle p-value adjustement ",
+      "internally; they only return the p.adj.",
+      call. = FALSE
+    )
+    label <- gsub(pattern = "p.format", replacement = "p.adj.format", label)
+  }
+  else{
+    padjusted <- stat_test %>%
+      dplyr::select(dplyr::all_of(c("PANEL", "group", "group1", "group2", "p"))) %>%
+      dplyr::distinct(.keep_all = TRUE) %>%
+      rstatix::adjust_pvalue(method = p.adjust.method)
+    # Hide NS if hide.ns not null
+    if(!is.null(hide.ns))
+      padjusted <- rstatix::remove_ns(padjusted, col = hide.ns)
+
+    p <- p.adj <- NULL
+    stat_test <- stat_test %>%
+      dplyr::select(-dplyr::one_of(c("p", "p.adj", "p.format", "p.adj.format", "label"))) %>%
+      dplyr::inner_join(padjusted, by = c("PANEL", "group", "group1", "group2")) %>%
+      rstatix::p_format(p, p.adj, new.col = TRUE, accuracy = 1e-4) %>%
+      rstatix::add_significance(p.col = "p", cutpoints = sy$cutpoints, symbols = sy$symbols) %>%
+      rstatix::add_significance(p.col = "p.adj", cutpoints = sy$cutpoints, symbols = sy$symbols) %>%
+      add_stat_label(label = label)
+    .build$data[[layer]] <- stat_test
+  }
+  if(output == "stat_test"){
+    return(stat_test)
+  }
+  as_ggplot(ggplot_gtable(.build))
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -51,6 +51,7 @@ reference:
       - stat_friedman_test
       - geom_pwc
       - geom_bracket
+      - ggadjust_pvalue
   - title: Plot Tables and Paragraphs
     contents:
       - ggtexttable

diff --git a/man/geom_pwc.Rd b/man/geom_pwc.Rd
diff --git a/man/ggadjust_pvalue.Rd b/man/ggadjust_pvalue.Rd
diff --git a/tests/testthat/test-ggadjust_pvalue.R b/tests/testthat/test-ggadjust_pvalue.R
@@ -0,0 +1,36 @@
+# Data preparation
+#:::::::::::::::::::::::::::::::::::::::
+df <- ToothGrowth
+df$dose <- as.factor(df$dose)
+# Add a random grouping variable
+df$group <- factor(rep(c("grp1", "grp2"), 30))
+
+
+test_that("ggadjust_pvalue works for geom_pwc() pairwise comparison using facet", {
+  p <- ggboxplot(df, x = "supp", y = "len", facet.by = "dose") +
+    geom_pwc(method = "t_test")
+  # Adjust all p-values together after
+  stat_test <- ggadjust_pvalue(
+    p, p.adjust.method = "bonferroni",
+    label = "{p.adj.format}{p.adj.signif}",
+    output = "stat_test"
+  )
+  stat_test <- stat_test %>%
+    dplyr::select(PANEL, x, y, group, group1, group2, label) %>%
+    mutate(x = as.numeric(x), label = as.character(label))
+  expected <- tibble::tribble(
+     ~PANEL, ~x,     ~y, ~group, ~group1, ~group2,     ~label,
+        "1",  1, 34.494,      1,     "1",     "2",  "0.0191*",
+        "1",  1, 35.385,      1,     "1",     "2",  "0.0191*",
+        "1",  2, 35.385,      1,     "1",     "2",  "0.0191*",
+        "2",  1, 34.494,      1,     "1",     "2", "0.0031**",
+        "2",  1, 35.385,      1,     "1",     "2", "0.0031**",
+        "2",  2, 35.385,      1,     "1",     "2", "0.0031**",
+        "3",  1, 34.494,      1,     "1",     "2",      "1ns",
+        "3",  1, 35.385,      1,     "1",     "2",      "1ns",
+        "3",  2, 35.385,      1,     "1",     "2",      "1ns"
+     ) %>%
+    dplyr::mutate(PANEL = as.factor(PANEL)) %>%
+    as.data.frame(stringAsFactor = FALSE)
+  expect_equal(stat_test, expected)
+})