Add riskdiff argument to tabulate_rsp_subgroups and `tabulate_sur…

…vival_subgroups` (#1276) Fixes #1246
insightsengineering · Aug 6, 2024 · 9ea65dc · 9ea65dc
1 parent cbbcdc4
commit 9ea65dc
Show file tree

Hide file tree

Showing 14 changed files with 455 additions and 97 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -56,6 +56,7 @@ export(control_coxreg)
 export(control_incidence_rate)
 export(control_lineplot_vars)
 export(control_logistic)
+export(control_riskdiff)
 export(control_step)
 export(control_surv_med_annot)
 export(control_surv_time)

diff --git a/NEWS.md b/NEWS.md
@@ -2,6 +2,8 @@
 ### Enhancements
 * Added `errorbar_width` and `linetype` parameters to `g_lineplot`.
 * Reworking of `summarize_glm_count()` documentation and all its associated functions to better describe the results and the functions' purpose.
+* Added the `.formats` argument to `tabulate_rsp_subgroups` and `tabulate_survival_subgroups` to allow users to specify formats.
+* Added the `riskdiff` argument to `tabulate_rsp_subgroups` and `tabulate_survival_subgroups` to allow users to add a risk difference table column, and function `control_riskdiff` to specify settings for the risk difference column.
 
 ### Bug Fixes
 * Fixed a bug in `a_surv_time` that threw an error when split only has `"is_event"`.

diff --git a/R/h_biomarkers_subgroups.R b/R/h_biomarkers_subgroups.R
@@ -21,6 +21,9 @@ h_tab_one_biomarker <- function(df,
                                 ...) {
   extra_args <- list(...)
 
+  # Create "ci" column from "lcl" and "ucl"
+  df$ci <- combine_vectors(df$lcl, df$ucl)
+
   lyt <- basic_table()
 
   # Row split by row type - only keep the content rows here.

diff --git a/R/response_biomarkers_subgroups.R b/R/response_biomarkers_subgroups.R
@@ -75,6 +75,9 @@ tabulate_rsp_biomarkers <- function(df,
   checkmate::assert_character(df$biomarker_label)
   checkmate::assert_subset(vars, get_stats("tabulate_rsp_biomarkers"))
 
+  # Create "ci" column from "lcl" and "ucl"
+  df$ci <- combine_vectors(df$lcl, df$ucl)
+
   df_subs <- split(df, f = df$biomarker)
   tabs <- lapply(df_subs, FUN = function(df_sub) {
     tab_sub <- h_tab_rsp_one_biomarker(

diff --git a/R/response_subgroups.R b/R/response_subgroups.R
@@ -131,35 +131,25 @@ a_response_subgroups <- function(.formats = list(
                                    n_tot = "xx",
                                    or = list(format_extreme_values(2L)),
                                    ci = list(format_extreme_values_ci(2L)),
-                                   pval = "x.xxxx | (<0.0001)" # nolint end
+                                   pval = "x.xxxx | (<0.0001)",
+                                   riskdiff = "xx.x (xx.x - xx.x)" # nolint end
                                  ),
                                  na_str = default_na_str()) {
   checkmate::assert_list(.formats)
   checkmate::assert_subset(
     names(.formats),
-    c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval")
+    c("n", "n_rsp", "prop", "n_tot", "or", "ci", "pval", "riskdiff")
   )
 
   afun_lst <- Map(
     function(stat, fmt, na_str) {
-      if (stat == "ci") {
-        function(df, labelstr = "", ...) {
-          in_rows(
-            .list = combine_vectors(df$lcl, df$ucl),
-            .labels = as.character(df$subgroup),
-            .formats = fmt,
-            .format_na_strs = na_str
-          )
-        }
-      } else {
-        function(df, labelstr = "", ...) {
-          in_rows(
-            .list = as.list(df[[stat]]),
-            .labels = as.character(df$subgroup),
-            .formats = fmt,
-            .format_na_strs = na_str
-          )
-        }
+      function(df, labelstr = "", ...) {
+        in_rows(
+          .list = as.list(df[[stat]]),
+          .labels = as.character(df$subgroup),
+          .formats = fmt,
+          .format_na_strs = na_str
+        )
       }
     },
     stat = names(.formats),
@@ -184,52 +174,108 @@ a_response_subgroups <- function(.formats = list(
 #'   * `or`: Odds ratio.
 #'   * `ci` : Confidence interval of odds ratio.
 #'   * `pval`: p-value of the effect.
-#'   Note, the statistics `n_tot`, `or` and `ci` are required.
+#'   Note, the statistics `n_tot`, `or`, and `ci` are required.
+#' @param riskdiff (`list`)\cr if a risk (proportion) difference column should be added, a list of settings to apply
+#'   within the column. See [control_riskdiff()] for details. If `NULL`, no risk difference column will be added. If
+#'   `riskdiff$arm_x` and `riskdiff$arm_y` are `NULL`, the first level of `df$prop$arm` will be used as `arm_x` and
+#'   the second level as `arm_y`.
 #'
 #' @return An `rtables` table summarizing binary response by subgroup.
 #'
 #' @examples
-#' ## Table with default columns.
+#' # Table with default columns
 #' basic_table() %>%
 #'   tabulate_rsp_subgroups(df)
 #'
-#' ## Table with selected columns.
+#' # Table with selected columns
 #' basic_table() %>%
 #'   tabulate_rsp_subgroups(
 #'     df = df,
 #'     vars = c("n_tot", "n", "n_rsp", "prop", "or", "ci")
 #'   )
 #'
+#' # Table with risk difference column added
+#' basic_table() %>%
+#'   tabulate_rsp_subgroups(
+#'     df,
+#'     riskdiff = control_riskdiff(
+#'       arm_x = levels(df$prop$arm)[1],
+#'       arm_y = levels(df$prop$arm)[2]
+#'     )
+#'   )
+#'
 #' @export
 #' @order 2
 tabulate_rsp_subgroups <- function(lyt,
                                    df,
                                    vars = c("n_tot", "n", "prop", "or", "ci"),
                                    groups_lists = list(),
                                    label_all = "All Patients",
-                                   na_str = default_na_str()) {
+                                   riskdiff = NULL,
+                                   na_str = default_na_str(),
+                                   .formats = c(
+                                     n = "xx", n_rsp = "xx", prop = "xx.x%", n_tot = "xx",
+                                     or = list(format_extreme_values(2L)), ci = list(format_extreme_values_ci(2L)),
+                                     pval = "x.xxxx | (<0.0001)"
+                                   )) {
+  checkmate::assert_list(riskdiff, null.ok = TRUE)
+  checkmate::assert_true(all(c("n_tot", "or", "ci") %in% vars))
+
+  # Create "ci" column from "lcl" and "ucl"
+  df$or$ci <- combine_vectors(df$or$lcl, df$or$ucl)
+
+  # Fill in missing formats with defaults
+  default_fmts <- eval(formals(tabulate_rsp_subgroups)$.formats)
+  .formats <- c(.formats, default_fmts[vars[!vars %in% names(.formats)]])
+
+  # Extract additional parameters from df
   conf_level <- df$or$conf_level[1]
-  method <- if ("pval_label" %in% names(df$or)) {
-    df$or$pval_label[1]
-  } else {
-    NULL
-  }
+  method <- if ("pval_label" %in% names(df$or)) df$or$pval_label[1] else NULL
+  colvars <- d_rsp_subgroups_colvars(vars, conf_level = conf_level, method = method)
+  prop_vars <- intersect(colvars$vars, c("n", "prop", "n_rsp"))
+  or_vars <- intersect(names(colvars$labels), c("n_tot", "or", "ci", "pval"))
+  colvars_prop <- list(vars = prop_vars, labels = colvars$labels[prop_vars])
+  colvars_or <- list(vars = or_vars, labels = colvars$labels[or_vars])
 
   extra_args <- list(groups_lists = groups_lists, conf_level = conf_level, method = method, label_all = label_all)
 
-  afun_lst <- a_response_subgroups(na_str = na_str)
-  colvars <- d_rsp_subgroups_colvars(vars, conf_level = conf_level, method = method)
+  # Get analysis function for each statistic
+  afun_lst <- a_response_subgroups(.formats = c(.formats, riskdiff = riskdiff$format), na_str = na_str)
 
-  colvars_prop <- list(
-    vars = colvars$vars[names(colvars$labels) %in% c("n", "prop", "n_rsp")],
-    labels = colvars$labels[names(colvars$labels) %in% c("n", "prop", "n_rsp")]
-  )
-  colvars_or <- list(
-    vars = colvars$vars[names(colvars$labels) %in% c("n_tot", "or", "ci", "pval")],
-    labels = colvars$labels[names(colvars$labels) %in% c("n_tot", "or", "ci", "pval")]
-  )
+  # Add risk difference column
+  if (!is.null(riskdiff)) {
+    if (is.null(riskdiff$arm_x)) riskdiff$arm_x <- levels(df$prop$arm)[1]
+    if (is.null(riskdiff$arm_y)) riskdiff$arm_y <- levels(df$prop$arm)[2]
+    colvars_or$vars <- c(colvars_or$vars, "riskdiff")
+    colvars_or$labels <- c(colvars_or$labels, riskdiff = riskdiff$col_label)
+    arm_cols <- paste(rep(c("n_rsp", "n_rsp", "n", "n")), c(riskdiff$arm_x, riskdiff$arm_y), sep = "_")
 
-  # Columns from table_prop are optional.
+    df_prop_diff <- df$prop %>%
+      dplyr::select(-"prop") %>%
+      tidyr::pivot_wider(
+        id_cols = c("subgroup", "var", "var_label", "row_type"),
+        names_from = "arm",
+        values_from = c("n", "n_rsp")
+      ) %>%
+      dplyr::rowwise() %>%
+      dplyr::mutate(
+        riskdiff = stat_propdiff_ci(
+          x = as.list(.data[[arm_cols[1]]]),
+          y = as.list(.data[[arm_cols[2]]]),
+          N_x = .data[[arm_cols[3]]],
+          N_y = .data[[arm_cols[4]]]
+        )
+      ) %>%
+      dplyr::select(-dplyr::all_of(arm_cols))
+
+    df$or <- df$or %>%
+      dplyr::left_join(
+        df_prop_diff,
+        by = c("subgroup", "var", "var_label", "row_type")
+      )
+  }
+
+  # Add columns from table_prop (optional)
   if (length(colvars_prop$vars) > 0) {
     lyt_prop <- split_cols_by(lyt = lyt, var = "arm")
     lyt_prop <- split_cols_by_multivar(
@@ -238,7 +284,7 @@ tabulate_rsp_subgroups <- function(lyt,
       varlabels = colvars_prop$labels
     )
 
-    # "All Patients" row
+    # Add "All Patients" row
     lyt_prop <- split_rows_by(
       lyt = lyt_prop,
       var = "row_type",
@@ -253,6 +299,7 @@ tabulate_rsp_subgroups <- function(lyt,
       extra_args = extra_args
     )
 
+    # Add analysis rows
     if ("analysis" %in% df$prop$row_type) {
       lyt_prop <- split_rows_by(
         lyt = lyt_prop,
@@ -276,15 +323,15 @@ tabulate_rsp_subgroups <- function(lyt,
     table_prop <- NULL
   }
 
-  # Columns "n_tot", "or", "ci" in table_or are required.
+  # Add columns from table_or ("n_tot", "or", and "ci" required)
   lyt_or <- split_cols_by(lyt = lyt, var = "arm")
   lyt_or <- split_cols_by_multivar(
     lyt = lyt_or,
     vars = colvars_or$vars,
     varlabels = colvars_or$labels
   )
 
-  # "All Patients" row
+  # Add "All Patients" row
   lyt_or <- split_rows_by(
     lyt = lyt_or,
     var = "row_type",
@@ -300,6 +347,7 @@ tabulate_rsp_subgroups <- function(lyt,
   ) %>%
     append_topleft("Baseline Risk Factors")
 
+  # Add analysis rows
   if ("analysis" %in% df$or$row_type) {
     lyt_or <- split_rows_by(
       lyt = lyt_or,
@@ -317,17 +365,19 @@ tabulate_rsp_subgroups <- function(lyt,
       extra_args = extra_args
     )
   }
+
   table_or <- build_table(lyt_or, df = df$or)
 
+  # Join tables, add forest plot attributes
   n_tot_id <- match("n_tot", colvars_or$vars)
   if (is.null(table_prop)) {
     result <- table_or
     or_id <- match("or", colvars_or$vars)
-    ci_id <- match("lcl", colvars_or$vars)
+    ci_id <- match("ci", colvars_or$vars)
   } else {
     result <- cbind_rtables(table_or[, n_tot_id], table_prop, table_or[, -n_tot_id])
     or_id <- 1L + ncol(table_prop) + match("or", colvars_or$vars[-n_tot_id])
-    ci_id <- 1L + ncol(table_prop) + match("lcl", colvars_or$vars[-n_tot_id])
+    ci_id <- 1L + ncol(table_prop) + match("ci", colvars_or$vars[-n_tot_id])
     n_tot_id <- 1L
   }
   structure(

diff --git a/R/riskdiff.R b/R/riskdiff.R
@@ -160,3 +160,38 @@ afun_riskdiff <- function(df,
     in_rows(.list = rd_ci, .formats = "xx.x (xx.x - xx.x)", .indent_mods = .indent_mods)
   }
 }
+
+#' Control function for risk difference column
+#'
+#' @description `r lifecycle::badge("stable")`
+#'
+#' Sets a list of parameters to use when generating a risk (proportion) difference column. Used as input to the
+#' `riskdiff` parameter of [tabulate_rsp_subgroups()] and [tabulate_survival_subgroups()].
+#'
+#' @inheritParams add_riskdiff
+#' @param format (`string` or `function`)\cr the format label (string) or formatting function to apply to the risk
+#'   difference statistic. See the `3d` string options in [list_valid_format_labels()] for possible format strings.
+#'   Defaults to `"xx.x (xx.x - xx.x)"`.
+#'
+#' @return A `list` of items with names corresponding to the arguments.
+#'
+#' @seealso [add_riskdiff()], [tabulate_rsp_subgroups()], and [tabulate_survival_subgroups()].
+#'
+#' @examples
+#' control_riskdiff()
+#' control_riskdiff(arm_x = "ARM A", arm_y = "ARM B")
+#'
+#' @export
+control_riskdiff <- function(arm_x = NULL,
+                             arm_y = NULL,
+                             format = "xx.x (xx.x - xx.x)",
+                             col_label = "Risk Difference (%) (95% CI)",
+                             pct = TRUE) {
+  checkmate::assert_character(arm_x, len = 1, null.ok = TRUE)
+  checkmate::assert_character(arm_y, min.len = 1, null.ok = TRUE)
+  checkmate::assert_character(format, len = 1)
+  checkmate::assert_character(col_label)
+  checkmate::assert_flag(pct)
+
+  list(arm_x = arm_x, arm_y = arm_y, format = format, col_label = col_label, pct = pct)
+}