Merge f636231 into 6ca9aa5

insightsengineering · Jun 6, 2023 · 1cb133d · 1cb133d
2 parents 6ca9aa5 + f636231
commit 1cb133d
Show file tree

Hide file tree

Showing 20 changed files with 671 additions and 136 deletions.
diff --git a/R/analyze_colvars_functions.R b/R/analyze_colvars_functions.R
@@ -1,16 +1,28 @@
 #' Analyze Functions on Columns
 #'
 #' @description
+#' These functions are wrappers of [rtables::analyze_colvars()] which apply corresponding `tern`
+#' statistics functions to add an analysis to a given table layout. In particular, these functions
+#' where designed to have the analysis methods split into different columns.
 #'
-#' These functions are wrappers of [rtables::analyze_colvars()] which apply corresponding `tern` statistics functions
-#' to add an analysis to a given table layout:
+#' * [analyze_vars_in_cols()]: fundamental tabulation of analysis methods onto columns.
+#'   In other words, the analysis methods are defined in the column space, i.e. they become
+#'   column labels. By changing the variable vector, the list of functions can be applied on
+#'   different variables, with the caveat of having the same number of statistical functions.
 #'
-#' * [analyze_patients_exposure_in_cols()]
-#' * [analyze_vars_in_cols()] (extended wrapper, never used in other `tern` functions)
-#' * [summarize_colvars()] (low level wrapper, never used in other `tern` functions)
-#' * [summarize_coxreg()] (contains also [rtables::summarize_row_groups()])
-#' * [tabulate_rsp_subgroups()]
-#' * [tabulate_survival_subgroups()]
+#' * [tabulate_rsp_subgroups()]: similarly to `analyze_vars_in_cols`, this
+#'   function combines `analyze_colvars` and `summarize_row_groups` in a compact way
+#'   to produce standard tables that show analysis methods as columns.
+#' * [tabulate_survival_subgroups()]: this function is very similar to the above, but
+#'   it is used for other tables.
+#'
+#' * [analyze_patients_exposure_in_cols()]: based only on `analyze_colvars`. It needs
+#'   [summarize_patients_exposure_in_cols()] to leverage nesting of label rows analysis
+#'   with [rtables::summarize_row_groups()].
+#' * [summarize_coxreg()]: generally based on [rtables::summarize_row_groups()], it behaves
+#'   similarly to `tabulate_*` functions described above as it is designed to provide
+#'   specific standard tables that may contain nested structure with a combination of
+#'   `summarize_row_groups()` and [rtables::analyze_colvars()].
 #'
 #' @seealso
 #'   * [summarize_functions] for functions which are wrappers for [rtables::summarize_row_groups()].

diff --git a/R/analyze_functions.R b/R/analyze_functions.R
@@ -26,6 +26,11 @@
 #' * [estimate_proportion()]
 #' * [estimate_proportion_diff()]
 #' * [summarize_ancova()]
+#' * [summarize_colvars()]: even if this function uses [rtables::analyze_colvars()],
+#'   it applies the analysis methods as different rows for one or more
+#'   variables that are split into different columns. In comparison, [analyze_colvars_functions]
+#'   leverage `analyze_colvars` to have the context split in rows and the analysis
+#'   methods in columns.
 #' * [summarize_change()]
 #' * [summarize_vars()]
 #' * [surv_time()]

diff --git a/R/analyze_vars_in_cols.R b/R/analyze_vars_in_cols.R
@@ -1,76 +1,121 @@
 #' Summary numeric variables in columns
 #'
-#' @description `r lifecycle::badge("stable")`
+#' @description `r lifecycle::badge("experimental")`
 #'
-#' Layout-creating function which can be used for creating column-wise summary tables, primarily
-#' used for PK data sets. This function is a wrapper for [rtables::analyze_colvars()].
+#' Layout-creating function which can be used for creating column-wise summary tables.
+#' This function sets the analysis methods as column labels and is a wrapper for
+#' [rtables::analyze_colvars()]. It was designed principally for PK tables.
 #'
 #' @inheritParams argument_convention
 #' @inheritParams rtables::analyze_colvars
+#' @param row_labels (`character`)\cr as this function works in columns space, usual `.labels`
+#'   character vector applies on the column space. You can change the row labels by defining this
+#'   parameter to a named character vector with names corresponding to the split values. It defaults
+#'   to `NULL` and if it contains only one `string`, it will duplicate that as a row label.
+#' @param do_summarize_row_groups (`flag`)\cr defaults to `FALSE` and applies the analysis to the current
+#'   label rows. This is a wrapper of [rtables::summarize_row_groups()] and it can accept `labelstr`
+#'   to define row labels. This behavior is not supported as we never need to overload row labels.
+#' @param split_col_vars (`flag`)\cr defaults to `TRUE` and puts the analysis results onto the columns.
+#'   This option allows you to add multiple instances of this functions, also in a nested fashion,
+#'   without adding more splits. This split must happen only one time on a single layout.
 #'
 #' @return
 #' A layout object suitable for passing to further layouting functions, or to [rtables::build_table()].
 #' Adding this function to an `rtable` layout will summarize the given variables, arrange the output
 #' in columns, and add it to the table layout.
 #'
+#' @note This is an experimental implementation of [rtables::summarize_row_groups()] and
+#'   [rtables::analyze_colvars()] that may be subjected to changes as `rtables` extends its
+#'   support to more complex analysis pipelines on the column space. For the same reasons,
+#'   we encourage to read the examples carefully and file issues for cases that differ from
+#'   them.
+#'
+#'   Here `labelstr` behaves differently than usual. If it is not defined (default as `NULL`),
+#'   row labels are assigned automatically to the split values in case of `rtables::analyze_colvars`
+#'   (`do_summarize_row_groups = FALSE`, the default), and to the group label for
+#'   `do_summarize_row_groups = TRUE`.
+#'
 #' @seealso [summarize_vars()], [rtables::analyze_colvars()].
 #'
 #' @examples
 #' library(dplyr)
 #'
+#' # Data preparation
 #' adpp <- tern_ex_adpp %>% h_pkparam_sort()
+#'
 #' lyt <- basic_table() %>%
-#'   split_rows_by(var = "ARM", label_pos = "topleft") %>%
-#'   split_rows_by(var = "SEX", label_pos = "topleft") %>%
+#'   split_rows_by(var = "STRATA1", label_pos = "topleft") %>%
+#'   split_rows_by(
+#'     var = "SEX",
+#'     label_pos = "topleft",
+#'     child_label = "hidden"
+#'   ) %>% # Removes duplicated labels
 #'   analyze_vars_in_cols(vars = "AGE")
 #' result <- build_table(lyt = lyt, df = adpp)
 #' result
 #'
 #' # By selecting just some statistics and ad-hoc labels
 #' lyt <- basic_table() %>%
 #'   split_rows_by(var = "ARM", label_pos = "topleft") %>%
-#'   split_rows_by(var = "SEX", label_pos = "topleft") %>%
+#'   split_rows_by(
+#'     var = "SEX",
+#'     label_pos = "topleft",
+#'     child_labels = "hidden",
+#'     split_fun = drop_split_levels
+#'   ) %>%
 #'   analyze_vars_in_cols(
 #'     vars = "AGE",
-#'     .stats = c("n", "cv", "geom_mean", "mean_ci", "median", "min", "max"),
+#'     .stats = c("n", "cv", "geom_mean"),
 #'     .labels = c(
-#'       n = "myN",
-#'       cv = "myCV",
-#'       geom_mean = "myGeomMean",
-#'       mean_ci = "Mean (95%CI)",
-#'       median = "Median",
-#'       min = "Minimum",
-#'       max = "Maximum"
+#'       n = "aN",
+#'       cv = "aCV",
+#'       geom_mean = "aGeomMean"
 #'     )
 #'   )
 #' result <- build_table(lyt = lyt, df = adpp)
 #' result
 #'
+#' # Changing row labels
 #' lyt <- basic_table() %>%
 #'   analyze_vars_in_cols(
 #'     vars = "AGE",
-#'     labelstr = "some custom label"
+#'     row_labels = "some custom label"
+#'   )
+#' result <- build_table(lyt, df = adpp)
+#' result
+#'
+#' # Pharmacokinetic parameters
+#' lyt <- basic_table() %>%
+#'   split_rows_by(
+#'     var = "TLG_DISPLAY",
+#'     split_label = "PK Parameter",
+#'     label_pos = "topleft",
+#'     child_label = "hidden"
+#'   ) %>%
+#'   analyze_vars_in_cols(
+#'     vars = "AVAL"
 #'   )
 #' result <- build_table(lyt, df = adpp)
 #' result
 #'
-#' # PKPT03
+#' # Multiple calls (summarize label and analyze underneath)
 #' lyt <- basic_table() %>%
-#'   split_rows_by(var = "TLG_DISPLAY", split_label = "PK Parameter", label_pos = "topleft") %>%
+#'   split_rows_by(
+#'     var = "TLG_DISPLAY",
+#'     split_label = "PK Parameter",
+#'     label_pos = "topleft"
+#'   ) %>%
 #'   analyze_vars_in_cols(
 #'     vars = "AVAL",
-#'     .stats = c("n", "mean", "sd", "cv", "geom_mean", "geom_cv", "median", "min", "max"),
-#'     .labels = c(
-#'       n = "n",
-#'       mean = "Mean",
-#'       sd = "SD",
-#'       cv = "CV (%)",
-#'       geom_mean = "Geometric Mean",
-#'       geom_cv = "CV % Geometric Mean",
-#'       median = "Median",
-#'       min = "Minimum",
-#'       max = "Maximum"
-#'     )
+#'     do_summarize_row_groups = TRUE # does a summarize level
+#'   ) %>%
+#'   split_rows_by("SEX",
+#'     child_label = "hidden",
+#'     label_pos = "topleft"
+#'   ) %>%
+#'   analyze_vars_in_cols(
+#'     vars = "AVAL",
+#'     split_col_vars = FALSE # avoids re-splitting the columns
 #'   )
 #' result <- build_table(lyt, df = adpp)
 #' result
@@ -95,13 +140,19 @@ analyze_vars_in_cols <- function(lyt,
                                    cv = "CV (%)",
                                    geom_cv = "CV % Geometric Mean"
                                  ),
-                                 labelstr = " ",
+                                 row_labels = NULL,
+                                 do_summarize_row_groups = FALSE,
+                                 split_col_vars = TRUE,
+                                 .indent_mods = NULL,
                                  nested = TRUE,
                                  na_level = NULL,
                                  .formats = NULL) {
   checkmate::assert_string(na_level, null.ok = TRUE)
-  checkmate::assert_string(labelstr)
+  checkmate::assert_character(row_labels, null.ok = TRUE)
+  checkmate::assert_int(.indent_mods, null.ok = TRUE)
   checkmate::assert_flag(nested)
+  checkmate::assert_flag(split_col_vars)
+  checkmate::assert_flag(do_summarize_row_groups)
 
   # Automatic assignment of formats
   if (is.null(.formats)) {
@@ -113,19 +164,6 @@ analyze_vars_in_cols <- function(lyt,
     formats_v <- .formats
   }
 
-  afun_list <- Map(
-    function(stat) {
-      make_afun(
-        s_summary,
-        .labels = labelstr,
-        .stats = stat,
-        .format_na_strs = na_level,
-        .formats = formats_v[names(formats_v) == stat]
-      )
-    },
-    stat = .stats
-  )
-
   # Check for vars in the case that one or more are used
   if (length(vars) == 1) {
     vars <- rep(vars, length(.stats))
@@ -136,15 +174,133 @@ analyze_vars_in_cols <- function(lyt,
     )
   }
 
-  lyt <- split_cols_by_multivar(
-    lyt = lyt,
-    vars = vars,
-    varlabels = .labels[.stats]
-  )
-
-  analyze_colvars(lyt,
-    afun = afun_list,
-    nested = nested,
-    extra_args = list(...)
-  )
+  if (split_col_vars) {
+    # Checking there is not a previous identical column split
+    clyt <- tail(clayout(lyt), 1)[[1]]
+
+    dummy_lyt <- split_cols_by_multivar(
+      lyt = basic_table(),
+      vars = vars,
+      varlabels = .labels[.stats]
+    )
+
+    if (any(sapply(clyt, identical, y = get_last_col_split(dummy_lyt)))) {
+      stop(
+        "Column split called again with the same values. ",
+        "This can create many unwanted columns. Please consider adding ",
+        "split_col_vars = FALSE to the last call of ",
+        deparse(sys.calls()[[sys.nframe() - 1]]), "."
+      )
+    }
+
+    # Main col split
+    lyt <- split_cols_by_multivar(
+      lyt = lyt,
+      vars = vars,
+      varlabels = .labels[.stats]
+    )
+  }
+
+  if (do_summarize_row_groups) {
+    if (length(unique(vars)) > 1) {
+      stop("When using do_summarize_row_groups only one label level var should be inserted.")
+    }
+
+    # Function list for do_summarize_row_groups. Slightly different handling of labels
+    cfun_list <- Map(
+      function(stat) {
+        function(u, .spl_context, labelstr, ...) {
+          # Statistic
+          res <- s_summary(u, ...)[[stat]]
+
+          # Label check and replacement
+          if (length(row_labels) > 1) {
+            if (!(labelstr %in% names(row_labels))) {
+              stop(
+                "Replacing the labels in do_summarize_row_groups needs a named vector",
+                "that contains the split values. In the current split variable ",
+                .spl_context$split[nrow(.spl_context)],
+                " the labelstr value (split value by default) ", labelstr, " is not in",
+                " row_labels names: ", names(row_labels)
+              )
+            }
+            lbl <- unlist(row_labels[labelstr])
+          } else {
+            lbl <- labelstr
+          }
+
+          # Cell creation
+          rcell(res,
+            label = lbl,
+            format = formats_v[names(formats_v) == stat][[1]],
+            format_na_str = na_level,
+            indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods)
+          )
+        }
+      },
+      stat = .stats
+    )
+
+    # Main call to rtables
+    summarize_row_groups(
+      lyt = lyt,
+      var = unique(vars),
+      cfun = cfun_list,
+      extra_args = list(...)
+    )
+  } else {
+    # Function list for analyze_colvars
+    afun_list <- Map(
+      function(stat) {
+        function(u, .spl_context, ...) {
+          # Main statistics
+          res <- s_summary(u, ...)[[stat]]
+
+          # Label from context
+          label_from_context <- .spl_context$value[nrow(.spl_context)]
+
+          # Label switcher
+          if (is.null(row_labels)) {
+            lbl <- label_from_context
+          } else {
+            if (length(row_labels) > 1) {
+              if (!(label_from_context %in% names(row_labels))) {
+                stop(
+                  "Replacing the labels in do_summarize_row_groups needs a named vector",
+                  "that contains the split values. In the current split variable ",
+                  .spl_context$split[nrow(.spl_context)],
+                  " the split value ", label_from_context, " is not in",
+                  " row_labels names: ", names(row_labels)
+                )
+              }
+              lbl <- unlist(row_labels[label_from_context])
+            } else {
+              lbl <- row_labels
+            }
+          }
+
+          # Cell creation
+          rcell(res,
+            label = lbl,
+            format = formats_v[names(formats_v) == stat][[1]],
+            format_na_str = na_level,
+            indent_mod = ifelse(is.null(.indent_mods), 0L, .indent_mods)
+          )
+        }
+      },
+      stat = .stats
+    )
+
+    # Main call to rtables
+    analyze_colvars(lyt,
+      afun = afun_list,
+      nested = nested,
+      extra_args = list(...)
+    )
+  }
+}
+
+# Help function
+get_last_col_split <- function(lyt) {
+  tail(tail(clayout(lyt), 1)[[1]], 1)[[1]]
 }
diff --git a/R/argument_convention.R b/R/argument_convention.R
@@ -32,7 +32,8 @@
 #'   all of that structure's children. Defaults to 0, which corresponds to the
 #'   unmodified default behavior.
 #' @param labelstr (`character`)\cr label of the level of the parent split currently being summarized
-#'   (must be present as second argument in Content Row Functions).
+#'   (must be present as second argument in Content Row Functions). See [rtables::summarize_row_groups()]
+#'   for more information.
 #' @param lyt (`layout`)\cr input layout where analyses will be added to.
 #' @param na.rm (`flag`)\cr whether `NA` values should be removed from `x` prior to analysis.
 #' @param na_level (`string`)\cr used to replace all `NA` or empty values in factors with custom `string`.