-
-
Notifications
You must be signed in to change notification settings - Fork 22
/
response_biomarkers_subgroups.R
229 lines (225 loc) · 7.52 KB
/
response_biomarkers_subgroups.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
#' Tabulate Biomarker Effects on Binary Response by Subgroup
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Tabulate the estimated effects of multiple continuous biomarker variables
#' on a binary response endpoint across population subgroups.
#'
#' @inheritParams argument_convention
#' @param df (`data.frame`)\cr containing all analysis variables, as returned by
#' [extract_rsp_biomarkers()].
#' @param vars (`character`)\cr the names of statistics to be reported among:
#' * `n_tot`: Total number of patients per group.
#' * `n_rsp`: Total number of responses per group.
#' * `prop`: Total response proportion per group.
#' * `or`: Odds ratio.
#' * `ci`: Confidence interval of odds ratio.
#' * `pval`: p-value of the effect.
#' Note, the statistics `n_tot`, `or` and `ci` are required.
#'
#' @return An `rtables` table summarizing biomarker effects on binary response by subgroup.
#'
#' @details These functions create a layout starting from a data frame which contains
#' the required statistics. The tables are then typically used as input for forest plots.
#'
#' @note In contrast to [tabulate_rsp_subgroups()] this tabulation function does
#' not start from an input layout `lyt`. This is because internally the table is
#' created by combining multiple subtables.
#'
#' @seealso [h_tab_rsp_one_biomarker()] which is used internally, [extract_rsp_biomarkers()].
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#' filter(PARAMCD == "BESRSPI") %>%
#' mutate(rsp = AVALC == "CR")
#' formatters::var_labels(adrs_f) <- c(adrs_labels, "Response")
#'
#' df <- extract_rsp_biomarkers(
#' variables = list(
#' rsp = "rsp",
#' biomarkers = c("BMRKR1", "AGE"),
#' covariates = "SEX",
#' subgroups = "BMRKR2"
#' ),
#' data = adrs_f
#' )
#'
#' \donttest{
#' ## Table with default columns.
#' tabulate_rsp_biomarkers(df)
#'
#' ## Table with a manually chosen set of columns: leave out "pval", reorder.
#' tab <- tabulate_rsp_biomarkers(
#' df = df,
#' vars = c("n_rsp", "ci", "n_tot", "prop", "or")
#' )
#'
#' ## Finally produce the forest plot.
#' g_forest(tab, xlim = c(0.7, 1.4))
#' }
#'
#' @export
#' @name response_biomarkers_subgroups
tabulate_rsp_biomarkers <- function(df,
vars = c("n_tot", "n_rsp", "prop", "or", "ci", "pval"),
na_str = default_na_str(),
.indent_mods = 0L) {
checkmate::assert_data_frame(df)
checkmate::assert_character(df$biomarker)
checkmate::assert_character(df$biomarker_label)
checkmate::assert_subset(vars, get_stats("tabulate_rsp_biomarkers"))
df_subs <- split(df, f = df$biomarker)
tabs <- lapply(df_subs, FUN = function(df_sub) {
tab_sub <- h_tab_rsp_one_biomarker(
df = df_sub,
vars = vars,
na_str = na_str,
.indent_mods = .indent_mods
)
# Insert label row as first row in table.
label_at_path(tab_sub, path = row_paths(tab_sub)[[1]][1]) <- df_sub$biomarker_label[1]
tab_sub
})
result <- do.call(rbind, tabs)
n_id <- grep("n_tot", vars)
or_id <- match("or", vars)
ci_id <- match("ci", vars)
structure(
result,
forest_header = paste0(c("Lower", "Higher"), "\nBetter"),
col_x = or_id,
col_ci = ci_id,
col_symbol_size = n_id
)
}
#' Prepares Response Data Estimates for Multiple Biomarkers in a Single Data Frame
#'
#' @description `r lifecycle::badge("stable")`
#'
#' Prepares estimates for number of responses, patients and overall response rate,
#' as well as odds ratio estimates, confidence intervals and p-values,
#' for multiple biomarkers across population subgroups in a single data frame.
#' `variables` corresponds to the names of variables found in `data`, passed as a
#' named list and requires elements `rsp` and `biomarkers` (vector of continuous
#' biomarker variables) and optionally `covariates`, `subgroups` and `strat`.
#' `groups_lists` optionally specifies groupings for `subgroups` variables.
#'
#' @inheritParams argument_convention
#' @inheritParams response_subgroups
#' @param control (named `list`)\cr controls for the response definition and the
#' confidence level produced by [control_logistic()].
#'
#' @return A `data.frame` with columns `biomarker`, `biomarker_label`, `n_tot`, `n_rsp`,
#' `prop`, `or`, `lcl`, `ucl`, `conf_level`, `pval`, `pval_label`, `subgroup`, `var`,
#' `var_label`, and `row_type`.
#'
#' @note You can also specify a continuous variable in `rsp` and then use the
#' `response_definition` control to convert that internally to a logical
#' variable reflecting binary response.
#'
#' @seealso [h_logistic_mult_cont_df()] which is used internally.
#'
#' @examples
#' library(dplyr)
#' library(forcats)
#'
#' adrs <- tern_ex_adrs
#' adrs_labels <- formatters::var_labels(adrs)
#'
#' adrs_f <- adrs %>%
#' filter(PARAMCD == "BESRSPI") %>%
#' mutate(rsp = AVALC == "CR")
#'
#' # Typical analysis of two continuous biomarkers `BMRKR1` and `AGE`,
#' # in logistic regression models with one covariate `RACE`. The subgroups
#' # are defined by the levels of `BMRKR2`.
#' df <- extract_rsp_biomarkers(
#' variables = list(
#' rsp = "rsp",
#' biomarkers = c("BMRKR1", "AGE"),
#' covariates = "SEX",
#' subgroups = "BMRKR2"
#' ),
#' data = adrs_f
#' )
#' df
#'
#' # Here we group the levels of `BMRKR2` manually, and we add a stratification
#' # variable `STRATA1`. We also here use a continuous variable `EOSDY`
#' # which is then binarized internally (response is defined as this variable
#' # being larger than 750).
#' df_grouped <- extract_rsp_biomarkers(
#' variables = list(
#' rsp = "EOSDY",
#' biomarkers = c("BMRKR1", "AGE"),
#' covariates = "SEX",
#' subgroups = "BMRKR2",
#' strat = "STRATA1"
#' ),
#' data = adrs_f,
#' groups_lists = list(
#' BMRKR2 = list(
#' "low" = "LOW",
#' "low/medium" = c("LOW", "MEDIUM"),
#' "low/medium/high" = c("LOW", "MEDIUM", "HIGH")
#' )
#' ),
#' control = control_logistic(
#' response_definition = "I(response > 750)"
#' )
#' )
#' df_grouped
#'
#' @export
extract_rsp_biomarkers <- function(variables,
data,
groups_lists = list(),
control = control_logistic(),
label_all = "All Patients") {
assert_list_of_variables(variables)
checkmate::assert_string(variables$rsp)
checkmate::assert_character(variables$subgroups, null.ok = TRUE)
checkmate::assert_string(label_all)
# Start with all patients.
result_all <- h_logistic_mult_cont_df(
variables = variables,
data = data,
control = control
)
result_all$subgroup <- label_all
result_all$var <- "ALL"
result_all$var_label <- label_all
result_all$row_type <- "content"
if (is.null(variables$subgroups)) {
# Only return result for all patients.
result_all
} else {
# Add subgroups results.
l_data <- h_split_by_subgroups(
data,
variables$subgroups,
groups_lists = groups_lists
)
l_result <- lapply(l_data, function(grp) {
result <- h_logistic_mult_cont_df(
variables = variables,
data = grp$df,
control = control
)
result_labels <- grp$df_labels[rep(1, times = nrow(result)), ]
cbind(result, result_labels)
})
result_subgroups <- do.call(rbind, args = c(l_result, make.row.names = FALSE))
result_subgroups$row_type <- "analysis"
rbind(
result_all,
result_subgroups
)
}
}