Skip to content

Commit

Permalink
refac: replaced input parameter data by data_set and function dataset…
Browse files Browse the repository at this point in the history
… by get_dataset
  • Loading branch information
davidsantiagoquevedo committed Jul 16, 2024
1 parent b533ca2 commit f322f6f
Show file tree
Hide file tree
Showing 53 changed files with 344 additions and 359 deletions.
4 changes: 2 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# Generated by roxygen2: do not edit by hand

S3method(dataset,match)
S3method(get_dataset,match)
S3method(plot,effectiveness)
S3method(summary,effectiveness)
S3method(summary,match)
export(coh_coverage)
export(dataset)
export(effectiveness)
export(get_age_group)
export(get_dataset)
export(make_immunization)
export(match_cohort)
export(plot_coverage)
Expand Down
66 changes: 33 additions & 33 deletions R/coh_data_wrangling.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' and `1` means the opposite. However, it can also receive custom
#' options, e.g., `c("v", "u")` for vaccinated and unvaccinated.
#'
#' @param data `data.frame` with at least one column from which to
#' @param data_set `data.frame` with at least one column from which to
#' generate the status specified in `status`.
#' @param col_names Name(s) of the column(s) as a string or a character
#' vector containing the information from which the status is calculated.
Expand All @@ -26,18 +26,18 @@
#' @return Status
#' @keywords internal

set_status <- function(data,
set_status <- function(data_set,
col_names,
operator = c("&", "|"),
status = c(1, 0)) {
# input checking
checkmate::assert_data_frame(
data,
data_set,
min.rows = 1, min.cols = 1
)
checkmate::assert_character(col_names, any.missing = FALSE, min.len = 1)
checkmate::assert_names(
names(data),
names(data_set),
must.include = col_names
)

Expand All @@ -55,7 +55,7 @@ set_status <- function(data,
any.missing = FALSE
)
condition <- "ifelse("
int0 <- "(!is.na(data[["
int0 <- "(!is.na(data_set[["
intf <- "]]))"
i <- 1
for (col_name in col_names) {
Expand Down Expand Up @@ -88,21 +88,21 @@ set_status <- function(data,
#' @return Status
#' @keywords internal

set_event_status <- function(data,
set_event_status <- function(data_set,
outcome_date_col,
censoring_date_col = NULL) {
checkmate::assert_data_frame(
data,
data_set,
min.rows = 1, min.cols = 1
)
checkmate::assert_character(outcome_date_col,
any.missing = FALSE, min.len = 1
)
checkmate::assert_names(
names(data), must.include = outcome_date_col
names(data_set), must.include = outcome_date_col
)

data$outcome_status <- set_status(data = data,
data_set$outcome_status <- set_status(data_set = data_set,
col_names = outcome_date_col,
status = c(1, 0)
)
Expand All @@ -111,17 +111,17 @@ set_event_status <- function(data,
any.missing = FALSE, min.len = 1
)
checkmate::assert_names(
names(data), must.include = censoring_date_col
names(data_set), must.include = censoring_date_col
)
data$outcome_status <- ifelse(
(!is.na(data[[censoring_date_col]])) &
(!is.na(data[[outcome_date_col]])) &
(data[[censoring_date_col]] <= data[[outcome_date_col]]),
data_set$outcome_status <- ifelse(
(!is.na(data_set[[censoring_date_col]])) &
(!is.na(data_set[[outcome_date_col]])) &
(data_set[[censoring_date_col]] <= data_set[[outcome_date_col]]),
yes = "0",
no = data$outcome_status
no = data_set$outcome_status
)
}
return(data$outcome_status)
return(data_set$outcome_status)
}

#' @title Construct Time-to-Event
Expand All @@ -144,22 +144,22 @@ set_event_status <- function(data,
#' @return Time-to-event
#' @keywords internal

get_time_to_event <- function(data,
get_time_to_event <- function(data_set,
outcome_date_col,
censoring_date_col = NULL,
start_cohort, end_cohort,
start_from_immunization = FALSE,
immunization_date_col) {
# add input checking
checkmate::assert_data_frame(
data,
data_set,
min.rows = 1L
)

checkmate::assert_string(outcome_date_col)

checkmate::assert_names(
colnames(data),
colnames(data_set),
must.include = outcome_date_col
)
# check date types
Expand All @@ -172,7 +172,7 @@ get_time_to_event <- function(data,

# check if date columns are date type
checkmate::assert_date(
data[[outcome_date_col]]
data_set[[outcome_date_col]]
)

checkmate::assert_logical(
Expand All @@ -183,53 +183,53 @@ get_time_to_event <- function(data,
#Checks of censoring_date_col if provided
if (!is.null(censoring_date_col)) {
checkmate::assert_names(
colnames(data),
colnames(data_set),
must.include = censoring_date_col
)
checkmate::assert_date(
data[[censoring_date_col]]
data_set[[censoring_date_col]]
)
checkmate::assert_string(censoring_date_col)
}

# check immnunization date col if asked
if (start_from_immunization) {
stopifnot(
"`immunization_date_col` must be provided, and a column name in `data`" =
"`immunization_date_col` must be provided, and a column name in `data_set`" = #nolint
(!missing(immunization_date_col) &&
checkmate::test_string(immunization_date_col) &&
immunization_date_col %in% colnames(data))
immunization_date_col %in% colnames(data_set))
)

# Check for date type column
checkmate::assert_date(
data[[immunization_date_col]]
data_set[[immunization_date_col]]
)
}

# Initialize vector with start point to calculate time-to-event
# cohort start by default
t0 <- rep(start_cohort, nrow(data))
t0 <- rep(start_cohort, nrow(data_set))
if (start_from_immunization) {
# if start from immunization replace informed immunization dates
t0 <- as.Date(ifelse(is.na(data[[immunization_date_col]]),
t0 <- as.Date(ifelse(is.na(data_set[[immunization_date_col]]),
yes = as.character(t0),
no = as.character(data[[immunization_date_col]])
no = as.character(data_set[[immunization_date_col]])
))
}

# Initialize vector with end point to calculate time-to-event
# cohort end by default
tf <- rep(end_cohort, nrow(data))
tf <- rep(end_cohort, nrow(data_set))
# replace informed outcome dates
tf <- as.Date(ifelse(!is.na(data[[outcome_date_col]]),
yes = as.character(data[[outcome_date_col]]),
tf <- as.Date(ifelse(!is.na(data_set[[outcome_date_col]]),
yes = as.character(data_set[[outcome_date_col]]),
no = as.character(tf)
))
# replace censoring dates if provided
if (!is.null(censoring_date_col)) {
tf <- as.Date(ifelse(!is.na(data[[censoring_date_col]]),
yes = as.character(data[[censoring_date_col]]),
tf <- as.Date(ifelse(!is.na(data_set[[censoring_date_col]]),
yes = as.character(data_set[[censoring_date_col]]),
no = as.character(tf)
))
}
Expand Down
6 changes: 3 additions & 3 deletions R/coh_eff_hr.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#' @keywords internal


coh_eff_hr <- function(data,
coh_eff_hr <- function(data_set,
outcome_status_col,
time_to_event_col,
vacc_status_col,
Expand All @@ -26,7 +26,7 @@ coh_eff_hr <- function(data,
end_cohort) {

# Kaplan-Meier model for loglog curve
km <- km_model(data = data,
km <- km_model(data_set = data_set,
outcome_status_col = outcome_status_col,
time_to_event_col = time_to_event_col,
vacc_status_col = vacc_status_col,
Expand All @@ -43,7 +43,7 @@ coh_eff_hr <- function(data,
)

# Cox model
cx <- cox_model(data = data,
cx <- cox_model(data_set = data_set,
outcome_status_col = outcome_status_col,
time_to_event_col = time_to_event_col,
vacc_status_col = vacc_status_col,
Expand Down
18 changes: 9 additions & 9 deletions R/coh_eff_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ plot_loglog <- function(km,
#'
#' # Create `data.frame` with information on immunization
#' cohortdata <- make_immunization(
#' data = cohortdata,
#' data_set = cohortdata,
#' outcome_date_col = "death_date",
#' censoring_date_col = "death_other_causes",
#' immunization_delay = 14,
Expand All @@ -70,7 +70,7 @@ plot_loglog <- function(km,
#'
#' # Match the data
#' matching <- match_cohort(
#' data = cohortdata,
#' data_set = cohortdata,
#' outcome_date_col = "death_date",
#' censoring_date_col = "death_other_causes",
#' start_cohort = start_cohort,
Expand All @@ -81,17 +81,17 @@ plot_loglog <- function(km,
#' )
#'
#' # Extract matched data
#' cohortdata_match <- dataset(matching)
#' cohortdata_match <- get_dataset(matching)
#'
#' # Plot survival curve
#' plot_survival(
#' data = cohortdata_match,
#' data_set = cohortdata_match,
#' start_cohort = start_cohort,
#' end_cohort = end_cohort
#' )
#' @export

plot_survival <- function(data,
plot_survival <- function(data_set,
start_cohort,
end_cohort,
outcome_status_col = "outcome_status",
Expand All @@ -105,11 +105,11 @@ plot_survival <- function(data,
cumulative = FALSE) {
# input checking
checkmate::assert_data_frame(
data,
data_set,
min.rows = 1L
)
checkmate::assert_names(
names(data),
names(data_set),
must.include = c(outcome_status_col, time_to_event_col, vacc_status_col)
)
checkmate::assert_character(
Expand All @@ -133,12 +133,12 @@ plot_survival <- function(data,
any.missing = FALSE, len = 1
)
checkmate::assert_names(
data[[vacc_status_col]],
data_set[[vacc_status_col]],
must.include = c(vaccinated_status, unvaccinated_status)
)

# KM model
km <- km_model(data = data,
km <- km_model(data_set = data_set,
outcome_status_col = outcome_status_col,
time_to_event_col = time_to_event_col,
vacc_status_col = vacc_status_col,
Expand Down
26 changes: 13 additions & 13 deletions R/coh_eff_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ extract_surv_model <- function(model, start_cohort, end_cohort) {
#' "surv", "lower", "upper",
#' "cumincidence", "cumincidence_lower", "cumincidence_upper"
#' @keywords internal
km_model <- function(data,
km_model <- function(data_set,
outcome_status_col,
time_to_event_col,
vacc_status_col,
Expand All @@ -34,9 +34,9 @@ km_model <- function(data,
# KM model time to event, outcome ~ vaccine status
model <- survival::survfit(
survival::Surv(
data[[time_to_event_col]],
data[[outcome_status_col]]
) ~ data[[vacc_status_col]]
data_set[[time_to_event_col]],
data_set[[outcome_status_col]]
) ~ data_set[[vacc_status_col]]
)
# Extract data from `{survival}` object
km <- extract_surv_model(model, start_cohort, end_cohort)
Expand All @@ -49,8 +49,8 @@ km_model <- function(data,
# Construct strata data
km$strata <- factor(km$strata,
levels = c(
paste0("data[[vacc_status_col]]=", vaccinated_status),
paste0("data[[vacc_status_col]]=", unvaccinated_status)
paste0("data_set[[vacc_status_col]]=", vaccinated_status),
paste0("data_set[[vacc_status_col]]=", unvaccinated_status)
)
)
levels(km$strata) <- c(vaccinated_status, unvaccinated_status)
Expand All @@ -75,30 +75,30 @@ km_model <- function(data,
#' `{survival}` object with model
#' `{survival}` object with Schoenfeld test
#' @keywords internal
cox_model <- function(data,
cox_model <- function(data_set,
outcome_status_col,
time_to_event_col,
vacc_status_col,
vaccinated_status,
unvaccinated_status) {

# Prepare data for model
data[[vacc_status_col]] <- factor(
data[[vacc_status_col]],
data_set[[vacc_status_col]] <- factor(
data_set[[vacc_status_col]],
levels = c(vaccinated_status, unvaccinated_status),
ordered = FALSE
)

data[[vacc_status_col]] <- stats::relevel(
data[[vacc_status_col]], ref = unvaccinated_status
data_set[[vacc_status_col]] <- stats::relevel(
data_set[[vacc_status_col]], ref = unvaccinated_status
)

# Cox model time to event, outcome ~ vaccine status
# Regression
model <- survival::coxph(
survival::Surv( # nolint
data[[time_to_event_col]], data[[outcome_status_col]]
) ~ data[[vacc_status_col]]
data_set[[time_to_event_col]], data_set[[outcome_status_col]]
) ~ data_set[[vacc_status_col]]
)

## Hazard ratio
Expand Down
Loading

0 comments on commit f322f6f

Please sign in to comment.