Skip to content

Commit

Permalink
Merge pull request #205 from CHOP-CGTInformatics/join-data-tibbles
Browse files Browse the repository at this point in the history
Missing Data Fix, Partial Keys Fix
  • Loading branch information
rsh52 authored Oct 15, 2024
2 parents b68e371 + 14d2587 commit 5c4a7de
Show file tree
Hide file tree
Showing 10 changed files with 207 additions and 33 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,coalesce)
importFrom(dplyr,cur_column)
importFrom(dplyr,distinct)
importFrom(dplyr,filter)
importFrom(dplyr,first)
importFrom(dplyr,group_by)
Expand Down Expand Up @@ -147,6 +148,7 @@ importFrom(tidyselect,any_of)
importFrom(tidyselect,ends_with)
importFrom(tidyselect,eval_select)
importFrom(tidyselect,everything)
importFrom(tidyselect,matches)
importFrom(tidyselect,starts_with)
importFrom(tidyselect,where)
importFrom(vctrs,vec_ptype)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
- Use `combine_checkboxes()` to consolidate multiple checkbox fields in a REDCap data tibble under a single column
- Added new article vignette "Using Labelled Vectors with REDCapTidieR"
- Fixed a bug for mixed structure databases resulting in data loss when some fields had dual repeating-separately/repeating-together behavior
- Fixed a bug where partial keys taken from REDCap arms could be incorrectly specified
- Various improvements and additions with CRAN release of REDCapR 1.2.0:
- `event_name` added as a column to the `redcap_event` column of longitudinal supertibbles
- `guess_max` parameter in `read_redcap()` default updated to `Inf`
Expand Down
4 changes: 2 additions & 2 deletions R/REDCapTidieR-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' @importFrom dplyr %>% across bind_rows case_when filter group_by if_any if_else
#' left_join mutate pull recode relocate rename right_join row_number rowwise
#' select slice summarise ungroup coalesce cur_column bind_cols first nth n_distinct
#' first
#' first distinct
#' @importFrom formattable percent
#' @importFrom lobstr obj_size
#' @importFrom lubridate is.difftime is.period is.POSIXt is.Date
Expand All @@ -27,7 +27,7 @@
#' @importFrom tidyr complete fill pivot_wider nest separate_wider_delim unnest
#' unnest_wider
#' @importFrom tidyselect all_of any_of ends_with eval_select everything
#' starts_with where
#' starts_with where matches
#' @importFrom vctrs vec_ptype_abbr vec_ptype
#' @importFrom pillar tbl_sum
#' @importFrom readr parse_logical parse_integer parse_double parse_date parse_time
Expand Down
40 changes: 36 additions & 4 deletions R/clean_redcap_long.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ clean_redcap_long <- function(db_data_long,
# Retrieve mixed structure fields and forms in reference df
mixed_structure_ref <- get_mixed_structure_fields(db_data_long) %>%
filter(.data$rep_and_nonrep & !str_ends(.data$field_name, "_form_complete")) %>%
left_join(db_metadata_long %>% select("field_name", "form_name"),
left_join(
db_metadata_long %>%
select("field_name", "form_name"),
by = "field_name"
)

Expand Down Expand Up @@ -318,7 +320,10 @@ distill_repeat_table_long <- function(form_name,
db_data_long <- db_data_long %>%
add_partial_keys(var = .data$redcap_event_name) %>%
filter(
!is.na(.data$redcap_form_instance) &
(
!is.na(.data$redcap_form_instance) |
if_any(matches("redcap_event_instance"), ~ !is.na(.))
) &
.data$redcap_repeat_instrument == my_form
)

Expand Down Expand Up @@ -416,14 +421,41 @@ convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) {
)
)

repeat_together_present <- any(
is.na(db_data_long$redcap_repeat_instrument) &
!is.na(db_data_long$redcap_repeat_instance)
)

if (!"redcap_event_instance" %in% names(db_data_long) && repeat_together_present) {
db_data_long <- db_data_long %>%
mutate(
redcap_event_instance = NA
) %>%
relocate(.data$redcap_event_instance, .after = .data$redcap_repeat_instance)
}

if (repeat_together_present) {
db_data_long <- db_data_long %>%
mutate(
redcap_event_instance = case_when(
# Shift form instances to even instances for repeat-together types
update_mask & is.na(redcap_repeat_instrument) ~ redcap_repeat_instance,
# Otherwise
TRUE ~ redcap_event_instance
)
)
}

# Assign update data based on rules below
db_data_long <- db_data_long %>%
mutate(
redcap_repeat_instance = case_when(
# Add single instance repeat event instance vals when none exist
# This handles nonrepeating data in events set to repeat separately
update_mask & is.na(redcap_repeat_instance) ~ 1,
# Keep repeat event instance vals when they already exist
update_mask & !is.na(redcap_repeat_instance) ~ redcap_repeat_instance,
# If repeat-together type, remove values from redcap_repeat_instance
# (shifted and captured in redcap_event_instance)
update_mask & is.na(redcap_repeat_instrument) ~ NA,
TRUE ~ .data$redcap_repeat_instance
),
redcap_repeat_instrument = case_when(
Expand Down
64 changes: 59 additions & 5 deletions R/read_redcap.R
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,11 @@ read_redcap <- function(redcap_uri,
}

if (is_longitudinal) {
repeat_event_types <- get_repeat_event_types(db_data)

linked_arms <- link_arms(
redcap_uri = redcap_uri, token = token,
redcap_uri = redcap_uri,
token = token,
suppress_redcapr_messages = suppress_redcapr_messages
)

Expand All @@ -292,7 +295,7 @@ read_redcap <- function(redcap_uri,
out <- add_metadata(out, db_metadata, redcap_uri, token, suppress_redcapr_messages)

if (is_longitudinal) {
out <- add_event_mapping(out, linked_arms)
out <- add_event_mapping(out, linked_arms, repeat_event_types)
}

out <- out %>%
Expand Down Expand Up @@ -440,18 +443,21 @@ add_metadata <- function(supertbl, db_metadata, redcap_uri, token, suppress_redc
#' @param supertbl a supertibble object to supplement with metadata
#' @param linked_arms the tibble with event mappings created by
#' \code{link_arms()}
#' @param repeat_event_types a dataframe output from [get_repeat_event_types()] which
#' specifies NR, RS, and RT types for events
#'
#' @return
#' The original supertibble with an events \code{redcap_events} list column
#' containing arms and events associated with each instrument
#'
#' @keywords internal
#'
add_event_mapping <- function(supertbl, linked_arms) {

add_event_mapping <- function(supertbl, linked_arms, repeat_event_types) {
event_info <- linked_arms %>%
left_join(repeat_event_types, by = c("unique_event_name" = "redcap_event_name")) %>%
add_partial_keys(.data$unique_event_name) %>%
select(
redcap_form_name = "form", "redcap_event", "event_name", "redcap_arm", "arm_name"
redcap_form_name = "form", "redcap_event", "event_name", "redcap_arm", "arm_name", "repeat_type"
) %>%
nest(redcap_events = !"redcap_form_name")

Expand Down Expand Up @@ -498,3 +504,51 @@ calc_metadata_stats <- function(data) {
form_complete_pct = percent(form_complete_pct, digits = 2, format = "fg")
)
}

#' @title
#' Add identification for repeat event types
#'
#' @description
#' To correctly assign repeat event types a few assumptions must be made:
#'
#' - There are only 3 behaviors: nonrepeating, repeat_separately, and repeat_together
#' - If an event only shows `redcap_repeat_instance` and `redcap_repeat_instrument`
#' as `NA`, it can be considered a nonrepeat event.
#' - If an event is always `NA` for `redcap_repeat_instrument` and filled for `redcap_repeat_instance`
#' it can be assumed to be a repeat_together event
#' - repeat_separate and nonrepeating event types exhibit the same behavior along the
#' primary keys of the data. nonrepeating event types can have data display with
#' `redcap_repeat_instance`values both filled and as `NA`. If this is the case,
#' it can be assumed the event is a repeating separate event.
#'
#' @param data the REDCap data
#'
#' @return
#' A dataframe with unique event names mapped to their corresponding repeat types
#'
#' @keywords internal

get_repeat_event_types <- function(data) {
out <- data %>%
distinct(.data$redcap_event_name, .data$redcap_repeat_instrument, .data$redcap_repeat_instance) %>%
mutate(
repeat_type = case_when(
!is.na(redcap_event_name) & !is.na(redcap_repeat_instrument) & !is.na(redcap_repeat_instance) ~
"repeat_separate",
!is.na(redcap_event_name) & is.na(redcap_repeat_instrument) & !is.na(redcap_repeat_instance) ~
"repeat_together",
TRUE ~ "nonrepeating"
)
) %>%
distinct(.data$redcap_event_name, .data$repeat_type)

# Check for instances where the same event is labelled as nonrepeating & repeating separate
# If this is the case, it must be repeating separate (there is just data that qualifies as both)

out %>%
mutate(
is_duplicated = (duplicated(.data$redcap_event_name) | duplicated(.data$redcap_event_name, fromLast = TRUE))
) %>%
filter(!.data$is_duplicated | (.data$is_duplicated & .data$repeat_type == "repeat_separate")) %>%
select(-.data$is_duplicated)
}
48 changes: 28 additions & 20 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@
add_partial_keys <- function(db_data,
var = NULL) {
if (!is.null(enexpr(var))) {
pattern <- "^(\\w+?)_arm_(\\d)$"
# Include handling for instances where REDCap appends with "_1b" or similar
pattern <- "^(\\w+?)_arm_(\\d+\\w?)$"

db_data <- db_data %>%
mutate(
redcap_event = sub(pattern, "\\1", {{ var }}),
redcap_arm = as.integer(sub(pattern, "\\2", {{ var }}))
redcap_arm = as.character(sub(pattern, "\\2", {{ var }}))
)
}

Expand Down Expand Up @@ -64,31 +65,38 @@ create_repeat_instance_vars <- function(db_data) {
}

# Detect if repeat events exist
# Determined by non-NA vals in new "redcap_form_instance" alongside
# NA vals in "redcap_repeat_instrument"
# `has_repeat_forms` will always be TRUE for events to exist
if (has_repeat_forms) {
# First determined if redcap_event_instance added during mixed structure handling
# See: convert_mixed_instrument
has_repeat_events <- "redcap_event_instance" %in% names(out)

# Next determined by non-NA vals in new "redcap_form_instance" alongside
# NA vals in "redcap_repeat_instrument"
# `has_repeat_forms` will always be TRUE for events to exist
if (has_repeat_forms && !has_repeat_events) {
has_repeat_events <- any(
is.na(out$redcap_repeat_instrument) & !is.na(out$redcap_form_instance)
)
} else {
has_repeat_events <- FALSE
}

if (has_repeat_events) {
out$redcap_event_instance <- ifelse(
is.na(out$redcap_repeat_instrument) &
!is.na(out$redcap_form_instance),
out$redcap_form_instance,
NA
)
# In cases where there are repeating events but they were not added by
# convert_mixed_instrument(), add an empty redcap_event_instance column
if (!"redcap_event_instance" %in% names(out)) {
out$redcap_event_instance <- NA
}

out$redcap_form_instance <- ifelse(
is.na(out$redcap_repeat_instrument) &
!is.na(out$redcap_form_instance),
NA,
out$redcap_form_instance
)
out <- out %>%
mutate(
redcap_event_instance = case_when(
is.na(redcap_repeat_instrument) & !is.na(redcap_form_instance) ~ redcap_form_instance,
# Else leave NA or the value given by conver_mixed_instrument()
TRUE ~ redcap_event_instance
),
redcap_form_instance = case_when(
is.na(redcap_repeat_instrument) & !is.na(redcap_form_instance) ~ NA,
TRUE ~ redcap_form_instance
)
)

out <- relocate(out,
"redcap_event_instance",
Expand Down
5 changes: 4 additions & 1 deletion man/add_event_mapping.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions man/get_repeat_event_types.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 41 additions & 0 deletions tests/testthat/test-read_redcap.R
Original file line number Diff line number Diff line change
Expand Up @@ -641,3 +641,44 @@ test_that("read_redcap handles missing data codes", {
}) |>
expect_no_warning()
})

test_that("get_repeat_event_types() works", {
mixed_data_structure <- tibble::tribble(
~"record_id", ~"redcap_event_name", ~"redcap_repeat_instrument", ~"redcap_repeat_instance",
1, "nonrepeat", NA, NA,
1, "repeat_together", NA, 1,
1, "repeat_separate", "mixed_structure_form", 1
)

expected_out <- tibble::tribble(
~"redcap_event_name", ~"repeat_type",
"nonrepeat", "nonrepeating",
"repeat_together", "repeat_together",
"repeat_separate", "repeat_separate"
)

out <- get_repeat_event_types(mixed_data_structure)

expect_equal(out, expected_out)

# Example with nonrepeating arm that contains repeating and non repeating forms
mixed_data_structure <- tibble::tribble(
~"record_id", ~"redcap_event_name", ~"redcap_repeat_instrument", ~"redcap_repeat_instance",
1, "nonrepeat", NA, NA,
1, "nonrepeat", "repeat_form", 1,
1, "repeat_together", NA, 1,
1, "repeat_separate", "mixed_structure_form", 1
)

out <- get_repeat_event_types(mixed_data_structure)

expected_out <- tibble::tribble(
~"redcap_event_name", ~"repeat_type",
"nonrepeat", "repeat_separate",
"repeat_together", "repeat_together",
"repeat_separate", "repeat_separate"
)

expect_equal(out, expected_out)

})
6 changes: 5 additions & 1 deletion tests/testthat/test-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,8 @@ test_that("add_partial_keys works", {
1, "nr_event_arm_1", NA, NA,
1, "nr_event_arm_1", "r_instrument", 1,
3, "nr_event_arm_1", "r_instrument", 1,
4, "r_event_arm_1", NA, 1
4, "r_event_arm_1", NA, 1,
5, "r_event_arm_1b", NA, 1
)

out <- test_data %>%
Expand All @@ -367,6 +368,9 @@ test_that("add_partial_keys works", {
expect_true(all(expected_cols %in% names(out)))
expect_s3_class(out, "data.frame")
expect_true(nrow(out) > 0)

expected_redcap_arm_col <- c("1", "1", "1", "1", "1b")
expect_equal(out$redcap_arm, expected_redcap_arm_col)
})

test_that("create_repeat_instance_vars works", {
Expand Down

0 comments on commit 5c4a7de

Please sign in to comment.