metrumresearchgroup · kylebaron · May 15, 2021 · Feb 8, 2021 · Feb 11, 2021 · Apr 24, 2021
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: lastdose
 Type: Package
 Title: Calculate Time and Amount of Last Dose
-Version: 0.3.2
+Version: 0.4.0
 Authors@R: c(
     person("Kyle T", "Baron", email = "kyleb@metrumrg.com", role=c("aut", "cre"), comment=c(ORCID="0000-0001-7252-5656"))
     )
@@ -13,7 +13,7 @@ Imports: Rcpp
 LinkingTo: Rcpp
 URL: https://github.com/metrumresearchgroup/lastdose
 BugReports: https://github.com/metrumresearchgroup/lastdose/issues
-Suggests: testthat
+Suggests: testthat, withr
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.1.1

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,16 @@
+# lastdose 0.4.0
+
+- Change default value for `include_tafd` to FALSE (#29)
+- Add option `lastdose.include_tafd` (#29)
+- Add option `lastdose.id_col`  (#27)
+- Add option `lastdose.time_units` (#27)
+- Search column names for candidate ID columns using `find_id_col()`; 
+  this function is unexported (#25)
+- Search column names for candidate time columns using `find_time_col()`;
+  this function is unexported (#25)
+- Handle missing values (`NA`) in the time column; these records will 
+  stay in place and `NA` will be inserted for all outputs (#30)
+
 # lastdose 0.3.2
 
 - Fix bug where `II` column was not properly detected resulting in incorrect 

diff --git a/R/lastdose.R b/R/lastdose.R
@@ -15,11 +15,17 @@ NULL
 #' @param time_col character name for the `TIME` column; this could be time after
 #' first dose or time after first record or time relative to any origin; input
 #' may be `numeric` or `POSIXct` (e.g. `DATETIME`); if `POSIXct`, a numeric
-#' value will be calculated based on the value of `time_units`
+#' value will be calculated based on the value of `time_units`. The data frame
+#' will be searched for the first matching candidate time column using
+#' [find_time_col()]; if you don't want `lastdose` to search, you should pass
+#' in the name of the column to use for `TIME`.
 #' @param time_units for calculating time when the time column inherits
 #' `POSIXct`; you may use any value that is valid for [difftime()]
 #' @param id_col character name for the subject `ID` column; may be numeric
-#' or character; if character, a numeric value is derived
+#' or character; if character, a numeric value is derived. The data frame
+#' will be searched for the first matching candidate `ID` column using
+#' [find_id_col()]; if you don't want `lastdose` to search, you should pass
+#' in the name of the column to use for `ID`.
 #' @param back_calc if `TRUE`, then the time before the first dose
 #' is calculated for records prior to the first dosing record when
 #' at least one dosing record is found in the data set.  Records before
@@ -41,18 +47,42 @@ NULL
 #' @param include_tafd `logical`; if `FALSE`, then `TAFD` data is not appended
 #' to the data set.  Only used for the [lastdose()] function.
 #'
+#' @section Options:
+#'
+#' These are options that can be set to customize `lastdose` behavior
+#' for the current context.  See `?options` for how to set an option.
+#'
+#' - `lastdose.time_units`: sets the default time unit that is used to calculate
+#'   relative times when the time column is represented as date-time data
+#'   (`POSIXct`)
+#' - `lastdose.id_col`: sets the default value for the `id_col` argument
+#'   to last dose; this identifies the column that is to be used to distinguish
+#'   individuals; the data in this column may be numeric or character
+#' - `lastdose.include_tafd`: sets default value for `include_tafd`; if `TRUE`
+#'   then the time since the first dose record (EVID 1 or EVID 4) in the data
+#'   set will be automatically appended to the output data frame when
+#'   calling `lastdose()`; `tafd` is always included when calling
+#'   `lastdose_df()` and `lastdose_list()`
+#'
 #' @details
 #'
-#' When calling [lastdose()] to modify the data frame, three columns will be
+#' When calling [lastdose()] to modify the data frame, two columns will be
 #' added (by default): `TAD` indicating the time after the most-recent dose,
-#' `TAFD` indicating the time after the first dose and `LDOS` indicating the
-#' amount of the most recent dose.  This default behavior can be modified with
-#' the `include_ldos` and `include_tafd` arguments.
+#' and `LDOS` indicating the amount of the most recent dose. `TAFD` indicating
+#' the time after the first dose record (`EVID` 1 or 4) can be added via the
+#' `include_tafd` argument and users can opt out from adding `LDOS` with the
+#' `include_ldos` argument.
+#'
 #'
 #' When calling [lastdose_list()] or [lastdose_df()], the respective items are
 #' accessible with `tad`,  `tafd`, and `ldos` (note the lower case form here to
 #' distinguish from the columns that might be added to the data frame).
 #'
+#' **Time after first dose**: note that time after first dose (`TAFD`) is the
+#' time after the first dosing record (`EVID` 1 or 4) in the data frame that
+#' you pass in. If you don't have a dosing record for the first dose to
+#' anchor this calculation, you should opt out.
+#'
 #' **Handling of commented records**: Dosing records that have been "commented"
 #' (as indicated with the `comments` argument) will never be considered as
 #' actual doses when determining `TAD`, `TAFD`, and `LDOS`.  But commented
@@ -102,8 +132,9 @@ NULL
 #'
 #'
 #' @export
-lastdose <- function(data,..., include_ldos = TRUE, include_tafd = TRUE) {
-  ans <- lastdose_list(data,...)
+lastdose <- function(data, ..., include_ldos = TRUE,
+                     include_tafd = getOption("lastdose.include_tafd", FALSE)) {
+  ans <- lastdose_list(data, ...)
   data[["TAD"]] <- ans[["tad"]]
   if(include_tafd) data[["TAFD"]] <- ans[["tafd"]]
   if(include_ldos) data[["LDOS"]] <- ans[["ldos"]]
@@ -113,40 +144,38 @@ lastdose <- function(data,..., include_ldos = TRUE, include_tafd = TRUE) {
 #' @rdname lastdose
 #' @export
 lastdose_list <- function(data,
-                          time_col = "TIME",
-                          time_units = NULL,
-                          id_col = "ID",
+                          time_col = find_time_col(data),
+                          time_units = getOption("lastdose.time_units", NULL),
+                          id_col = find_id_col(data),
                           fill = -99,
                           back_calc = TRUE,
                           addl_ties = c("obs_first", "dose_first"),
                           comments = find_comments(data)) {
-  if(length(comments)==1) {
+
+  if(length(comments) == 1) {
     comments <- rep(comments,nrow(data))
   }
-  if(!length(comments)==nrow(data)) {
-    stop("'comments' must be have length equal to the number of rows in 'data'",call.=FALSE)
+  if(length(comments) != nrow(data)) {
+    stop(
+      "'comments' must be have length equal to the number of rows in 'data'",
+      call. = FALSE
+    )
   }
   addl_ties <- match.arg(addl_ties)
   sort1 <- addl_ties == "obs_first"
   lower_names <- tolower(names(data))
-  wid <- match(id_col, names(data))
-  if(is.na(wid)) {
-    stop("did not find id column `", id_col, "` in `data`", call.=FALSE)
-  }
-  col_id <- data[[wid]]
-  if(is.character(col_id)) {
-    col_id <- match(col_id, unique(col_id))
-  }
-  if(!is.numeric(col_id)) {
-    stop("id column is required to be numeric", call.=FALSE)
-  }
   wtime <- match(time_col, names(data))
   if(is.na(wtime)) {
-    stop("did not find time column `", time_col, "` in `data`", call.=FALSE)
+    stop("did not find time column `", time_col, "` in `data`", call. = FALSE)
+  }
+  has_na_time <- anyNA(data[[wtime]])
+  if(has_na_time) {
+    na_time <- is.na(data[[wtime]])
+    data <- data[!na_time,, drop = FALSE]
   }
   col_time <- data[[wtime]]
   if(inherits(col_time, "POSIXct")) {
-    if(missing(time_units)) {
+    if(is.null(time_units)) {
       stop(
         "`time_units` is required when time column inherits `POSIXct`",
         call.=FALSE
@@ -164,6 +193,17 @@ lastdose_list <- function(data,
   if(!is.numeric(col_time)) {
     stop("time column is required to be numeric", call.=FALSE)
   }
+  wid <- match(id_col, names(data))
+  if(is.na(wid)) {
+    stop("did not find id column `", id_col, "` in `data`", call.=FALSE)
+  }
+  col_id <- data[[wid]]
+  if(is.character(col_id)) {
+    col_id <- match(col_id, unique(col_id))
+  }
+  if(!is.numeric(col_id)) {
+    stop("id column is required to be numeric", call.=FALSE)
+  }
   wamt <- match("amt", lower_names)
   if(is.na(wamt)) {
     stop("column AMT or amt is required in the data set", call.=FALSE)
@@ -214,19 +254,25 @@ lastdose_list <- function(data,
     sort1,
     comments
   )
+  if(has_na_time) {
+    re_order <- order(c(which(!na_time), which(na_time)))
+    for(j in seq_along(ans)) {
+      ans[[j]] <- ans[[j]][re_order]
+    }
+  }
   ans
 }
 
 #' @rdname lastdose
 #' @export
-lastdose_df <- function(data,...) {
-  ans <- lastdose_list(data,...)
+lastdose_df <- function(data, ...) {
+  ans <- lastdose_list(data, ...)
   data.frame(
     tad = ans[["tad"]],
     tafd = ans[["tafd"]],
     ldos = ans[["ldos"]],
-    stringsAsFactors=FALSE,check.names=FALSE,
-    fix.empty.names=FALSE, row.names=NULL
+    stringsAsFactors = FALSE, check.names = FALSE,
+    fix.empty.names = FALSE, row.names = NULL
   )
 }
 
@@ -260,11 +306,11 @@ lastdose_df <- function(data,...) {
 #'
 #'
 #' @export
-find_comments <- function(x,...) UseMethod("find_comments")
+find_comments <- function(x, ...) UseMethod("find_comments")
 #' @rdname find_comments
 #'
 #' @export
-find_comments.data.frame <- function(x,...) {
+find_comments.data.frame <- function(x, ...) {
   if(!inherits(x[["C"]], c("logical", "character"))) {
     if(exists("C", x)) {
       warning(
@@ -279,7 +325,7 @@ find_comments.data.frame <- function(x,...) {
 
 #' @rdname find_comments
 #' @export
-find_comments.character <- function(x,...) {
+find_comments.character <- function(x, ...) {
   !(is.na(x)|x=='.')
 }
 
@@ -288,3 +334,66 @@ find_comments.character <- function(x,...) {
 find_comments.logical <- function(x, ...) {
   x & !is.na(x)
 }
+
+#' Find TIME column
+#'
+#' Search data frame names for the first matching candidate TIME column name.
+#' See `details`.
+#'
+#' @param data a data.frame to search
+#' @details
+#' Column names will be searched against the following candidates
+#'
+#' - `TIME`
+#' - `DATETIME`
+#'
+#' The first the first candidate to be matched will be returned. If there
+#' are no matches, an error is generated.
+#'
+#' @examples
+#' data <- data.frame(A = 1, DATETIME = 2, TIME = 3, Z = 99)
+#' lastdose:::find_time_col(data)
+#'
+find_time_col <- function(data) {
+  stopifnot(is.data.frame(data))
+  ans <- intersect(c("TIME", "DATETIME"), names(data))
+  if(length(ans)==0) {
+    stop("could not find a TIME column in `data`", call. = FALSE)
+  }
+  ans[1]
+}
+
+#' Find ID column
+#'
+#' Search data frame names for the first matching candidate ID column name.
+#' See `details`.
+#'
+#' @param data a data.frame to search
+#' @details
+#' Column names will be searched against the following candidates
+#'
+#' - `getOption("lastdose.id_col")`
+#' - `ID`
+#' - `USUBJID`
+#' - `SUBJID`
+#' - `PTNO`
+#' - `SUBJ`
+#'
+#' The first the first candidate to be matched will be returned. If there
+#' are no matches, an error is generated.
+#'
+#' @examples
+#' data <- data.frame(A = 1, B = 2, PTNO = 3, ID = 4, Z = 99)
+#' lastdose:::find_id_col(data)
+#'
+find_id_col <- function(data) {
+  stopifnot(is.data.frame(data))
+  op <- getOption("lastdose.id_col", NULL)
+  can <- c(op, "ID", "USUBJID", "SUBJID", "PTNO", "SUBJ")
+  ans <- intersect(can, names(data))
+  if(length(ans)==0) {
+    stop("could not find a subject identifier column in `data`", call. = FALSE)
+  }
+  ans[1]
+}
+
diff --git a/man/find_id_col.Rd b/man/find_id_col.Rd
diff --git a/man/find_time_col.Rd b/man/find_time_col.Rd