Merge remote-tracking branch 'origin/main' into refac-effectiveness-m…

…atching
epiverse-trace · Jun 15, 2024 · 343963f · 343963f
2 parents a1d4f8a + 50d51a1
commit 343963f
Show file tree

Hide file tree

Showing 19 changed files with 307 additions and 70 deletions.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -10,7 +10,7 @@ assignees: ''
 Please place an "x" in all the boxes that apply
 ---------------------------------------------
 
-- [ ] I have the most recent version of linelist and R
+- [ ] I have the most recent version of vaccineff and R
 - [ ] I have found a bug
 - [ ] I have a [reproducible example](http://reprex.tidyverse.org/articles/reprex-dos-and-donts.html)
 - [ ] I want to request a new feature

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,18 +2,29 @@ Package: vaccineff
 Title: Estimate Vaccine Effectiveness Based on Different Study Designs
 Version: 0.0.1
 Authors@R: c(
-    person(given = "Zulma M.", family = "Cucunubá", email = "zulma.cucunuba@javeriana.edu.co", role = c("aut", "cre"),
-        comment = c(ORCID = "0000-0002-8165-3198")),
     person(given = "David Santiago", family = "Quevedo", email = "ex-dsquevedo@javeriana.edu.co", role = c("aut"),
         comment = c(ORCID = "0000-0003-1583-4262")),
-    person(given = "Santiago", family = "Loaiza", email = "santiago.loaiza@javeriana.edu.co", role = c("aut"),
-        comment = c(ORCID = "0000-0002-2092-3262")),
+    person(given = "Zulma M.", family = "Cucunubá", email = "zulma.cucunuba@javeriana.edu.co", role = c("aut", "cre"),
+        comment = c(ORCID = "0000-0002-8165-3198")),
+    person("International Development Research Center (IDRC)", role = c("fnd")),
     person(given = "Geraldine", family = "Gómez Millán", email = "geralidine.gomez@javeriana.edu.co", role = c("ctb"),
         comment = c(ORCID = "0009-0007-8701-0568")),
     person(given = "Pratik", family = "Gupte", email = "pratik.gupte@lshtm.ac.uk", role = c("ctb"), 
-           comment = c(ORCID = "0000-0001-5294-7819"))
+        comment = c(ORCID = "0000-0001-5294-7819")),
+    person(given = "Érika J", family = "Cantor", email = "erika.cantor@javeriana.edu.co", role = c("ctb"), 
+        comment = c(ORCID = "0000-0003-3320-6032")),
+    person(given = "Santiago", family = "Loaiza", email = "santiago.loaiza@javeriana.edu.co", role = c("ctb"),
+        comment = c(ORCID = "0000-0002-2092-3262")),
+    person(given = "Jaime", family = "Pavlich-Mariscal", email = "jpavlich@javeriana.edu.co", role = c("ctb"),
+           comment = c(ORCID = "0000-0002-3892-6680")),
+    person(given = "Hugo", family = "Gruson", email = "hugo.gruson@data.org", role = c("ctb"),
+           comment = c(ORCID = "0000-0002-4094-1476")),
+    person(given = "Chris", family = "Hartgerink", email = "chris@data.org", role = "ctb",
+           comment = c(ORCID = "0000-0003-1050-6809")),
+    person(given = "Felipe Segundo", family = "Abril", email = "fsabrilb@unal.edu.co", role = "ctb",
+           comment = c(ORCID = "0000-0002-2512-4929"))
   )
-Description: Estimate vaccine effectiveness based on different observational study designs, as discussed in Torvaldsen and McIntyre (2020) <doi:10.3316/informit.511798489353134>.
+Description: R package with tools for estimating vaccine effectiveness and vaccine related metrics.
 URL: https://github.com/epiverse-trace/vaccineff, https://epiverse-trace.github.io/vaccineff/
 BugReports: https://github.com/epiverse-trace/vaccineff/issues
 License: MIT + file LICENSE

diff --git a/R/coh_matching.R b/R/coh_matching.R
@@ -0,0 +1,219 @@
+#' @title Static match of cohort
+#'
+#' @description This function builds couples of vaccinated - unvaccinated
+#' individuals with similar characteristics. The function relies on  the
+#' propensity score matching algorithm implemented in MatchIt package.
+#' By default the function uses `method = "nearest"`, `ratio = 1`,
+#' `distance = "glm"` to match the data.
+#' Exact and near characteristics are accepted for the matching criteria.
+#' These are passed in the parameters `exact` and `nearest`, respectively.
+#' Parameters `nearest` and `caliper` must be provided together. In this case,
+#' the calipers must be passed as a named vector containing each of
+#' the variables provided in `nearest`
+#' (e.g. `nearest = c("characteristic1", "characteristic2"),
+#' caliper = c(characteristic1 = n1, characteristic2 = n2)`,
+#' where `n1` and `n2` are the calipers).
+#' `caliper` is ignored (set to NULL) when `nearest` is not provided.
+#'
+#' @param data dataset with cohort information (see example)
+#' @param status_vacc_col name of the column containing the information
+#' of the vaccination status.
+#' @param exact name(s) of column(s) for `exact` matching.
+#' Default to `NULL`.
+#' @param nearest named vector with name(s) of column(s) for `nearest`
+#' matching and caliper(s) for each variable.
+#' e.g. `nearest = c("characteristic1" = n1, "characteristic2" = n2)`,
+#' where `n1` and `n2` are the calipers. Default to `NULL`.
+#' @return data frame with matched population. Two columns are added
+#' to the structure provided in `data`:
+#' `prop_score` (propensity score of the match),
+#' `subclass` (id of matched couple)
+#' @examples
+#' # load package example data for cohort studies
+#' data("cohortdata")
+#'
+#' # assign vaccination status
+#' cohortdata$vaccine_status <- set_status(
+#'   data = cohortdata,
+#'   col_names = c("vaccine_date_1", "vaccine_date_2"),
+#'   status = c("v", "u")
+#' )
+#'
+#' # match cohort
+#' matched_cohort <- match_cohort(data = cohortdata,
+#'   status_vacc_col = "vaccine_status",
+#'   nearest = c(age = 1),
+#'   exact = "sex"
+#' )
+#'
+#' # view matched data
+#' head(matched_cohort)
+#' @export
+match_cohort <- function(data,
+                         status_vacc_col,
+                         exact = NULL,
+                         nearest = NULL) {
+
+  # input checking
+  checkmate::assert_data_frame(
+    data,
+    min.rows = 1, min.cols = 1
+  )
+  checkmate::assert_character(status_vacc_col,
+    any.missing = FALSE, min.len = 1
+  )
+  checkmate::assert_names(
+    names(data),
+    must.include = c(status_vacc_col)
+  )
+
+  # `exact` and `nearest` cannot be NULL. At least one must be provided
+  stopifnot(
+    "`exact` and `nearest` cannot be NULL. At least one must be provided" =
+      (!missing(nearest) || !missing(exact))
+  )
+
+  # checks for `nearest`
+  if (!is.null(nearest)) {
+    checkmate::assert_numeric(
+      nearest,
+      any.missing = FALSE, min.len = 1, names = "named"
+    )
+    checkmate::assert_names(
+      names(data),
+      must.include = names(nearest)
+    )
+  }
+  # checks for `exact`. Not else, both can be non-NULL
+  if (!is.null(exact)) {
+    checkmate::assert_character(exact,
+      any.missing = FALSE, min.len = 1
+    )
+    checkmate::assert_names(
+      names(data),
+      must.include = exact
+    )
+  }
+
+  #Formula
+  variables <- c(exact, names(nearest))
+  formula <- paste0(status_vacc_col, " ~ ")
+  for (v in seq_along(variables)) {
+    if (v == 1) {
+      formula <- paste0(formula, variables[v])
+    } else {
+      formula <- paste0(formula, " + ", variables[v])
+    }
+  }
+  formula_eval <- eval(parse(text = formula))
+  data[[status_vacc_col]] <- as.factor(data[[status_vacc_col]])
+
+  #Matching
+  matchit <- MatchIt::matchit(
+    formula_eval,
+    data = data,
+    method = "nearest",
+    ratio = 1,
+    exact = exact,
+    nearest = names(nearest),
+    caliper = nearest,
+    distance = "glm"
+  )
+  match <- MatchIt::match.data(matchit, distance = "prop.score")
+  names(match) <- gsub(x = names(match),
+    pattern = ".",
+    replacement = "_",
+    fixed = TRUE
+  )
+  match <- match[, -which(names(match) == "weights")]
+  return(match)
+}
+
+#' @title Censor couple after matching
+#'
+#' @description This function censors a couple whether the case or the control
+#' have a censoring date. It imputes the censoring date to the whole couple
+#' using the matching id provided in subclass. This column comes with the output
+#' of `match_cohort`.
+#'
+#' @inheritParams get_immunization_date
+#' @examples
+#' # load package example data for cohort studies
+#' data("cohortdata")
+#'
+#' # assign vaccination status
+#' cohortdata$vaccine_status <- set_status(
+#'   data = cohortdata,
+#'   col_names = c("vaccine_date_1", "vaccine_date_2"),
+#'   status = c("v", "u")
+#' )
+#'
+#' # match cohort
+#' matched_cohort <- match_cohort(data = cohortdata,
+#'   status_vacc_col = "vaccine_status",
+#'   nearest = c(age = 1),
+#'   exact = "sex"
+#' )
+#'
+#' # add column with censoring date for match
+#' matched_cohort$censoring_date_match <-  get_censoring_date_match(
+#'   data = matched_cohort,
+#'   outcome_date_col = "death_date",
+#'   censoring_date_col = "death_other_causes"
+#' )
+#'
+#' # view data with added column
+#' head(matched_cohort)
+#' @export
+get_censoring_date_match <- function(data,
+                                     outcome_date_col,
+                                     censoring_date_col) {
+  # check for data frame type
+  checkmate::assert_data_frame(
+    data,
+    min.rows = 1L
+  )
+
+  # check for string type
+  checkmate::assert_string(outcome_date_col)
+  checkmate::assert_string(censoring_date_col)
+
+  # check for names in data
+  checkmate::assert_names(
+    colnames(data),
+    must.include = c(outcome_date_col, censoring_date_col, "subclass")
+  )
+
+  # check for date type
+  checkmate::assert_date(data[[outcome_date_col]])
+  checkmate::assert_date(data[[censoring_date_col]])
+
+  # create censoring date for every couple indexed by subclass
+  censoring_date <- unlist(
+    tapply(data[[censoring_date_col]],
+      data$subclass,
+      function(x) {
+        if (all(is.na(x))) {
+          return(as.Date(NA))
+        } else {
+          return(as.character(min(x, na.rm = TRUE)))
+        }
+      }
+    )
+  )
+  # return data matched by subclass
+  data$censoring_date_match <- as.Date(censoring_date[data$subclass])
+
+  # if outcome happens before censoring_date_match
+  # no censoring must be assigned
+  data$censoring_date_match <-
+    as.Date(ifelse(
+      (data$censoring_date_match > data[[outcome_date_col]]) &
+        (!is.na(data$censoring_date_match)) &
+        (!is.na(data[[outcome_date_col]])),
+      as.Date(NA),
+      as.character(data$censoring_date_match)
+    ))
+  return(data$censoring_date_match)
+}
+
diff --git a/README.Rmd b/README.Rmd
@@ -16,7 +16,7 @@ knitr::opts_chunk$set(
 )
 ```
 
-## *{{ packagename }}*: An R package with tools for estimating vaccine effectiveness and vaccine related metrics <img src="man/figures/vaccineff.png" align="right" width="130"/>
+## *{{ packagename }}*: An R package with tools for estimating vaccine effectiveness and vaccine related metrics <img src="man/figures/logo.png" align="right" width="130"/>
 
 <!-- badges: start -->
 
@@ -29,15 +29,11 @@ knitr::opts_chunk$set(
 
 <!-- badges: end -->
 
-*{{ packagename }}* is an R package that offers tools for estimating the
-vaccine effectiveness, using a series of epidemiological designs including cohort studies, test-negative case-control, and screening
-methods [@torvaldsen2020]. The package provides a set of features for preparing the data,
-to analyze different study designs and for assessing the performance of
-the models.
-
 *{{ packagename }}* is developed at [Pontificia Universidad Javeriana](https://www.javeriana.edu.co/inicio) as part of the
 [Epiverse-TRACE initiative](https://data.org/initiatives/epiverse/).
 
+*{{ packagename }}* is an R package that offers tools for estimating vaccine effectiveness, using a series of epidemiological designs including cohort studies, test-negative case-control, and screening methods [@torvaldsen2002]. The current version of the package provides a set of features for preparing, visualizing, and managing cohort data, estimating vaccine effectiveness, and assessing the performance of the models. Test-negative design and screening method will be included in future versions.
+
 ## Installation
 
 The current development version of _{{ packagename }}_ can be installed from [GitHub](https://github.com/) using the `pak` package.
@@ -53,8 +49,8 @@ pak::pak("{{ gh_repo }}")
 library(vaccineff)
 ```
 
-*{{ packagename }}* provides minimal datasets that can be used to test out
-each design `(cohortdata, testnegdata, screeningdata)`
+*{{ packagename }}* provides a minimal cohort dataset that can be used to test out
+the models.
 
 ```{r example, include = TRUE, echo = TRUE}
 # Load example `cohortdata` included in the package
@@ -74,25 +70,16 @@ To report a bug or to request a new feature please open an [issue](https://githu
 
 Contributions to _{{ packagename }}_ are welcomed. Please follow the [package contributing guide](https://github.com/{{ gh_repo }}/blob/main/.github/CONTRIBUTING.md).
 
-## Code of conduct
-
-Please note that the _{{ packagename }}_ project is released with a [Contributor Code of Conduct](https://github.com/epiverse-trace/.github/blob/main/CODE_OF_CONDUCT.md). By contributing to this project, you agree to abide by its terms.
-
-## Contributions
-
 Contributors to the project include:
 
--   [David Santiago Quevedo](https://github.com/davidsantiagoquevedo)
-    (author)
+**Authors**: [David Santiago Quevedo](https://github.com/davidsantiagoquevedo) and [Zulma M. Cucunubá](https://github.com/zmcucunuba) (maintainer)
 
--   [Zulma M. Cucunubá](https://github.com/zmcucunuba) (author)
+**Contributors**:
+[Geraldine Gómez](https://github.com/GeraldineGomez), [Pratik Gupte](https://github.com/pratikunterwegs), [Érika J. Cantor](https://github.com/ErikaCantor), [Santiago Loaiza](https://github.com/santilo9513), [Jaime A. Pavlich-Mariscal](https://github.com/jpavlich), [Hugo Gruson](https://github.com/Bisaloo), [Chris Hartgerink](https://github.com/chartgerink), [Felipe Segundo Abril-Bermúdez](https://github.com/fsabrilb)
 
--   [Santiago Loaiza](https://github.com/santilo9513) (author)
 
--   [Geraldine Gómez](https://github.com/GeraldineGomez) (contributor)
-
--   [Jaime A. Pavlich-Mariscal](https://github.com/jpavlich) (contributor)
+## Code of conduct
 
--   [Pratik Gupte](https://github.com/pratikunterwegs) (contributor)
+Please note that the _{{ packagename }}_ project is released with a [Contributor Code of Conduct](https://github.com/epiverse-trace/.github/blob/main/CODE_OF_CONDUCT.md). By contributing to this project, you agree to abide by its terms.
 
 ## References