rewrote validate_serodata, to be used inside prepare_serodata. Ch…

…anged `validate_serodata` to `validate_prepared_serodata` within fit_seromodel
epiverse-trace · Feb 7, 2024 · f10c0ba · f10c0ba
1 parent 2323e1a
commit f10c0ba
Show file tree

Hide file tree

Showing 2 changed files with 74 additions and 66 deletions.
diff --git a/R/modelling.R b/R/modelling.R
@@ -1,3 +1,65 @@
+# TODO Complete @param documentation
+
+stop_if_missing <- function(serodata, must_have_cols) {
+  if (
+    !all(
+      must_have_cols
+      %in% colnames(serodata)
+    )
+  ) {
+    missing <- must_have_cols[which(!(must_have_cols %in% colnames(serodata)))]
+    stop(
+      "The following mandatory columns in `serodata` are missing.",
+      toString(missing)
+    )
+  }
+}
+
+warn_missing <- function(serodata, optional_cols) {
+  if (
+    !all(
+      optional_cols
+      %in% colnames(serodata)
+    )
+  ) {
+    missing <- optional_cols[which(!(optional_cols %in% colnames(serodata)))]
+    warning(
+      "The following optional columns in `serodata` are missing.",
+      "Consider including them to get more information from this analysis",
+      toString(missing)
+    )
+    for (col in missing) {
+      serodata[[col]] <- "None" # TODO Shouln't we use `NA` instead?
+    }
+  }
+}
+
+validate_serodata <- function(serodata) {
+  stop_if_missing(serodata,
+    must_have_cols = c("survey", "total", "counts", "tsur")
+  )
+  warn_missing(serodata,
+    optional_cols = c("country", "test", "antibody")
+  )
+
+  # Check that the serodata has the necessary columns to fully
+  # identify the age groups
+  stopifnot(
+    "serodata must contain both 'age_min' and 'age_max',
+    or 'age_mean_f' to fully identify the age groups" =
+      all(c(
+        "age_min", "age_max"
+      ) %in% colnames(serodata)) |
+        "age_mean_f" %in% colnames(serodata)
+  )
+}
+
+validate_prepared_serodata <- function(serodata) {
+  stop_if_missing(serodata,
+    must_have_cols = c("total", "counts", "tsur", "age_mean_f", "birth_year")
+  )
+}
+
 #' Function that runs the specified stan model for the Force-of-Infection and
 #' estimates the seroprevalence based on the result of the fit
 #'
@@ -130,21 +192,17 @@ fit_seromodel <- function(
   ) {
   # TODO Add a warning because there are exceptions where a minimal amount of
   # iterations is needed
-    # Validate arguments
-  validate_serodata(serodata)
+  # Validate arguments
+  # validate_serodata(serodata)
+  validate_prepared_serodata(serodata)
   stopifnot(
-        "foi_model must be either `constant`, `tv_normal_log`, or `tv_normal`"
-              = foi_model %in% c("constant", "tv_normal_log", "tv_normal"),
-        "n_iters must be numeric"
-              = is.numeric(n_iters),
-        "n_thin must be numeric"
-              = is.numeric(n_thin),
-        "delta must be numeric"
-              = is.numeric(delta),
-        "m_treed must be numeric"
-              = is.numeric(m_treed),
-        "decades must be numeric"
-              = is.numeric(decades)
+    "foi_model must be either `constant`, `tv_normal_log`, or `tv_normal`" 
+      = foi_model %in% c("constant", "tv_normal_log", "tv_normal"),
+    "n_iters must be numeric" = is.numeric(n_iters),
+    "n_thin must be numeric" = is.numeric(n_thin),
+    "delta must be numeric" = is.numeric(delta),
+    "m_treed must be numeric" = is.numeric(m_treed),
+    "decades must be numeric" = is.numeric(decades)
   )
   model <- stanmodels[[foi_model]]
   cohort_ages <- get_cohort_ages(serodata = serodata)

diff --git a/R/seroprevalence_data.R b/R/seroprevalence_data.R
@@ -45,60 +45,9 @@
 prepare_serodata <- function(serodata = serodata,
                              alpha = 0.05) {
   checkmate::assert_numeric(alpha, lower = 0, upper = 1)
-  # Check that serodata has the right columns
-  cols_check <- c("survey", "total", "counts", "tsur")
-  if (
-    !all(
-      cols_check
-      %in% colnames(serodata)
-      )
-    ) {
-      stop(
-        "serodata must contain the right columns. ",
-        sprintf(
-          "Column(s) (%s) are missing.", toString(
-            cols_check[which(!(cols_check %in% colnames(serodata)))]
-          )
-        )
-      )
-  }
-
-  # Check that the serodata has the necessary columns to fully
-  # identify the age groups
-  stopifnot(
-    "serodata must contain both 'age_min' and 'age_max',
-    or 'age_mean_f' to fully identify the age groups" =
-      all(c(
-        "age_min", "age_max"
-        ) %in% colnames(serodata)) |
-        "age_mean_f" %in% colnames(serodata)
-  )
-
-  if (!any(colnames(serodata) == "country")) {
-    warning(
-        "Column 'country' is missing. ",
-        "Consider adding it as additional information."
-    )
-    serodata$country <- "None"
-  }
+  validate_serodata(serodata)
 
 
-  if (!any(colnames(serodata) == "test")) {
-    warning(
-        "Column 'test' is missing. ",
-        "Consider adding it as additional information."
-    )
-    serodata$test <- "None"
-  }
-
-  if (!any(colnames(serodata) == "antibody")) {
-    warning(
-        "Column 'antibody' is missing. ",
-        "Consider adding it as additional information."
-    )
-    serodata$antibody <- "None"
-  }
-
   if (!any(colnames(serodata) == "age_mean_f")) {
     serodata <- serodata %>%
       dplyr::mutate(
@@ -113,6 +62,7 @@ prepare_serodata <- function(serodata = serodata,
       birth_year = .data$tsur - .data$age_mean_f
     )
   }
+
   serodata <- serodata %>%
     cbind(
       Hmisc::binconf(