diff --git a/R/modelling.R b/R/modelling.R index c1fe84a7..ae36e98b 100644 --- a/R/modelling.R +++ b/R/modelling.R @@ -1,3 +1,65 @@ +# TODO Complete @param documentation + +stop_if_missing <- function(serodata, must_have_cols) { + if ( + !all( + must_have_cols + %in% colnames(serodata) + ) + ) { + missing <- must_have_cols[which(!(must_have_cols %in% colnames(serodata)))] + stop( + "The following mandatory columns in `serodata` are missing.", + toString(missing) + ) + } +} + +warn_missing <- function(serodata, optional_cols) { + if ( + !all( + optional_cols + %in% colnames(serodata) + ) + ) { + missing <- optional_cols[which(!(optional_cols %in% colnames(serodata)))] + warning( + "The following optional columns in `serodata` are missing.", + "Consider including them to get more information from this analysis", + toString(missing) + ) + for (col in missing) { + serodata[[col]] <- "None" # TODO Shouln't we use `NA` instead? + } + } +} + +validate_serodata <- function(serodata) { + stop_if_missing(serodata, + must_have_cols = c("survey", "total", "counts", "tsur") + ) + warn_missing(serodata, + optional_cols = c("country", "test", "antibody") + ) + + # Check that the serodata has the necessary columns to fully + # identify the age groups + stopifnot( + "serodata must contain both 'age_min' and 'age_max', + or 'age_mean_f' to fully identify the age groups" = + all(c( + "age_min", "age_max" + ) %in% colnames(serodata)) | + "age_mean_f" %in% colnames(serodata) + ) +} + +validate_prepared_serodata <- function(serodata) { + stop_if_missing(serodata, + must_have_cols = c("total", "counts", "tsur", "age_mean_f", "birth_year") + ) +} + #' Function that runs the specified stan model for the Force-of-Infection and #' estimates the seroprevalence based on the result of the fit #' @@ -130,21 +192,17 @@ fit_seromodel <- function( ) { # TODO Add a warning because there are exceptions where a minimal amount of # iterations is needed - # Validate arguments - validate_serodata(serodata) + # Validate arguments + # validate_serodata(serodata) + validate_prepared_serodata(serodata) stopifnot( - "foi_model must be either `constant`, `tv_normal_log`, or `tv_normal`" - = foi_model %in% c("constant", "tv_normal_log", "tv_normal"), - "n_iters must be numeric" - = is.numeric(n_iters), - "n_thin must be numeric" - = is.numeric(n_thin), - "delta must be numeric" - = is.numeric(delta), - "m_treed must be numeric" - = is.numeric(m_treed), - "decades must be numeric" - = is.numeric(decades) + "foi_model must be either `constant`, `tv_normal_log`, or `tv_normal`" + = foi_model %in% c("constant", "tv_normal_log", "tv_normal"), + "n_iters must be numeric" = is.numeric(n_iters), + "n_thin must be numeric" = is.numeric(n_thin), + "delta must be numeric" = is.numeric(delta), + "m_treed must be numeric" = is.numeric(m_treed), + "decades must be numeric" = is.numeric(decades) ) model <- stanmodels[[foi_model]] cohort_ages <- get_cohort_ages(serodata = serodata) diff --git a/R/seroprevalence_data.R b/R/seroprevalence_data.R index 55fa1550..7b068212 100644 --- a/R/seroprevalence_data.R +++ b/R/seroprevalence_data.R @@ -45,60 +45,9 @@ prepare_serodata <- function(serodata = serodata, alpha = 0.05) { checkmate::assert_numeric(alpha, lower = 0, upper = 1) - # Check that serodata has the right columns - cols_check <- c("survey", "total", "counts", "tsur") - if ( - !all( - cols_check - %in% colnames(serodata) - ) - ) { - stop( - "serodata must contain the right columns. ", - sprintf( - "Column(s) (%s) are missing.", toString( - cols_check[which(!(cols_check %in% colnames(serodata)))] - ) - ) - ) - } - - # Check that the serodata has the necessary columns to fully - # identify the age groups - stopifnot( - "serodata must contain both 'age_min' and 'age_max', - or 'age_mean_f' to fully identify the age groups" = - all(c( - "age_min", "age_max" - ) %in% colnames(serodata)) | - "age_mean_f" %in% colnames(serodata) - ) - - if (!any(colnames(serodata) == "country")) { - warning( - "Column 'country' is missing. ", - "Consider adding it as additional information." - ) - serodata$country <- "None" - } + validate_serodata(serodata) - if (!any(colnames(serodata) == "test")) { - warning( - "Column 'test' is missing. ", - "Consider adding it as additional information." - ) - serodata$test <- "None" - } - - if (!any(colnames(serodata) == "antibody")) { - warning( - "Column 'antibody' is missing. ", - "Consider adding it as additional information." - ) - serodata$antibody <- "None" - } - if (!any(colnames(serodata) == "age_mean_f")) { serodata <- serodata %>% dplyr::mutate( @@ -113,6 +62,7 @@ prepare_serodata <- function(serodata = serodata, birth_year = .data$tsur - .data$age_mean_f ) } + serodata <- serodata %>% cbind( Hmisc::binconf(