diff --git a/.Rbuildignore b/.Rbuildignore index 158350c7..128b8aec 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,3 +14,4 @@ ^Meta$ ^.lintr$ ^.*/tests/docker/.*$ +^vignettes/articles$ diff --git a/DESCRIPTION b/DESCRIPTION index b06d8618..9e244ff4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: serofoi Type: Package -Title: Estimates the Force-of-Infection of a given pathogen from population based seroprevalence studies -Version: 0.1.0 +Title: Estimates the Force-of-Infection of a given pathogen from population-based seroprevalence studies +Version: 1.0.1 Authors@R: c( person( @@ -36,7 +36,7 @@ License: MIT + file LICENSE Encoding: UTF-8 Language: en-GB LazyData: true -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 NeedsCompilation: yes Depends: R (>= 3.5.0) @@ -55,7 +55,9 @@ Imports: purrr, tidyr, tibble, - Matrix + Matrix, + glue, + config LinkingTo: BH (>= 1.66.0), Rcpp (>= 0.12.0), diff --git a/NAMESPACE b/NAMESPACE index 6dad2c97..87e703e5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,27 +1,29 @@ # Generated by roxygen2: do not edit by hand -export(extract_seromodel_summary) +export(build_stan_data) +export(extract_central_estimates) export(fit_seromodel) -export(get_chunk_structure) -export(get_cohort_ages) -export(get_foi_central_estimates) -export(get_prev_expanded) -export(get_table_rhats) -export(plot_foi) -export(plot_info_table) -export(plot_rhats) +export(get_foi_index) +export(plot_foi_estimates) export(plot_seromodel) -export(plot_seroprev) -export(plot_seroprev_fitted) -export(prepare_serodata) +export(plot_seroprevalence_estimates) +export(plot_serosurvey) +export(plot_summary) +export(prepare_serosurvey_for_plotting) export(probability_seropositive_by_age) export(probability_seropositive_general_model_by_age) -export(run_seromodel) +export(set_foi_init) +export(sf_cauchy) +export(sf_normal) +export(sf_uniform) export(simulate_serosurvey) export(simulate_serosurvey_age_and_time_model) export(simulate_serosurvey_age_model) export(simulate_serosurvey_general_model) export(simulate_serosurvey_time_model) +export(summarise_central_estimate) +export(summarise_loo_estimate) +export(summarise_seromodel) import(Rcpp) import(dplyr) import(methods) diff --git a/R/build_stan_data.R b/R/build_stan_data.R new file mode 100644 index 00000000..534d02b6 --- /dev/null +++ b/R/build_stan_data.R @@ -0,0 +1,245 @@ +#' Sets normal distribution parameters for sampling +#' +#' @param mean Mean of the normal distribution +#' @param sd Standard deviation of the normal distribution +#' @return List with specified statistics and name of the model +#' @export +sf_normal <- function(mean = 0, sd = 1) { + # Restricting normal inputs to be non-negative + if (mean < 0 || sd <= 0) { + stop( + "Normal distribution only accepts", + " `mean>=0` and `sd>0` for mean and standard deviation") + } + + return(list(mean = mean, sd = sd, name = "normal")) +} + +#' Sets uniform distribution parameters for sampling +#' +#' @param min Minimum value of the random variable of the uniform distribution +#' @param max Maximum value of the random variable of the uniform distribution +#' @return List with specified statistics and name of the model +#' @export +sf_uniform <- function(min = 0, max = 10) { + # Restricting uniform inputs to be non-negative + if (min < 0 || (min >= max)) { + stop( + "Uniform distribution only accepts", + " 0<=min= max) { + message("Uniform distribution only accepts min < max") + } + + return(list(min = min, max = max, name = "uniform")) +} + +#' Sets Cauchy distribution parameters for sampling +#' +#' @param scale Scale +#' of the normal distribution +#' @param sd Standard deviation of the normal distribution +#' @return List with specified statistics and name of the model +#' @export +sf_cauchy <- function(location = 0, scale = 1) { + # Restricting normal inputs to be non-negative + if (location < 0 || scale < 0) { + stop( + "Normal distribution only accepts", + " `location>=0` and `scale=>0` for mean and standard deviation") + } + + return(list(location = location, scale = scale, name = "cauchy")) +} + +#' Sets empty distribution +sf_none <- function() { + return(list(name = "none")) +} + +#' Generates force-of-infection indexes for heterogeneous age groups +#' +#' Generates a list of integers indexing together the time/age intervals +#' for which FOI values will be estimated in [fit_seromodel]. +#' The max value in `foi_index` correspond to the number of FOI values to +#' be estimated when sampling. +#' @inheritParams fit_seromodel +#' @param group_size Age groups size +#' @return Integer vector with the indexes numerating each year/age +#' (depending on the model). +#' @examples +#' data(chagas2012) +#' foi_index <- get_foi_index(chagas2012, group_size = 25) +#' @export +get_foi_index <- function( + serosurvey, + group_size + ) { + checkmate::assert_int( + group_size, + lower = 1, + upper = max(serosurvey$age_max) + ) + + foi_index <- unlist( + purrr::map( + seq( + 1, + max(serosurvey$age_max) / group_size, + 1), + rep, + times = group_size + ) + ) + + foi_index <- c( + foi_index, + rep( + max(foi_index), + max(serosurvey$age_max) - length(foi_index) + ) + ) + + return(foi_index) +} + +#' Set stan data defaults for sampling +#' +#' @param stan_data List to be passed to [rstan][rstan::sampling] +#' @inheritParams fit_seromodel +#' @return List with default values of stan data for sampling +set_stan_data_defaults <- function( + stan_data, + is_log_foi = FALSE, + is_seroreversion = FALSE +) { + config_file <- system.file("extdata", "config.yml", package = "serofoi") + prior_default <- config::get(file = config_file, "priors")$defaults + + foi_defaults <- list( + foi_prior_index = prior_default$index, + foi_min = prior_default$min, + foi_max = prior_default$max, + foi_mean = prior_default$mean, + foi_sd = prior_default$sd + ) + # Add sigma defaults depending on scale + if (is_log_foi) { + # Normal distribution defaults + foi_defaults <- c( + foi_defaults, + list( + foi_sigma_rw_loc = prior_default$mean, + foi_sigma_rw_sc = prior_default$sd + ) + ) + } else { + # Cauchy distribution defaults + foi_defaults <- c( + foi_defaults, + list( + foi_sigma_rw_loc = prior_default$location, + foi_sigma_rw_sc = prior_default$scale + ) + ) + } + stan_data <- c( + stan_data, + foi_defaults + ) + + if (is_seroreversion) { + seroreversion_defaults <- list( + seroreversion_prior_index = prior_default$index, + seroreversion_min = prior_default$min, + seroreversion_max = prior_default$max, + seroreversion_mean = prior_default$mean, + seroreversion_sd = prior_default$sd + ) + stan_data <- c( + stan_data, + seroreversion_defaults + ) + } + + return(stan_data) +} + +#' Builds stan data for sampling depending on the selected model +#' +#' @inheritParams fit_seromodel +#' @return List with necessary data for sampling the specified model +#' @export +build_stan_data <- function( + serosurvey, + model_type = "constant", + foi_prior = sf_uniform(), + foi_index = NULL, + is_log_foi = FALSE, + foi_sigma_rw = sf_none(), + is_seroreversion = FALSE, + seroreversion_prior = sf_none() +) { + + stan_data <- list( + n_observations = nrow(serosurvey), + age_max = max(serosurvey$age_max), + ages = seq(1, max(serosurvey$age_max), 1), + n_seropositive = serosurvey$n_seropositive, + n_sample = serosurvey$n_sample, + age_groups = serosurvey$age_group + ) %>% + set_stan_data_defaults( + is_log_foi = is_log_foi, + is_seroreversion = is_seroreversion + ) + + if (is.null(foi_index)) { + foi_index_default <- get_foi_index(serosurvey = serosurvey, group_size = 1) + stan_data <- c( + stan_data, + list(foi_index = foi_index_default) + ) + } else { + # TODO: check that foi_index is the right size + stan_data <- c( + stan_data, + list(foi_index = foi_index) + ) + } + config_file <- system.file("extdata", "config.yml", package = "serofoi") + prior_index <- config::get(file = config_file, "priors")$indexes + + if (foi_prior$name == "uniform") { + stan_data$foi_prior_index <- prior_index[["uniform"]] + stan_data$foi_min <- foi_prior$min + stan_data$foi_max <- foi_prior$max + } else if (foi_prior$name == "normal") { + stan_data$foi_prior_index <- prior_index[["normal"]] + stan_data$foi_mean <- foi_prior$mean + stan_data$foi_sd <- foi_prior$sd + } + + if (foi_sigma_rw$name == "cauchy") { + stan_data$foi_sigma_rw_loc <- foi_sigma_rw$location + stan_data$foi_sigma_rw_sc <- foi_sigma_rw$scale + } + + if (is_seroreversion) { + if (seroreversion_prior$name == "none") { + stop("seroreversion_prior not specified") + } else if (seroreversion_prior$name == "uniform") { + stan_data$seroreversion_prior_index <- prior_index[["uniform"]] + stan_data$seroreversion_min <- seroreversion_prior$min + stan_data$seroreversion_max <- seroreversion_prior$max + } else if (seroreversion_prior$name == "normal") { + stan_data$seroreversion_prior_index <- prior_index[["normal"]] + stan_data$seroreversion_mean <- seroreversion_prior$mean + stan_data$seroreversion_sd <- seroreversion_prior$sd + } + } + + return(stan_data) +} diff --git a/R/chagas2012.R b/R/chagas2012.R deleted file mode 100644 index 6c7ea6f8..00000000 --- a/R/chagas2012.R +++ /dev/null @@ -1,17 +0,0 @@ -# TODO Check if we really need to have the package `qtl` installed. Otherwise -# remove all entries of the form `see \code{\link[qtl]...` -#' Seroprevalence data on serofoi -#' -#' Data from a serological surveys -#' -#' @docType data -#' -#' @usage chagas2012 -#' -#' @format An object of class `"cross"`; see [qtl::read.cross()]. -#' -#' @keywords datasets -#' -#' @examples -#' chagas2012 -"chagas2012" diff --git a/R/chik2015.R b/R/chik2015.R deleted file mode 100644 index 30142601..00000000 --- a/R/chik2015.R +++ /dev/null @@ -1,15 +0,0 @@ -#' Seroprevalence data on serofoi -#' -#' Data from a serological surveys -#' -#' @docType data -#' -#' @usage chik2015 -#' -#' @format An object of class `"cross"`; see [qtl::read.cross()]. -#' -#' @keywords datasets -#' -#' @examples -#' chik2015 -"chik2015" diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R new file mode 100644 index 00000000..c7121081 --- /dev/null +++ b/R/fit_seromodel.R @@ -0,0 +1,157 @@ +#' Adds age group marker to serosurvey +#' +#' @inheritParams fit_seromodel +add_age_group_to_serosurvey <- function(serosurvey) { + if (any(colnames(serosurvey) == "age_group")) { + message("Using `age_group` already present in serosurvey") + } else { + serosurvey <- serosurvey %>% + dplyr::mutate( + age_group = floor((.data$age_min + .data$age_max) / 2) + ) + } + return(serosurvey) +} + +#' Sets initialization function for sampling +#' +#' @inheritParams fit_seromodel +#' @export +set_foi_init <- function( + foi_init, + is_log_foi, + foi_index +) { + + # Set default behavior for initialization + if (is.null(foi_init)) { + config_file <- system.file("extdata", "config.yml", package = "serofoi") + init_default <- config::get(file = config_file, "priors")$defaults$init + + if (is_log_foi) { + foi_init <- function() { + list(log_foi_vector = rep(log(init_default), max(foi_index))) + } + } else { + foi_init <- function() { + list(foi_vector = rep(init_default, max(foi_index))) + } + } + } + + checkmate::assert_class(foi_init, "function") + checkmate::assert_double(unlist(foi_init()[[1]]), len = max(foi_index)) + + return(foi_init) +} + +#' Runs specified stan model for the force-of-infection +#' +#' @param serosurvey +#' \describe{ +#' \item{`survey_year`}{Year in which the survey took place +#' (only needed to plot time models)} +#' \item{`age_min`}{Floor value of the average between age_min and age_max} +#' \item{`age_max`}{The size of the sample} +#' \item{`n_sample`}{Number of samples for each age group} +#' \item{`n_seropositive`}{Number of positive samples for each age group} +#' } +#' @param model_type Type of the model. Either "constant", "age" or "time" +#' @param is_log_foi Boolean to set logarithmic scale in the FOI +#' @param foi_prior Force-of-infection distribution specified by means of +#' the helper functions. Currently available options are: +#' \describe{ +#' \item{[sf_normal]}{Function to set normal distribution priors} +#' \item{[sf_uniform]}{Function to set uniform distribution priors} +#' } +#' @param foi_sigma_rw Prior distribution for the standard deviation of the +#' force-of-infection. Currently available options are: +#' \describe{ +#' \item{[sf_normal]}{Function to set normal distribution prior. +#' Available for time models in the log-scale} +#' \item{[sf_cauchy]}{Function to set Cauchy distribution prior. +#' Available for time models in regular scale.} +#' } +#' @param foi_index Integer vector specifying the age-groups for which +#' force-of-infection values will be estimated. It can be specified by +#' means of [get_foi_index] +#' @param is_seroreversion Boolean specifying whether to include +#' seroreversion rate estimation in the model +#' @param seroreversion_prior seroreversion distribution specified by means of +#' the helper functions. Currently available options are: +#' \describe{ +#' \item{[sf_normal]}{Function to set normal distribution priors} +#' \item{[sf_uniform]}{Function to set uniform distribution priors} +#' \item{[sf_none]}{Function to set no prior distribution} +#' } +#' @param ... Additional parameters for [rstan][rstan::sampling] +#' @returns stan_fit object with force-of-infection and seroreversion +#' (when applicable) samples +#' @examples +#' data(veev2012) +#' seromodel <- fit_seromodel( +#' serosurvey = veev2012, +#' model_type = "time", +#' foi_index = get_foi_index(veev2012, group_size = 30) +#' ) +#' @export +fit_seromodel <- function( + serosurvey, + model_type = "constant", + is_log_foi = FALSE, + foi_prior = sf_normal(), + foi_sigma_rw = sf_none(), + foi_index = NULL, + foi_init = NULL, + is_seroreversion = FALSE, + seroreversion_prior = sf_normal(), + ... +) { + serosurvey <- serosurvey %>% + validate_serosurvey() %>% + add_age_group_to_serosurvey() + + stopifnot( + "model_type must be either 'constant', 'time' or 'age'" = + model_type %in% c("constant", "time", "age") + ) + + stan_data <- build_stan_data( + serosurvey = serosurvey, + model_type = model_type, + foi_prior = foi_prior, + foi_index = foi_index, + is_log_foi = is_log_foi, + foi_sigma_rw = foi_sigma_rw, + is_seroreversion = is_seroreversion, + seroreversion_prior = seroreversion_prior + ) + + foi_init <- set_foi_init( + foi_init = foi_init, + is_log_foi = is_log_foi, + foi_index = stan_data$foi_index + ) + + # Assigning right name to the model based on user specifications + model_name <- model_type + if (is_log_foi) { + model_name <- paste0(model_name, "_log") + } + if (is_seroreversion) + model_name <- paste0(model_name, "_seroreversion") + else + model_name <- paste0(model_name, "_no_seroreversion") + + # Compile or load Stan model + model <- stanmodels[[model_name]] + + seromodel <- rstan::sampling( + model, + data = stan_data, + init = foi_init, + ... + ) + seromodel@model_name <- model_name + return(seromodel) +} diff --git a/R/model_comparison.R b/R/model_comparison.R deleted file mode 100644 index cc566013..00000000 --- a/R/model_comparison.R +++ /dev/null @@ -1,45 +0,0 @@ -#' Build dataframe containing the R-hat estimates for a given -#' serological model -#' -#' This function relies on [rhat][bayesplot::rhat] to extract the -#' R-hat estimates of the serological model object `seromodel_object` and -#' returns a table a dataframe with the estimates for each year of birth. -#' @inheritParams get_foi_central_estimates -#' @return rhats table -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(serodata = chagas2012) -#' model_constant <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant", -#' iter = 1500 -#' ) -#' cohort_ages <- get_cohort_ages(serodata) -#' get_table_rhats( -#' seromodel_object = model_constant, -#' cohort_ages = cohort_ages -#' ) -#' @export -get_table_rhats <- function(seromodel_object, - cohort_ages) { - checkmate::assert_class(seromodel_object, "stanfit") - - rhats <- bayesplot::rhat(seromodel_object, "foi") - - if (any(is.nan(rhats))) { - warn_msg <- paste0( - length(which(is.nan(rhats))), - " rhat values are `nan`, ", - "indicating the model may not have run correctly for those times.\n", - "Setting those rhat values to `NA`." - ) - warning(warn_msg) - rhats[which(is.nan(rhats))] <- NA - } - model_rhats <- data.frame( - year = cohort_ages$birth_year, - rhat = rhats - ) - - return(model_rhats) -} diff --git a/R/modelling.R b/R/modelling.R deleted file mode 100644 index 1b42480a..00000000 --- a/R/modelling.R +++ /dev/null @@ -1,765 +0,0 @@ -stop_if_missing <- function(serodata, must_have_cols) { - if ( - !all( - must_have_cols - %in% colnames(serodata) - ) - ) { - missing <- must_have_cols[which(!(must_have_cols %in% colnames(serodata)))] - stop( - "The following mandatory columns in `serodata` are missing.\n", - toString(missing) - ) - } -} - -stop_if_wrong_type <- function(serodata, col_types) { - error_messages <- list() - for (col in names(col_types)) { - # valid_col_types <- ifelse(is.list(col_types[[col]]), - # col_types[[col]], as.list(col_types[[col]]) - # ) - valid_col_types <- as.list(col_types[[col]]) - - # Only validates column type if the column exists in the dataframe - if (col %in% colnames(serodata) && - !any(vapply(valid_col_types, function(type) { - do.call(sprintf("is.%s", type), list(serodata[[col]])) - }, logical(1)))) { - error_messages <- append( - error_messages, - sprintf( - "`%s` must be of any of these types: `%s`", - col, toString(col_types[[col]]) - ) - ) - } - } - if (length(error_messages) > 0) { - stop( - "The following columns in `serodata` have wrong types:\n", - toString(error_messages) - ) - } -} - -warn_missing <- function( - serodata, - optional_cols - ) { - if ( - !all( - optional_cols - %in% colnames(serodata) - ) - ) { - missing <- optional_cols[which(!(optional_cols %in% colnames(serodata)))] - warning( - "The following optional columns in `serodata` are missing. ", - "Consider including them to get more information from this analysis:\n", - toString(missing) - ) - for (col in missing) { - serodata[[col]] <- "None" # TODO Shouln't we use `NA` instead? - } - } - - return(serodata) -} - - -validate_serodata <- function(serodata) { - col_types <- list( - survey = c("character", "factor"), - total = "numeric", - counts = "numeric", - tsur = "numeric", - age_min = "numeric", - age_max = "numeric" - ) - - stop_if_missing(serodata, - must_have_cols = names(col_types) - ) - - stop_if_wrong_type(serodata, col_types) - - optional_col_types <- list( - country = c("character", "factor"), - test = c("character", "factor"), - antibody = c("character", "factor") - ) - - # Add missing columns - serodata <- warn_missing( - serodata, - optional_cols = names(optional_col_types) - ) - - # If any optional column is present, validates that is has the correct type - stop_if_wrong_type(serodata, optional_col_types) - - return(serodata) -} - -validate_prepared_serodata <- function(serodata) { - col_types <- list( - total = "numeric", - counts = "numeric", - tsur = "numeric", - age_mean_f = "numeric", - birth_year = "numeric", - prev_obs = "numeric", - prev_obs_lower = "numeric", - prev_obs_upper = "numeric" - ) - serodata <- validate_serodata(serodata) - - stop_if_missing(serodata, must_have_cols = names(col_types)) - stop_if_wrong_type(serodata, col_types) - - return(serodata) -} - -#' Run specified stan model for the force-of-infection and -#' estimate the seroprevalence based on the result of the fit -#' -#' Starting on v.0.1.0, this function will be DEPRECATED. Use `fit_seromodel` -#' instead. -#' This function runs the specified model for the force-of-infection `foi_model` -#' using the data from a seroprevalence survey `serodata` as the input data. See -#' [fit_seromodel] for further details. -#' -#' @inheritParams fit_seromodel -#' @param print_summary Boolean. If `TRUE`, a table summarizing modelling -#' results is printed. -#' @return `seromodel_object`. An object containing relevant information about -#' the implementation of the model. For further details refer to -#' [fit_seromodel]. -#' @examples -#' \dontrun{ -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' fit_seromodel( -#' serodata, -#' foi_model = "constant" -#' ) -#' } -#' @export -run_seromodel <- function( - serodata, - foi_model = c("constant", "tv_normal_log", "tv_normal"), - foi_parameters = NULL, - chunks = NULL, - chunk_size = 1, - iter = 1000, - adapt_delta = 0.90, - max_treedepth = 10, - seed = 12345, - print_summary = TRUE, - ...) { - .Deprecated("fit_seromodel") - foi_model <- match.arg(foi_model) - survey <- unique(serodata$survey) - if (length(survey) > 1) { - warning("You have more than 1 surveys or survey codes") - } - seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = foi_model, - foi_parameters = foi_parameters, - chunks = chunks, - chunk_size = chunk_size, - iter = iter, - adapt_delta = adapt_delta, - max_treedepth = max_treedepth, - seed = seed, - ... - ) - message( - "serofoi model ", - foi_model, - " finished running ------" - ) - if (print_summary) { - model_summary <- extract_seromodel_summary( - seromodel_object = seromodel_object, - serodata = serodata - ) - print(t(model_summary)) - } - return(seromodel_object) -} - -#' Fit selected model to the specified seroprevalence survey -#' data -#' -#' This function fits the specified model `foi_model` to the serological survey -#' data `serodata` by means of [sampling][rstan::sampling]. The -#' function determines whether the corresponding stan model object needs to be -#' compiled by rstan. -#' @param serodata A data frame containing the data from a seroprevalence -#' survey. This data frame must contain at least the following columns: -#' \describe{ -#' \item{`total`}{Number of samples for each age group} -#' \item{`counts`}{Number of positive samples for each age group} -#' \item{`tsur`}{Year in which the survey took place} -#' \item{`age_mean_f`}{Floor value of the average between age_min and age_max} -#' \item{`sample_size`}{The size of the sample} -#' \item{`birth_year`}{The year in which the individuals of each age group -#' were born} -#' } -#' The last six columns can be added to `serodata` by means of the function -#' [prepare_serodata()]. -#' @param foi_model Name of the selected model. Current version provides three -#' options: -#' \describe{ -#' \item{`"constant"`}{Runs a constant model} -#' \item{`"tv_normal"`}{Runs a normal model} -#' \item{`"tv_normal_log"`}{Runs a normal logarithmic model} -#' } -#' @param foi_parameters List specifying the initial prior parameters of the -#' model `foi_model` to be specified as (e.g.): -#' \describe{ -#' \item{`"constant"`}{`list(foi_a = 0, foi_b = 2)`} -#' \item{`"tv_normal"`}{`list(foi_location = 0, foi_scale = 1)`} -#' \item{`"tv_normal_log"`}{`list(foi_location = -6, foi_scale = 4)`} -#' } -#' @param chunks Numeric list specifying the chunk structure of the time -#' interval from the birth year of the oldest age cohort -#' `min(serodata$age_mean_f)` to the time when the serosurvey was conducted -#' `t_sur`. If `NULL`, the time interval is divided in chunks of size -#' `chunk_size`. -#' @param chunk_size Size of the chunks to be used in case that the chunk -#' structure `chunks` is not specified in [fit_seromodel]. -#' Default is 1, meaning that one force of infection value is to be estimated -#' for every year in the time interval spanned by the serosurvey. -#' If the length of the time interval is not exactly divisible by `chunk_size`, -#' the remainder years are included in the last chunk. -#' @param iter Number of interactions for each chain including the warmup. -#' `iter` in [sampling][rstan::sampling]. -#' @param adapt_delta Real number between 0 and 1 that represents the target -#' average acceptance probability. Increasing the value of `adapt_delta` will -#' result in a smaller step size and fewer divergences. For further details -#' refer to the `control` parameter in [sampling][rstan::sampling] or -#' [here](https://mc-stan.org/rstanarm/reference/adapt_delta.html). -#' @param max_treedepth Maximum tree depth for the binary tree used in the NUTS -#' stan sampler. For further details refer to the `control` parameter in -#' [sampling][rstan::sampling]. -#' @param seed For further details refer to the `seed` parameter in -#' [sampling][rstan::sampling]. -#' @param ... Additional parameters for [sampling][rstan::sampling]. -#' @return `seromodel_object`. `stanfit` object returned by the function -#' [sampling][rstan::sampling] -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_fit <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant" -#' ) -#' -#' @export -fit_seromodel <- function( - serodata, - foi_model = c("constant", "tv_normal_log", "tv_normal", "av_normal"), - foi_parameters = NULL, - chunks = NULL, - chunk_size = 1, - iter = 1000, - adapt_delta = 0.90, - max_treedepth = 10, - seed = 12345, - ...) { - serodata <- validate_prepared_serodata(serodata) - err_msg <- paste0( - "foi_model must be either ", - "constant, ", - "tv_normal, tv_normal_log, or ", - "av_normal" - ) - stopifnot( - err_msg = - foi_model %in% c( - "constant", - "tv_normal", "tv_normal_log", - "av_normal" - ), - "iter must be numeric" = is.numeric(iter), - "seed must be numeric" = is.numeric(seed) - ) - - # Set default foi parameters - if (is.null(foi_parameters)) { - if (foi_model == "constant") { - foi_parameters <- list( - foi_a = 0, - foi_b = 2 - ) - } else if (foi_model %in% c("tv_normal", "av_normal")) { - foi_parameters <- list( - foi_location = 0, - foi_scale = 1 - ) - } else if (foi_model == "tv_normal_log") { - foi_parameters <- list( - foi_location = -6, - foi_scale = 4 - ) - } - } - - # Load Stan model - model <- stanmodels[[foi_model]] - exposure_matrix <- get_exposure_matrix(serodata) - - # Set default chunks structure - if (is.null(chunks)) { - chunks <- get_chunk_structure( - serodata = serodata, - chunk_size = chunk_size - ) - } - checkmate::assert_class(chunks, "numeric") - stopifnot( - "`chunks` length must be equal to `max(serodata$age_mean_f)`" = - length(chunks) == max(serodata$age_mean_f) - ) - - # Build Stan data - stan_data <- list( - n_obs = nrow(serodata), - n_pos = serodata$counts, - n_total = serodata$total, - age_max = max(serodata$age_mean_f), - chunks = chunks - ) - - if (foi_model %in% c("constant", "tv_normal", "tv_normal_log")) { - exposure_matrix <- get_exposure_matrix(serodata) - stan_data <- append( - stan_data, - list( - observation_exposure_matrix = exposure_matrix - ) - ) - } else if (foi_model == "av_normal") { - stan_data <- append( - stan_data, - list(ages = serodata$age_mean_f) - ) - } - - if (foi_model == "constant") { - stan_data <- append( - stan_data, - list( - foi_a = foi_parameters$foi_a, - foi_b = foi_parameters$foi_b - ) - ) - } else { - stan_data <- append( - stan_data, - list( - foi_location = foi_parameters$foi_location, - foi_scale = foi_parameters$foi_scale - ) - ) - } - - if (foi_model == "tv_normal_log") { - f_init <- function() { - list(log_fois = rep(-3, max(chunks))) - } - } else { - f_init <- function() { - list(fois = rep(0.01, max(chunks))) - } - } - - seromodel_fit <- rstan::sampling( - model, - data = stan_data, - iter = iter, - init = f_init, - control = list( - adapt_delta = adapt_delta, - max_treedepth = max_treedepth - ), - seed = seed, - # https://github.com/stan-dev/rstan/issues/761#issuecomment-647029649 - chain_id = 0, - include = FALSE, - pars = "fois_vector", - verbose = FALSE, - refresh = 0, - ... - ) - - if (seromodel_fit@mode == 0) { - seromodel_object <- seromodel_fit - return(seromodel_object) - } else { - # This may happen for invalid inputs in rstan::sampling() (e.g. thin > iter) - seromodel_object <- "no model" - return(seromodel_object) - } -} - - -#' Generate list containing the chunk structure to be used in the retrospective -#' estimation of the force of infection. -#' -#' This function generates a numeric list specifying the chunk structure of the -#' time interval spanning from the year of birth of the oldest age cohort up to -#' the time when the serosurvey was conducted. -#' @inheritParams fit_seromodel -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(serodata = chagas2012, alpha = 0.05) -#' cohort_ages <- get_cohort_ages(serodata = serodata) -#' @export -get_chunk_structure <- function( - serodata, - chunk_size - ) { - checkmate::assert_int( - chunk_size, - lower = 1, - upper = max(serodata$age_mean_f) - ) - - chunks <- unlist( - purrr::map( - seq( - 1, - max(serodata$age_mean_f) / chunk_size, - 1), - rep, - times = chunk_size - ) - ) - - chunks <- append( - chunks, - rep( - max(chunks), - max(serodata$age_mean_f) - length(chunks) - ) - ) - - return(chunks) -} - -#' Generate data frame containing the age of each cohort -#' corresponding to each birth year excluding the year of the survey. -#' -#' This function generates a data frame containing the age of each cohort -#' corresponding to each `birth_year` excluding the year of the survey, for -#' which the cohort age is still 0. specified serological survey data `serodata` -#' excluding the year of the survey. -#' @inheritParams run_seromodel -#' @return `cohort_ages`. A data frame containing the age of each cohort -#' corresponding to each birth year -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(serodata = chagas2012, alpha = 0.05) -#' cohort_ages <- get_cohort_ages(serodata = serodata) -#' @export -get_cohort_ages <- function(serodata) { - - birth_year <- min(serodata$birth_year):(serodata$tsur[1] - 1) - age <- rev(seq_along(birth_year)) - - cohort_ages <- data.frame( - birth_year = birth_year, - age = age - ) - return(cohort_ages) -} - -# TODO Is necessary to explain better what we mean by the exposure matrix. - -#' Generate exposure matrix corresponding to a serological -#' survey -#' -#' @inheritParams run_seromodel -#' @return `exposure_output`. An atomic matrix containing the expositions for -#' each entry of `serodata` by year. -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(serodata = chagas2012) -#' exposure_matrix <- get_exposure_matrix(serodata = serodata) -#' @keywords internal -#' @noRd -get_exposure_matrix <- function(serodata) { - age_class <- serodata$age_mean_f - cohort_ages <- get_cohort_ages(serodata = serodata) - ly <- nrow(cohort_ages) - exposure <- matrix(0, nrow = length(age_class), ncol = ly) - for (k in seq_along(age_class)) { - exposure[k, (ly - age_class[k] + 1):ly] <- 1 - } - exposure_output <- exposure - return(exposure_output) -} - -#' Extract central estimates for the fitted forced FoI -#' -#' @param seromodel_object Stanfit object containing the results of fitting a -#' model by means of [fit_seromodel]. -#' @inheritParams fit_seromodel -#' @param lower_quantile Lower quantile used to compute the credible interval of -#' the fitted force-of-infection. -#' @param upper_quantile Lower quantile used to compute the credible interval of -#' the fitted force-of-infection. -#' @return `foi_central_estimates`. Central estimates for the fitted forced FoI -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant" -#' ) -#' cohort_ages <- get_cohort_ages(serodata = serodata) -#' foi_central_estimates <- get_foi_central_estimates( -#' seromodel_object = seromodel_object, -#' serodata = serodata -#' ) -#' @export -get_foi_central_estimates <- function( - seromodel_object, - serodata, - lower_quantile = 0.05, - upper_quantile = 0.95 - ) { - # extracts force-of-infection from stan fit - foi <- rstan::extract(seromodel_object, "foi", inc_warmup = FALSE)[[1]] - cohort_ages <- get_cohort_ages(serodata) - # defines time scale depending on the type of the model - if( - seromodel_object@model_name %in% - c("constant", "tv_normal", "tv_normal_log") - ) { - foi_central_estimates <- data.frame( - year = cohort_ages$birth_year - ) - } else if (seromodel_object@model_name == "av_normal") { - foi_central_estimates <- data.frame( - age = rev(cohort_ages$age) - ) - } - - # generates central estimations - foi_central_estimates <- foi_central_estimates %>% - mutate( - lower = apply(foi, 2, quantile, lower_quantile), - upper = apply(foi, 2, quantile, upper_quantile), - medianv = apply(foi, 2, quantile, 0.5) - ) - - return(foi_central_estimates) -} - -#' Function to extract a summary of the specified serological model object -#' -#' This function extracts a summary corresponding to a serological model object -#' containing information about the original serological survey data used to -#' fit the model, such as the year when the survey took place, the type of test -#' taken and the corresponding antibody, as well as information about the -#' convergence of the model, like the expected log pointwise predictive density -#' `elpd` and its corresponding standard deviation. -#' @inheritParams get_foi_central_estimates -#' @inheritParams run_seromodel -#' @return `model_summary`. Object with a summary of `seromodel_object` -#' containing the following: -#' \describe{ -#' \item{`foi_model`}{Name of the selected model.} -#' \item{`data_set`}{Seroprevalence survey label} -#' \item{`country`}{Name of the country were the survey was conducted in.} -#' \item{`year`}{Year in which the survey was conducted.} -#' \item{`test`}{Type of test of the survey.} -#' \item{`antibody`}{Antibody} -#' \item{`n_sample`}{Total number of samples in the survey.} -#' \item{`n_agec`}{Number of age groups considered.} -#' \item{`n_iter`}{Number of iterations by chain including warmup.} -#' \item{`elpd`}{elpd} -#' \item{`se`}{se} -#' \item{`converged`}{convergence} -#' } -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant" -#' ) -#' extract_seromodel_summary(seromodel_object, -#' serodata = serodata -#' ) -#' @export -extract_seromodel_summary <- function(seromodel_object, - serodata) { - serodata <- validate_prepared_serodata(serodata) - #------- Loo estimates - # The argument parameter_name refers to the name given to the Log-likelihood - # in the stan models. See loo::extract_log_lik() documentation for further - # details - loo_fit <- loo::loo( - seromodel_object, - save_psis = FALSE, - pars = c(parameter_name = "logLikelihood") - ) - if (sum(is.na(loo_fit)) < 1) { - lll <- as.numeric((round(loo_fit$estimates[1, ], 2))) - } else { - lll <- c(-1e10, 0) - } - - model_summary <- data.frame( - foi_model = seromodel_object@model_name, - dataset = unique(serodata$survey), - country = unique(serodata$country), - year = unique(serodata$tsur), - test = unique(serodata$test), - antibody = unique(serodata$antibody), - n_sample = sum(serodata$total), - n_agec = length(serodata$age_mean_f), - n_iter = seromodel_object@sim$iter, - elpd = lll[1], - se = lll[2], - converged = NA - ) - cohort_ages <- get_cohort_ages(serodata = serodata) - rhats <- get_table_rhats( - seromodel_object = seromodel_object, - cohort_ages = cohort_ages - ) - - if (all(rhats$rhat <= 1.01)) { - model_summary$converged <- "Yes" - } else { - model_summary$converged <- "No" - warn_msg <- paste0( - length(which(rhats$rhat > 1.01)), - " rhat values are above 1.01. ", - "Running the chains for more iterations is recommended." - ) - warning(warn_msg) - } - - return(model_summary) -} - - -#' Generate data frame containing the confidence interval based on -#' a force-of-infection fitting -#' -#' This function computes the corresponding binomial confidence intervals for -#' the obtained prevalence based on a fitting of the force-of-infection `foi` -#' for plotting an analysis purposes. -#' @param foi Object containing the information of the force-of-infection. It is -#' obtained from `rstan::extract(seromodel_object$seromodel, "foi", inc_warmup -#' = FALSE)[[1]]`. -#' @param alpha Probability threshold for statistical significance used for both -#' the binomial confidence interval, and the lower and upper quantiles of the -#' estimated prevalence. -#' @inheritParams run_seromodel -#' @param bin_data If `TRUE`, `serodata` is binned by means of -#' `prepare_bin_data`. Otherwise, age groups are kept as originally input. -#' @param bin_step Integer specifying the age groups bin size to be used when -#' `bin_data` is set to `TRUE`. -#' @return `prev_final`. The expanded prevalence data. This is used for plotting -#' purposes in the `visualization` module. -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant" -#' ) -#' foi <- rstan::extract(seromodel_object, "foi")[[1]] -#' get_prev_expanded(foi, serodata) -#' @export -get_prev_expanded <- function(foi, - serodata, - alpha = 0.05, - bin_data = FALSE, - bin_step = 5 - ) { - - if (bin_data && any(serodata$age_max - serodata$age_min > 2)) { - warning("Make sure `serodata` is already grouped by age") - bin_data <- FALSE - } - - foi_expanded <- foi - - ly <- NCOL(foi_expanded) - exposure_expanded <- matrix(0, nrow = ly, ncol = ly) - exposure_expanded[apply( - lower.tri(exposure_expanded, diag = TRUE), - 1, rev - )] <- 1 - - prev_pn <- t(1 - exp(-exposure_expanded %*% t(foi_expanded))) - - predicted_prev <- t( - apply( - prev_pn, - 2, - function(x) { - quantile( - x, - c( - 0.5, - alpha, - 1 - alpha - ) - ) - } - ) - ) - colnames(predicted_prev) <- c( - "predicted_prev", - "predicted_prev_lower", - "predicted_prev_upper" - ) - predicted_prev <- as.data.frame(predicted_prev) - predicted_prev$age <- 1:ly - - if (bin_data) { - observed_prev <- prepare_bin_data( - serodata = serodata, - bin_step = bin_step, - alpha = alpha - ) - } else { - observed_prev <- serodata - } - - observed_prev <- observed_prev %>% - dplyr::select( - "age_mean_f", - "prev_obs", - "prev_obs_lower", - "prev_obs_upper", - "total", - "counts" - ) %>% - rename( - age = "age_mean_f" - ) - - prev_expanded <- - base::merge( - predicted_prev, - observed_prev, - by = "age", - all.x = TRUE - ) %>% - dplyr::mutate(survey = unique(serodata$survey)) - - return(prev_expanded) -} diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R new file mode 100644 index 00000000..d0b1eb20 --- /dev/null +++ b/R/plot_seromodel.R @@ -0,0 +1,559 @@ +#' Prepares serosurvey for plotting +#' +#' Adds seroprevalence values with corresponding binomial confidence interval +#' @inheritParams fit_seromodel +#' @param alpha 1 - alpha indicates the confidence level to be used +#' @return serosurvey with additional columns: +#' \describe{ +#' \item{seroprev}{Seroprevalence computed as the proportion of positive +#' cases `n_seropositive` in the number of samples +#' `n_sample` for each age group} +#' \item{seroprev_lower}{Lower limit of the binomial confidence interval +#' of `seroprev`} +#' \item{seroprev_upper}{Upper limit of the binomial confidence interval +#' of `seroprev`} +#' } +#' @export +prepare_serosurvey_for_plotting <- function( #nolint + serosurvey, + alpha = 0.05 + ) { + + serosurvey <- serosurvey %>% + cbind( + Hmisc::binconf( + serosurvey$n_seropositive, + serosurvey$n_sample, + alpha = alpha, + method = "exact", + return.df = TRUE + ) + ) %>% + dplyr::rename( + seroprev = "PointEst", + seroprev_lower = "Lower", + seroprev_upper = "Upper" + ) %>% + dplyr::arrange(.data$age_group) %>% + dplyr::relocate(.data$age_group) +} + +#' Construct age-group variable from age column +#' +#' Generates age intervals of length step in the interval spanned by +#' `age_min` and `age_max` in a serosurvey. +#' In cases where `max(age_max)%%(step+1)!=0`, the last age interval is +#' truncated and will have a different length than the others. +#' @inheritParams plot_serosurvey +#' @param step step used to split the age interval +#' @return Serosurvey with addition factor variable grouping `age_intervals`. +#' The interval is taken as closed to the right and to the left. +get_age_intervals <- function(serosurvey, step) { + age_min <- min(serosurvey$age_min) + age_max <- max(serosurvey$age_max) + + checkmate::assert_int(age_min, lower = 0) + checkmate::assert_int(age_max, lower = age_min) + checkmate::assert_int(step, lower = 2, upper = age_max) + + limits_low <- as.integer( + seq( + age_min, age_max, + by = step + ) + ) + + if ((age_max - age_min) %% step != 0) { + warn_msg <- "(age_min - age_max) is not an integer multiple of step. + The last age interval will be truncated to " + warn_msg <- paste0( + warn_msg, "[", limits_low[length(limits_low)], ",", age_max, "]" + ) + warning(warn_msg) + } + + # prepare breaks + lim_breaks <- c(limits_low, age_max) + + # define age groups closed to the left and closed to the right + survey_features <- data.frame( + age_min = limits_low, + age_max = limits_low + step - 1 + ) %>% add_age_bins() + + serosurvey$age_interval <- cut( + x = serosurvey$age_group, + breaks = lim_breaks, + include.lowest = TRUE, right = FALSE, + labels = survey_features$group + ) + + return(serosurvey) +} + +#' Plots seroprevalence from the given serosurvey +#' +#' @inheritParams fit_seromodel +#' @param size_text Size of text for plotting (`base_size` in +#' [ggplot2][ggplot2::theme_bw]) +#' @param bin_serosurvey If `TRUE`, `serodata` is binned by means of +#' `prepare_bin_serosurvey`. +#' Otherwise, age groups are kept as originally input. +#' @param bin_step Integer specifying the age groups bin size to be used when +#' `bin_serosurvey` is set to `TRUE`. +#' @return ggplot object with seroprevalence plot +#' @examples +#' # Chikungunya example serosurvey +#' data(chik2015) +#' plot_serosurvey(chik2015) +#' +#' # VEEV example serosurvey +#' data(veev2012) +#' plot_serosurvey(veev2012) +#' +#' # Chagas disease example serosurvey +#' data(chagas2012) +#' plot_serosurvey(chagas2012, bin_serosurvey = TRUE) +#' @export +plot_serosurvey <- function( + serosurvey, + size_text = 11, + bin_serosurvey = FALSE, + bin_step = 5 + ) { + serosurvey <- validate_serosurvey(serosurvey = serosurvey) %>% + add_age_group_to_serosurvey() + + if (bin_serosurvey) { + age_max <- max(serosurvey$age_max) + checkmate::assert_int(bin_step, lower = 2, upper = age_max) + + serosurvey <- get_age_intervals( + serosurvey = serosurvey, + step = bin_step + ) + + serosurvey <- serosurvey %>% + dplyr::group_by(.data$age_interval) %>% + dplyr::summarise( + n_sample = sum(.data$n_sample), + n_seropositive = sum(.data$n_seropositive) + ) %>% + dplyr::mutate( + age_min = as.integer(gsub("[[]|\\,.*", "\\1", .data$age_interval)) + 1, + age_max = as.integer(gsub(".*\\,|[]]", "\\1", .data$age_interval)) + ) %>% + add_age_group_to_serosurvey() + } + + serosurvey <- prepare_serosurvey_for_plotting(serosurvey) + + min_prev <- min(serosurvey$seroprev_lower) + max_prev <- max(serosurvey$seroprev_upper) + + seroprev_plot <- ggplot2::ggplot( + data = serosurvey, + ggplot2::aes(x = .data$age_group) + ) + + ggplot2::geom_errorbar( + ggplot2::aes( + ymin = .data$seroprev_lower, + ymax = .data$seroprev_upper + ), + width = 0.1 + ) + + ggplot2::geom_point( + ggplot2::aes( + y = .data$seroprev, + size = .data$n_sample + ), + fill = "#7a0177", colour = "black", shape = 21 + ) + + ggplot2::coord_cartesian( + xlim = c(min(serosurvey$age_min), max(serosurvey$age_max)), + ylim = c(min_prev, max_prev) + ) + + ggplot2::theme_bw(size_text) + + ggplot2::theme(legend.position = "none") + + ggplot2::ylab("Seroprevalence") + + ggplot2::xlab("Age") + + return(seroprev_plot) +} + +#' Extracts central estimates from stan_fit object for specified parameter +#' +#' @param seromodel stan_fit object obtained from sampling a model +#' with [fit_seromode] +#' @inheritParams fit_seromodel +#' @param alpha 1 - alpha indicates the credibility level to be used +#' @param par_name String specifying the parameter to be extracted +#' from `seromodel` +#' @returns A dataframe with the following columns +#' \describe{ +#' \item{`median`}{Median of the samples computed as the 0.5 quantile} +#' \item{`lower`}{Lower quantile `alpha`} +#' \item{`upper`}{Upper quantile `1 - alpha`} +#' } +#' @export +extract_central_estimates <- function( + seromodel, + serosurvey, + alpha = 0.05, + par_name = "foi_vector" +) { + samples <- rstan::extract(seromodel, par_name)[[1]] %>% + as.matrix() #to deal with 1-time estimates + central_estimates <- data.frame( + median = apply(samples, 2, quantile, 0.5), + lower = apply(samples, 2, quantile, alpha), + upper = apply(samples, 2, quantile, 1 - alpha) + ) + + return(central_estimates) +} + +#' Plot seroprevalence estimates on top of the serosurvey +#' +#' @inheritParams extract_central_estimates +#' @inheritParams plot_serosurvey +#' @returns ggplot object with seroprevalence estimates and serosurveys plots +#' @export +plot_seroprevalence_estimates <- function( + seromodel, + serosurvey, + alpha = 0.05, + size_text = 11, + bin_serosurvey = FALSE, + bin_step = 5 +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + seroprevalence_central_estimates <- data.frame( #nolint + median = 0.0, + lower = 0.0, + upper = 0.0, + age = 0 + ) %>% + rbind( + extract_central_estimates( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + par_name = "prob_infected_expanded" + ) %>% + mutate(age = seq(1, max(serosurvey$age_max))) + ) + + seroprevalence_plot <- plot_serosurvey( + serosurvey = serosurvey, + size_text = size_text, + bin_serosurvey = bin_serosurvey, + bin_step = bin_step + ) + + ggplot2::geom_line( + data = seroprevalence_central_estimates, + ggplot2::aes(x = .data$age, y = median), + colour = "#7a0177" + ) + + ggplot2::geom_ribbon( + data = seroprevalence_central_estimates, + ggplot2::aes(x = .data$age, ymin = .data$lower, ymax = .data$upper), + fill = "#c994c7", alpha = 0.5 + ) + + ggplot2::coord_cartesian( + xlim = c(0, max(serosurvey$age_max)) + ) + + return(seroprevalence_plot) +} + +#' Plots force-of-infection central estimates +#' +#' @inheritParams extract_central_estimates +#' @inheritParams fit_seromodel +#' @param foi_df Dataframe with columns +#' \describe{ +#' \item{`year`/`age`}{Year/Age (depending on the model)} +#' \item{`foi`}{Force-of-infection values by year/age} +#' } +#' @inheritParams plot_serosurvey +#' @param foi_max Max force-of-infection value for plotting +#' @return ggplot object with estimated force-of-infection +#' @export +plot_foi_estimates <- function( + seromodel, + serosurvey, + alpha = 0.05, + foi_df = NULL, + foi_max = NULL, + size_text = 11 +) { + # TODO: Add checks for foi_df (size, colnames, etc.) + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + model_name <- seromodel@model_name + stopifnot( + "seromodel@name should start with either 'age' or 'time'" = + startsWith(model_name, "age") | startsWith(model_name, "time") + ) + + foi_central_estimates <- extract_central_estimates( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + par_name = "foi_expanded" + ) + + if (is.null(foi_max)) + foi_max <- max(foi_central_estimates$upper) + + if (startsWith(model_name, "age")) { + xlab <- "Age" + ages <- 1:max(serosurvey$age_max) + foi_central_estimates <- mutate( + foi_central_estimates, + age = ages + ) + if (!is.null(foi_df)) { + foi_central_estimates <- foi_central_estimates %>% + left_join(foi_df, by = "age") + } + foi_plot <- ggplot2::ggplot( + data = foi_central_estimates, ggplot2::aes(x = .data$age) + ) + } else if (startsWith(model_name, "time")) { + checkmate::assert_names(names(serosurvey), must.include = "survey_year") + xlab <- "Year" + ages <- rev(1:max(serosurvey$age_max)) + years <- unique(serosurvey$survey_year) - ages + foi_central_estimates <- mutate( + foi_central_estimates, + year = years + ) + if (!is.null(foi_df)) { + foi_central_estimates <- foi_central_estimates %>% + left_join(foi_df, by = "year") + } + foi_plot <- ggplot2::ggplot( + data = foi_central_estimates, ggplot2::aes(x = .data$year) + ) + } + + foi_plot <- foi_plot + + ggplot2::geom_ribbon( + ggplot2::aes( + ymin = .data$lower, + ymax = .data$upper + ), + fill = "#41b6c4", + alpha = 0.5 + ) + + ggplot2::geom_line( + ggplot2::aes(y = .data$median), + colour = "#253494" + ) + + ggplot2::theme_bw(size_text) + + ggplot2::coord_cartesian(ylim = c(0, foi_max)) + + ggplot2::ylab("Force-of-Infection") + + ggplot2::xlab(xlab) + + if (!is.null(foi_df)) { + foi_plot <- foi_plot + + ggplot2::geom_line( + ggplot2::aes(y = .data$foi), + colour = "#b30909" + ) + } + + return(foi_plot) +} + +#' Plot r-hats convergence criteria for the specified model +#' +#' @inheritParams extract_central_estimates +#' @inheritParams plot_serosurvey +#' @return ggplot object showing the r-hats of the model to be compared with the +#' convergence criteria (horizontal dashed line) +plot_rhats <- function( + seromodel, + serosurvey, + par_name = "foi_expanded", + size_text = 11 +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + model_name <- seromodel@model_name + stopifnot( + "seromodel@name should start with either 'age' or 'time'" = + startsWith(model_name, "age") | startsWith(model_name, "time") + ) + + rhats <- bayesplot::rhat(seromodel, par_name) + + if (startsWith(model_name, "age")) { + xlab <- "Age" + ages <- 1:max(serosurvey$age_max) + rhats_df <- data.frame( + age = ages, + rhat = rhats + ) + + rhats_plot <- ggplot2::ggplot( + data = rhats_df, ggplot2::aes(x = .data$age) + ) + } else if (startsWith(model_name, "time")) { + checkmate::assert_names(names(serosurvey), must.include = "survey_year") + xlab <- "Year" + ages <- rev(1:max(serosurvey$age_max)) + years <- unique(serosurvey$survey_year) - ages + rhats_df <- data.frame( + year = years, + rhat = rhats + ) + + rhats_plot <- ggplot2::ggplot( + data = rhats_df, ggplot2::aes(x = .data$year) + ) + } + + rhats_plot <- rhats_plot + + ggplot2::geom_hline( + yintercept = 1.01, + linetype = "dashed" + ) + + ggplot2::geom_line(ggplot2::aes(y = .data$rhat)) + + ggplot2::geom_point(ggplot2::aes(y = .data$rhat)) + + ggplot2::coord_cartesian( + ylim = c( + min(1.0, min(rhats_df$rhat)), + max(1.02, max(rhats_df$rhat)) + ) + ) + + ggplot2::theme_bw(size_text) + + ggplot2::xlab(xlab) + + ggplot2::ylab("Convergence (r-hats)") + + return(rhats_plot) +} + +#' Plots model summary +#' +#' @inheritParams summarise_seromodel +#' @inheritParams plot_serosurvey +#' @return ggplot object with a summary of the specified model +#' @export +plot_summary <- function( + seromodel, + serosurvey, + loo_estimate_digits = 1, + central_estimate_digits = 2, + rhat_digits = 2, + size_text = 11 +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + summary_table <- summarise_seromodel( + seromodel = seromodel, + serosurvey = serosurvey, + loo_estimate_digits = loo_estimate_digits, + central_estimate_digits = central_estimate_digits, + rhat_digits = rhat_digits + ) %>% + t() #convert summary to table + + summary_df <- data.frame( + row = rev(seq_len(NCOL(summary_table))), + text = paste0(colnames(summary_table), ": ", summary_table[1, ]) + ) + + summary_plot <- ggplot2::ggplot( + summary_df, + ggplot2::aes(x = 1, y = row)) + + ggplot2::scale_y_continuous( + limits = c(0, nrow(summary_df) + 1), + breaks = NULL + ) + + ggplot2::theme_void() + + ggplot2::geom_text( + ggplot2::aes(label = text), + fontface = "bold", + size = size_text / 2.5 + ) + + return(summary_plot) +} + +#' Visualise results of the provided model +#' +#' @inheritParams plot_summary +#' @inheritParams plot_seroprevalence_estimates +#' @inheritParams plot_foi_estimates +#' @inheritParams plot_rhats +#' @return seromodel summary plot +#' @export +plot_seromodel <- function( + seromodel, + serosurvey, + alpha = 0.05, + bin_serosurvey = FALSE, + bin_step = 5, + foi_df = NULL, + foi_max = NULL, + loo_estimate_digits = 1, + central_estimate_digits = 2, + seroreversion_digits = 2, + rhat_digits = 2, + size_text = 11 +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + summary_plot <- plot_summary( + seromodel = seromodel, + serosurvey = serosurvey, + loo_estimate_digits = loo_estimate_digits, + central_estimate_digits = central_estimate_digits, + rhat_digits = rhat_digits, + size_text = size_text + ) + + seroprev_plot <- plot_seroprevalence_estimates( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + size_text = size_text, + bin_serosurvey = bin_serosurvey, + bin_step = bin_step + ) + + plot_list <- list( + summary_plot, + seroprev_plot + ) + + model_name <- seromodel@model_name + if (!startsWith(model_name, "constant")) { + foi_plot <- plot_foi_estimates( + seromodel, + serosurvey, + alpha = alpha, + foi_df = foi_df, + foi_max = foi_max, + size_text + ) + + rhats_plot <- plot_rhats( + seromodel = seromodel, + serosurvey = serosurvey, + size_text = size_text + ) + + plot_list <- c( + plot_list, + list(foi_plot, rhats_plot) + ) + } + + seromodel_plot <- cowplot::plot_grid(plotlist = plot_list, ncol = 1) + return(seromodel_plot) +} diff --git a/R/serofoi-package.R b/R/serofoi-package.R index fc3ef874..7cd8525b 100644 --- a/R/serofoi-package.R +++ b/R/serofoi-package.R @@ -2,7 +2,6 @@ #' #' @description A DESCRIPTION OF THE PACKAGE #' -#' @docType package #' @name serofoi-package #' @aliases serofoi #' @useDynLib serofoi, .registration = TRUE @@ -19,4 +18,7 @@ #' @references Stan Development Team (NA). RStan: the R interface to Stan. R #' package version 2.26.22. https://mc-stan.org #' +#' #' @keywords internal +"_PACKAGE" + NULL diff --git a/R/seroprevalence_data.R b/R/seroprevalence_data.R deleted file mode 100644 index 94d56450..00000000 --- a/R/seroprevalence_data.R +++ /dev/null @@ -1,147 +0,0 @@ -# TODO Complete @param documentation - - -#' Prepare data from a serological survey for modelling -#' -#' This function adds the necessary additional variables to the given dataset -#' `serodata` corresponding to a serological survey. -#' @param serodata A data frame containing the data from a serological survey. -#' This data frame must contain the following columns: -#' \describe{ -#' \item{`survey`}{survey Label of the current survey} -#' \item{`total`}{Number of samples for each age group} -#' \item{`counts`}{Number of positive samples for each age group} -#' \item{`age_min`}{age_min} -#' \item{`age_max`}{age_max} -#' \item{`tsur`}{Year in which the survey took place} -#' \item{`country`}{The country where the survey took place} -#' \item{`test`}{The type of test taken} -#' \item{`antibody`}{antibody} -#' } -#' Alternatively to `age_min` and `age_max`, the dataset could already include -#' the age group marker `age_mean_f`, representing the middle point between -#' `age_min` and `age_max`. If `afe_mean_f` is missing, it will be generated -#' by the function. -#' @param alpha probability of a type I error. For further details refer to -#' [binconf][Hmisc::binconf]. -#' @return serodata with additional columns necessary for the analysis. These -#' columns are: -#' \describe{ -#' \item{`age_mean_f`}{Floor value of the average between age_min and age_max -#' for the age groups delimited by `age_min` and `age_max`} -#' \item{`sample_size`}{The size of the sample} -#' \item{`birth_year`}{Years in which the subject was born according to the -#' age group marker `age_mean_f`} -#' \item{`prev_obs`}{Observed prevalence} -#' \item{`prev_obs_lower`}{Lower limit of the confidence interval for the -#' observed prevalence} -#' \item{`prev_obs_upper`}{Upper limit of the confidence interval for the -#' observed prevalence} -#' } -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' @export -prepare_serodata <- function(serodata = serodata, - alpha = 0.05) { - checkmate::assert_numeric(alpha, lower = 0, upper = 1) - serodata <- validate_serodata(serodata) - - if (!any(colnames(serodata) == "age_mean_f")) { - serodata <- serodata %>% - dplyr::mutate( - age_mean_f = floor((.data$age_min + .data$age_max) / 2), - sample_size = sum(.data$total) - ) - } - - if (!any(colnames(serodata) == "birth_year")) { - serodata <- dplyr::mutate( - serodata, - birth_year = .data$tsur - .data$age_mean_f - ) - } - - serodata <- serodata %>% - cbind( - Hmisc::binconf( - serodata$counts, - serodata$total, - alpha = alpha, - method = "exact", - return.df = TRUE - ) - ) %>% - dplyr::rename( - prev_obs = "PointEst", - prev_obs_lower = "Lower", - prev_obs_upper = "Upper" - ) %>% - dplyr::arrange(.data$age_mean_f) - - return(serodata) -} - - -#' Prepare pre-processed serological survey dataset to plot the -#' binomial confidence intervals of the seroprevalence by age group -#' -#' This function prepapares a given pre-processed serological dataset (see -#' [prepare_serodata()]) to plot the binomial confidence intervals of its -#' corresponding seroprevalence grouped by age group. -#' @inheritParams run_seromodel -#' @return data set with the binomial confidence intervals -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' prepare_bin_data(serodata) -#' @keywords internal -#' @noRd -prepare_bin_data <- function(serodata, - bin_step = 5, - alpha = 0.05) { - if (!any(colnames(serodata) == "age_mean_f")) { - serodata <- serodata %>% - dplyr::mutate( - age_mean_f = floor((.data$age_min + .data$age_max) / 2), - sample_size = sum(.data$total) - ) - } - serodata$age_group <- get_age_group( - age = serodata$age_mean_f, - step = bin_step - ) - - serodata_bin <- serodata %>% - dplyr::group_by(.data$age_group) %>% - dplyr::summarise( - total = sum(.data$total), - counts = sum(.data$counts) - ) %>% - dplyr::mutate( - survey = unique(serodata$survey), - tsur = unique(serodata$tsur), - age_min = as.integer(gsub("[(]|\\,.*", "\\1", .data$age_group)) + 1, - age_max = as.integer(gsub(".*\\,|[]]", "\\1", .data$age_group)), - age_mean_f = floor((.data$age_min + .data$age_max) / 2) - ) - - serodata_bin <- cbind( - serodata_bin, - Hmisc::binconf( - serodata_bin$counts, - serodata_bin$total, - alpha = alpha, - method = "exact", - return.df = TRUE - ) - ) %>% - dplyr::rename( - prev_obs = "PointEst", - prev_obs_lower = "Lower", - prev_obs_upper = "Upper" - ) %>% - dplyr::arrange(.data$age_mean_f) - - return(serodata_bin) -} diff --git a/R/simulate_serosurvey.R b/R/simulate_serosurvey.R index de0e98a9..56428ab8 100644 --- a/R/simulate_serosurvey.R +++ b/R/simulate_serosurvey.R @@ -8,20 +8,20 @@ #' @return vector of probabilities of being seropositive for age-varying FoI #' including seroreversion (ordered from youngest to oldest individuals) probability_exact_age_varying <- function( - ages, - fois, - seroreversion_rate = 0 + ages, + fois, + seroreversion_rate = 0 ) { probabilities <- vector(length = length(ages)) # solves ODE exactly within pieces for (i in seq_along(ages)) { foi_tmp <- fois[i] - if(i == 1) + if (i == 1) probability_previous <- 0 else probability_previous <- probabilities[i - 1] - if(foi_tmp == 0) + if (foi_tmp == 0) lambda_over_both <- 0 else lambda_over_both <- (foi_tmp / (foi_tmp + seroreversion_rate)) @@ -34,28 +34,28 @@ probability_exact_age_varying <- function( #' Computes the probability of being seropositive when FOIs vary by time #' -#' @param years Integer indicating the years covering the birth ages of the sample +#' @param years Integer indicating the years covering the birth ages of the +#' sample #' @param fois Numeric atomic vector corresponding to the age-varying #' force-of-infection to simulate from #' @param seroreversion_rate Non-negative seroreversion rate. Default is 0. #' @return vector of probabilities of being seropositive for age-varying FoI #' including seroreversion (ordered from youngest to oldest individuals) probability_exact_time_varying <- function( - years, - fois, - seroreversion_rate = 0 + years, + fois, + seroreversion_rate = 0 ) { n_years <- length(years) - ages <- seq(1, n_years, 1) probabilities <- vector(length = length(years)) # solves ODE exactly within pieces for (i in seq_along(years)) { # birth cohorts probability_previous <- 0 - for(j in 1:(n_years - i + 1)) { # exposure during lifetime + for (j in 1:(n_years - i + 1)) { # exposure during lifetime foi_tmp <- fois[i + j - 1] - if(foi_tmp == 0) + if (foi_tmp == 0) lambda_over_both <- 0 else lambda_over_both <- (foi_tmp / (foi_tmp + seroreversion_rate)) @@ -69,19 +69,23 @@ probability_exact_time_varying <- function( return(probabilities_oldest_age_last) } -#' Generate probabilities of seropositivity by age based on a time-varying FOI model. +#' Generate probabilities of seropositivity by age based on a time-varying FOI +#' model. #' -#' This function calculates the probabilities of seropositivity by age based on a time-varying FOI model. +#' This function calculates the probabilities of seropositivity by age based on +#' a time-varying FOI model. #' It takes into account the FOI and the rate of seroreversion. #' -#' @param foi A dataframe containing the force of infection (FOI) values for different years. -#' It should have two columns: 'year' and 'foi'. -#' @param seroreversion_rate A non-negative numeric value representing the rate of seroreversion. +#' @param foi A dataframe containing the force of infection (FOI) values +#' for different years. It should have two columns: 'year' and 'foi'. +#' @param seroreversion_rate A non-negative numeric value representing the +#' rate of seroreversion. #' #' @return A dataframe with columns 'age' and 'seropositivity'. -probability_seropositive_time_model_by_age <- function( - foi, - seroreversion_rate) { +probability_seropositive_time_model_by_age <- function( #nolint + foi, + seroreversion_rate +) { years <- foi$year @@ -100,19 +104,23 @@ probability_seropositive_time_model_by_age <- function( } -#' Generate probabilities of seropositivity by age based on an age-varying FOI model. +#' Generate probabilities of seropositivity by age based on an age-varying +#' FOI model. #' -#' This function calculates the probabilities of seropositivity by age based on an age-varying FOI model. +#' This function calculates the probabilities of seropositivity by age based on +#' an age-varying FOI model. #' It takes into account the FOI and the rate of seroreversion. #' -#' @param foi A dataframe containing the force of infection (FOI) values for different ages. -#' It should have two columns: 'age' and 'foi'. -#' @param seroreversion_rate A non-negative numeric value representing the rate of seroreversion. +#' @param foi A dataframe containing the force of infection (FOI) values for +#' different ages. It should have two columns: 'age' and 'foi'. +#' @param seroreversion_rate A non-negative numeric value representing the rate +#' of seroreversion. #' #' @return A dataframe with columns 'age' and 'seropositivity'. -probability_seropositive_age_model_by_age <- function( - foi, - seroreversion_rate) { +probability_seropositive_age_model_by_age <- function( #nolint + foi, + seroreversion_rate +) { ages <- seq_along(foi$age) @@ -130,25 +138,28 @@ probability_seropositive_age_model_by_age <- function( return(df) } -#' Generate probabilities of seropositivity by age based on an age-and-time varying FOI model. +#' Generate probabilities of seropositivity by age based on an age-and-time +#' varying FOI model. #' -#' This function calculates the probabilities of seropositivity by age based on an age-and-time-varying FOI model. +#' This function calculates the probabilities of seropositivity by age based on +#' an age-and-time-varying FOI model. #' It takes into account the FOI and the rate of seroreversion. #' -#' @param foi A dataframe containing the force of infection (FOI) values for different ages. -#' It should have three columns: 'year', 'age' and 'foi'. -#' @param seroreversion_rate A non-negative numeric value representing the rate of seroreversion. +#' @param foi A dataframe containing the force of infection (FOI) values +#' for different ages. It should have three columns: 'year', 'age' and 'foi'. +#' @param seroreversion_rate A non-negative numeric value representing +#' the rate of seroreversion. #' #' @return A dataframe with columns 'age' and 'seropositivity'. -probability_seropositive_age_and_time_model_by_age <- function( - foi, - seroreversion_rate - ) { +probability_seropositive_age_and_time_model_by_age <- function( #nolint + foi, + seroreversion_rate +) { foi_matrix <- foi %>% tidyr::pivot_wider( values_from = foi, - names_from = c(year)) %>% + names_from = c(.data$year)) %>% tibble::column_to_rownames("age") %>% as.matrix() @@ -164,9 +175,9 @@ probability_seropositive_age_and_time_model_by_age <- function( foi_matrix_subset <- foi_matrix[1:(n_ages - i + 1), i:n_ages] %>% as.matrix() # only to handle single element matrix case foi_diag <- diag(foi_matrix_subset) - for(j in 1:(n_years - i + 1)) { # exposure during lifetime + for (j in 1:(n_years - i + 1)) { # exposure during lifetime foi_tmp <- foi_diag[j] - if(foi_tmp == 0) + if (foi_tmp == 0) lambda_over_both <- 0 else lambda_over_both <- (foi_tmp / (foi_tmp + seroreversion_rate)) @@ -192,26 +203,29 @@ probability_seropositive_age_and_time_model_by_age <- function( #' time-varying FOI model, an age-varying FOI model, or an age-and-time-varying #' FOI model. In all cases, it is possible to optionally include seroreversion. #' -#' @param model A string specifying the model type which can be one of ['age', 'time', 'age-time']. +#' @param model A string specifying the model type which can be one of +#' ['age', 'time', 'age-time']. #' @param foi A dataframe containing the force of infection (FOI) values. #' For time-varying models the columns should be ['year', 'foi']. #' For age-varying models the columns should be ['age', 'foi']. -#' For age-and-time-varying models the columns should be ['age', 'time', 'foi']. -#' @param seroreversion_rate A non-negative value determining the rate of seroreversion (per year). -#' Default is 0. +#' For age-and-time-varying models the columns should be +#' ['age', 'time', 'foi']. +#' @param seroreversion_rate A non-negative value determining the rate of +#' seroreversion (per year). Default is 0. #' #' @return A dataframe with columns 'age' and 'seropositivity'. #' @export -probability_seropositive_by_age <- function( - model, - foi, - seroreversion_rate = 0) { +probability_seropositive_by_age <- function( #nolint + model, + foi, + seroreversion_rate = 0 +) { - if(model == "time") { + if (model == "time") { probability_function <- probability_seropositive_time_model_by_age - } else if(model == "age") { + } else if (model == "age") { probability_function <- probability_seropositive_age_model_by_age - } else if(model == "age-time" || model == "time-age") { + } else if (model == "age-time" || model == "time-age") { probability_function <- probability_seropositive_age_and_time_model_by_age } @@ -225,16 +239,16 @@ probability_seropositive_by_age <- function( sum_of_A <- function(t, tau, construct_A_fn, ...) { k <- 1 - for(t_primed in (tau + 1):t) { - if(k == 1) + for (t_primed in (tau + 1):t) { + if (k == 1) { A <- construct_A_fn(t_primed, tau, ...) - else { + } else { tmp <- construct_A_fn(t_primed, tau, ...) A <- A + tmp } k <- k + 1 } - A + return(A) } #' Generate probabilities of seropositivity by age based on a general FOI model. @@ -242,25 +256,27 @@ sum_of_A <- function(t, tau, construct_A_fn, ...) { #' This function calculates the probabilities of seropositivity by age based on #' an abstract model of the serocatalytic system. #' -#' @param construct_A_fn A function that constructs a matrix that defines the multiplier -#' term in the linear ODE system. -#' @param calculate_seropositivity_function A function which takes the state vector -#' and returns the seropositive fraction. -#' @param initial_conditions The initial state vector proportions for each birth cohort. +#' @param construct_A_fn A function that constructs a matrix that defines the +#' multiplier term in the linear ODE system. +#' @param calculate_seropositivity_function A function which takes the state +#' vector and returns the seropositive fraction. +#' @param initial_conditions The initial state vector proportions for each +#' birth cohort. #' @param max_age The maximum age to simulate seropositivity for. #' #' @return A dataframe with columns 'age' and 'seropositivity'. #' @export -probability_seropositive_general_model_by_age <- function( - construct_A_function, - calculate_seropositivity_function, - initial_conditions, - max_age, - ...) { +probability_seropositive_general_model_by_age <- function( #nolint + construct_A_fn, + calculate_seropositivity_function, #nolint + initial_conditions, + max_age, + ... +) { probabilities <- vector(length = max_age) - for(i in seq_along(probabilities)) { - A_sum <- sum_of_A(max_age, max_age - i, construct_A, ...) + for (i in seq_along(probabilities)) { + A_sum <- sum_of_A(max_age, max_age - i, construct_A_fn, ...) Y <- Matrix::expm(A_sum) %>% as.matrix() %*% initial_conditions probabilities[i] <- calculate_seropositivity_function(Y) } @@ -276,17 +292,18 @@ probability_seropositive_general_model_by_age <- function( #' Add bins based on age intervals. #' -#' It generates a new column 'group' in the survey_features dataframe, representing -#' the group interval for each row based on the age_min and age_max columns. +#' It generates a new column 'group' in the survey_features dataframe, +#' representing the group interval for each row based on the age_min and +#' age_max columns. #' -#' @param survey_features A dataframe containing age_min and age_max columns representing -#' the minimum and maximum age boundaries for each group. +#' @param survey_features A dataframe containing age_min and age_max columns +#' representing the minimum and maximum age boundaries for each group. #' -#' @return A dataframe with an additional 'group' column representing the group interval -#' for each row based on the age_min and age_max columns. +#' @return A dataframe with an additional 'group' column representing the group +#' interval for each row based on the age_min and age_max columns. add_age_bins <- function(survey_features) { intervals <- vector(length = nrow(survey_features)) - for(i in seq_along(intervals)) { + for (i in seq_along(intervals)) { age_min <- survey_features$age_min[i] age_max <- survey_features$age_max[i] intervals[i] <- paste0("[", age_min, ",", age_max, "]") @@ -299,57 +316,61 @@ add_age_bins <- function(survey_features) { #' Create a survey dataframe with per individual age information. #' #' -#' @param survey_features A dataframe containing information about age groups and sample sizes. +#' @param survey_features A dataframe containing information about age groups +#' and sample sizes. #' @param age_df A dataframe containing 'age' and 'group'. #' -#' @return A dataframe with overall sample sizes calculated by joining survey_features and age_df. -#' This dataframe has columns including 'age' and 'overall_sample_size'. +#' @return A dataframe with overall sample sizes calculated by joining +#' survey_features and age_df. +#' This dataframe has columns including 'age' and 'overall_sample_size'. survey_by_individual_age <- function(survey_features, age_df) { age_df %>% left_join(survey_features, by = "group") %>% - rename(overall_sample_size = sample_size) + rename(overall_sample_size = .data$n_sample) } #' Generate random sample sizes using multinomial sampling. #' -#' This function generates random sample sizes for each age group using multinomial sampling. -#' It takes the total sample size and the number of age groups as input and returns a vector -#' of sample sizes for each age group. +#' This function generates random sample sizes for each age group using +#' multinomial sampling. It takes the total sample size and the number of age +#' groups as input and returns a vector of sample sizes for each age group. #' -#' @param sample_size The total sample size to be distributed among age groups. +#' @param n_sample The total sample size to be distributed among age groups. #' @param n_ages The number of age groups. #' #' @return A vector containing random sample sizes for each age group. -multinomial_sampling_group <- function(sample_size, n_ages) { +multinomial_sampling_group <- function(n_sample, n_ages) { prob_value <- 1 / n_ages probs <- rep(prob_value, n_ages) sample_size_by_age <- as.vector( - rmultinom(1, sample_size, prob=probs) + rmultinom(1, n_sample, prob = probs) ) return(sample_size_by_age) } #' Generate random sample sizes for each age group. #' -#' This function generates random sample sizes for each age group based on the overall sample size -#' and the distribution of individuals across age groups. It uses multinomial sampling to allocate -#' the total sample size to each age group proportionally. +#' This function generates random sample sizes for each age group based on the +#' overall sample size and the distribution of individuals across age groups. +#' It uses multinomial sampling to allocate the total sample size to each age +#' group proportionally. #' -#' @param survey_df_long A dataframe with columns 'age', 'group' and 'overall_sample_size'. +#' @param survey_df_long A dataframe with columns 'age', 'group' and +#' 'overall_sample_size'. #' -#' @return A dataframe with random sample sizes generated for each age based on the overall -#' sample size. +#' @return A dataframe with random sample sizes generated for each age based on +#' the overall sample size. generate_random_sample_sizes <- function(survey_df_long) { df_new <- NULL intervals <- unique(survey_df_long$group) for (interval_aux in na.omit(intervals)) { df_tmp <- survey_df_long %>% - filter(group == interval_aux) - sample_size <- df_tmp$overall_sample_size[1] - sample_size_by_age <- multinomial_sampling_group(sample_size, nrow(df_tmp)) + filter(.data$group == interval_aux) + n_sample <- df_tmp$overall_sample_size[1] + sample_size_by_age <- multinomial_sampling_group(n_sample, nrow(df_tmp)) df_tmp <- df_tmp %>% - mutate(sample_size = sample_size_by_age) + mutate(n_sample = sample_size_by_age) if (is.null(df_new)) { df_new <- df_tmp @@ -360,26 +381,28 @@ generate_random_sample_sizes <- function(survey_df_long) { return(df_new) } -#' Generate random sample sizes for each individual age based on survey features. +#' Generate random sample sizes for each individual age based on survey +#' features. #' -#' This function generates random sample sizes for each individual age based on the provided -#' survey features. It first creates age bins, assigns each individual in the survey features -#' to an age bin, calculates the overall sample size by group, and then generates random sample -#' sizes for each age group. Finally, it returns a dataframe with the random sample sizes for -#' each individual age. +#' This function generates random sample sizes for each individual age based on +#' the provided survey features. It first creates age bins, assigns each +#' individual in the survey features to an age bin, calculates the overall +#' sample size by group, and then generates random sample sizes for each age +#' group. Finally, it returns a dataframe with the random sample sizes for each +#' individual age. #' -#' @param survey_features A dataframe containing information about individuals' age ranges and -#' sample sizes. +#' @param survey_features A dataframe containing information about individuals' +#' age ranges and sample sizes. #' -#' @return A dataframe with random sample sizes generated for each individual age based on the -#' provided survey features. -sample_size_by_individual_age_random <- function(survey_features) { +#' @return A dataframe with random sample sizes generated for each individual +#' age based on the provided survey features. +sample_size_by_individual_age_random <- function(survey_features) { #nolint ages <- seq(1, max(survey_features$age_max), 1) age_bins <- rep(NA, length(ages)) for (i in seq_along(ages)) { - # Find the index of the row in survey_features where age falls within the range + # Find index of the row in survey_features where age falls within the range age_min <- survey_features$age_min age_max <- survey_features$age_max idx <- which(ages[i] >= age_min & ages[i] <= age_max) @@ -396,7 +419,7 @@ sample_size_by_individual_age_random <- function(survey_features) { survey_features <- add_age_bins(survey_features) - survey_features_by_individual_age <- survey_by_individual_age( + survey_features_by_individual_age <- survey_by_individual_age( #nolint survey_features, age_df) @@ -406,8 +429,8 @@ sample_size_by_individual_age_random <- function(survey_features) { } check_age_constraints <- function(df) { - for (i in 1:nrow(df)) { - for (j in 1:nrow(df)) { + for (i in seq_len(nrow(df))) { + for (j in seq_len(nrow(df))) { if (i != j && df$age_max[i] == df$age_min[j]) { return(FALSE) } @@ -416,78 +439,31 @@ check_age_constraints <- function(df) { return(TRUE) } -validate_survey <- function(survey_features) { - - if (!is.data.frame(survey_features) || !all(c("age_min", "age_max", "sample_size") %in% names(survey_features))) { - stop("survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") - } - - # check that the age_max of a bin does not coincide with - # the age min of a different bin - is_age_ok <- check_age_constraints(survey_features) - if(!is_age_ok) - stop("Age bins in a survey are inclusive of both bounds, so the age_max of - one bin cannot equal the age_min of another.") -} - -validate_foi_df <- function(foi_df, cnames_additional) { - if (!is.data.frame(foi_df) || !all(cnames_additional %in% names(foi_df)) || ncol(foi_df) != (1 + length(cnames_additional))) { - if(length(cnames_additional) == 1) - message_end <- paste0(" and ", cnames_additional, ".") - else - message_end <- paste0(", ", paste(cnames_additional, collapse=" and "), ".") - message_beginning <- "foi must be a dataframe with columns foi" - stop(paste0(message_beginning, message_end)) - } -} - -validate_seroreversion_rate <- function(seroreversion_rate) { - if (!is.numeric(seroreversion_rate) || seroreversion_rate < 0) { - stop("seroreversion_rate must be a non-negative numeric value.") - } -} - -validate_survey_and_foi_consistency <- function( - survey_features, - foi_df -) { - max_age_foi_df <- nrow(foi_df) - if(max_age_foi_df > max(survey_features$age_max)) - stop("maximum age implicit in foi_df should not exceed max age in survey_features.") -} - -validate_survey_and_foi_consistency_age_time <- function( - survey_features, - foi_df -) { - max_age_foi_df <- max(foi_df$year) - min(foi_df$year) + 1 - if(max_age_foi_df > max(survey_features$age_max)) - stop("maximum age implicit in foi_df should not exceed max age in survey_features.") -} - -generate_seropositive_counts_by_age_bin <- function( - probability_seropositive_by_age, +generate_seropositive_counts_by_age_bin <- function( #nolint + probability_seropositive_by_age, #nolint sample_size_by_age_random, survey_features - ) { +) { combined_df <- probability_seropositive_by_age %>% - dplyr::left_join(sample_size_by_age_random, by="age") %>% + dplyr::left_join(sample_size_by_age_random, by = "age") %>% dplyr::mutate( - n_seropositive=rbinom(nrow(probability_seropositive_by_age), - sample_size, - seropositivity)) + n_seropositive = rbinom( + nrow(probability_seropositive_by_age), + .data$n_sample, + .data$seropositivity) + ) grouped_df <- combined_df %>% - dplyr::group_by(age_min, age_max) %>% + dplyr::group_by(.data$age_min, .data$age_max) %>% dplyr::summarise( - sample_size=sum(sample_size), - n_seropositive=sum(n_seropositive), + n_sample = sum(.data$n_sample), + n_seropositive = sum(.data$n_seropositive), .groups = "drop" ) %>% left_join( survey_features, - by = c("age_min", "age_max", "sample_size") + by = c("age_min", "age_max", "n_sample") ) return(grouped_df) @@ -495,20 +471,22 @@ generate_seropositive_counts_by_age_bin <- function( #' Simulate serosurvey data based on a time-varying FOI model. #' -#' This function generates binned serosurvey data based on a time-varying FOI model, -#' optionally including seroreversion. This function allows construction of serosurveys -#' with binned age groups, and it generates uncertainty in the distribution of a sample size -#' within an age bin through multinomial sampling. -#' -#' @param foi A dataframe containing the force of infection (FOI) values for different years. -#' It should have two columns: 'year' and 'foi'. -#' @param survey_features A dataframe containing information about the binned age groups and sample -#' sizes for each. It should contain columns: ['age_min', 'age_max', 'sample_size']. -#' @param seroreversion_rate A non-negative value determining the rate of seroreversion (per year). -#' Default is 0. -#' -#' @return A dataframe with simulated serosurvey data, including age group information, overall -#' sample sizes, the number of seropositive individuals, and other survey features. +#' This function generates binned serosurvey data based on a time-varying FOI +#' model, optionally including seroreversion. This function allows construction +#' of serosurveys with binned age groups, and it generates uncertainty in the +#' distribution of a sample size within an age bin through multinomial sampling. +#' +#' @param foi A dataframe containing the force of infection (FOI) values for +#' different years. It should have two columns: 'year' and 'foi'. +#' @param survey_features A dataframe containing information about the binned +#' age groups and sample sizes for each. It should contain columns: +#' ['age_min', 'age_max', 'n_sample']. +#' @param seroreversion_rate A non-negative value determining the rate of +#' seroreversion (per year). Default is 0. +#' +#' @return A dataframe with simulated serosurvey data, including age group +#' information, overall sample sizes, the number of seropositive individuals, +#' and other survey features. #' @examples #' # specify FOIs for each year #' foi_df <- data.frame( @@ -518,19 +496,19 @@ generate_seropositive_counts_by_age_bin <- function( #' survey_features <- data.frame( #' age_min = c(1, 3, 15), #' age_max = c(2, 14, 20), -#' sample_size = c(1000, 2000, 1500)) +#' n_sample = c(1000, 2000, 1500)) #' serosurvey <- simulate_serosurvey_time_model( #' foi_df, survey_features) #' @export simulate_serosurvey_time_model <- function( - foi, - survey_features, - seroreversion_rate=0 + foi, + survey_features, + seroreversion_rate = 0 ) { # Input validation - validate_foi_df(foi, c("year")) - validate_survey(survey_features) + validate_foi_df(foi, "year") + validate_survey_features(survey_features) validate_seroreversion_rate(seroreversion_rate) validate_survey_and_foi_consistency( survey_features, @@ -558,20 +536,22 @@ simulate_serosurvey_time_model <- function( #' Simulate serosurvey data based on an age-varying FOI model. #' -#' This function generates binned serosurvey data based on an age-varying FOI model, -#' optionally including seroreversion. This function allows construction of serosurveys -#' with binned age groups, and it generates uncertainty in the distribution of a sample size -#' within an age bin through multinomial sampling. -#' -#' @param foi A dataframe containing the force of infection (FOI) values for different ages. -#' It should have two columns: 'age' and 'foi'. -#' @param survey_features A dataframe containing information about the binned age groups and sample -#' sizes for each. It should contain columns: ['age_min', 'age_max', 'sample_size']. -#' @param seroreversion_rate A non-negative value determining the rate of seroreversion (per year). -#' Default is 0. -#' -#' @return A dataframe with simulated serosurvey data, including age group information, overall -#' sample sizes, the number of seropositive individuals, and other survey features. +#' This function generates binned serosurvey data based on an age-varying FOI +#' model, optionally including seroreversion. This function allows construction +#' of serosurveys with binned age groups, and it generates uncertainty in the +#' distribution of a sample size within an age bin through multinomial sampling. +#' +#' @param foi A dataframe containing the force of infection (FOI) values for +#' different ages. It should have two columns: 'age' and 'foi'. +#' @param survey_features A dataframe containing information about the binned +#' age groups and sample sizes for each. It should contain columns: +#' ['age_min', 'age_max', 'n_sample']. +#' @param seroreversion_rate A non-negative value determining the rate of +#' seroreversion (per year). Default is 0. +#' +#' @return A dataframe with simulated serosurvey data, including age group +#' information, overall sample sizes, the number of seropositive individuals, +#' and other survey features. #' @examples #' # specify FOIs for each year #' foi_df <- data.frame( @@ -581,19 +561,19 @@ simulate_serosurvey_time_model <- function( #' survey_features <- data.frame( #' age_min = c(1, 3, 15), #' age_max = c(2, 14, 20), -#' sample_size = c(1000, 2000, 1500)) +#' n_sample = c(1000, 2000, 1500)) #' serosurvey <- simulate_serosurvey_age_model( #' foi_df, survey_features) #' @export simulate_serosurvey_age_model <- function( - foi, - survey_features, - seroreversion_rate=0 + foi, + survey_features, + seroreversion_rate = 0 ) { # Input validation - validate_foi_df(foi, c("age")) - validate_survey(survey_features) + validate_foi_df(foi, "age") + validate_survey_features(survey_features) validate_seroreversion_rate(seroreversion_rate) validate_survey_and_foi_consistency( survey_features, @@ -620,20 +600,23 @@ simulate_serosurvey_age_model <- function( #' Simulate serosurvey data based on an age-and-time-varying FOI model. #' -#' This function generates binned serosurvey data based on an age-and-time-varying FOI model, -#' optionally including seroreversion. This function allows construction of serosurveys -#' with binned age groups, and it generates uncertainty in the distribution of a sample size -#' within an age bin through multinomial sampling. -#' -#' @param foi A dataframe containing the force of infection (FOI) values for different ages. -#' It should have two columns: 'year', 'age' and 'foi'. -#' @param survey_features A dataframe containing information about the binned age groups and sample -#' sizes for each. It should contain columns: ['age_min', 'age_max', 'sample_size']. -#' @param seroreversion_rate A non-negative value determining the rate of seroreversion (per year). -#' Default is 0. -#' -#' @return A dataframe with simulated serosurvey data, including age group information, overall -#' sample sizes, the number of seropositive individuals, and other survey features. +#' This function generates binned serosurvey data based on an +#' age-and-time-varying FOI model, optionally including seroreversion. +#' This function allows construction of serosurveys with binned age groups, and +#' it generates uncertainty in the distribution of a sample size within an age +#' bin through multinomial sampling. +#' +#' @param foi A dataframe containing the force of infection (FOI) values for +#' different ages. It should have two columns: 'year', 'age' and 'foi'. +#' @param survey_features A dataframe containing information about the binned +#' age groups and sample sizes for each. It should contain columns: +#' ['age_min', 'age_max', 'n_sample']. +#' @param seroreversion_rate A non-negative value determining the rate of +#' seroreversion (per year). Default is 0. +#' +#' @return A dataframe with simulated serosurvey data, including age group +#' information, overall sample sizes, the number of seropositive individuals, +#' and other survey features. #' @examples #' # specify FOIs for each year #' foi_df <- data.frame( @@ -644,29 +627,30 @@ simulate_serosurvey_age_model <- function( #' survey_features <- data.frame( #' age_min = c(1, 3, 15), #' age_max = c(2, 14, 20), -#' sample_size = c(1000, 2000, 1500)) +#' n_sample = c(1000, 2000, 1500)) #' serosurvey <- simulate_serosurvey_age_and_time_model( #' foi_df, survey_features) #' @export -simulate_serosurvey_age_and_time_model <- function( - foi, - survey_features, - seroreversion_rate=0 +simulate_serosurvey_age_and_time_model <- function( #nolint + foi, + survey_features, + seroreversion_rate = 0 ) { # Input validation validate_foi_df(foi, c("age", "year")) - validate_survey(survey_features) + validate_survey_features(survey_features) validate_seroreversion_rate(seroreversion_rate) validate_survey_and_foi_consistency_age_time( survey_features, foi ) - probability_serop_by_age <- probability_seropositive_age_and_time_model_by_age( - foi = foi, - seroreversion_rate = seroreversion_rate - ) + probability_serop_by_age <- + probability_seropositive_age_and_time_model_by_age( + foi = foi, + seroreversion_rate = seroreversion_rate + ) sample_size_by_age_random <- sample_size_by_individual_age_random( survey_features = survey_features @@ -684,24 +668,28 @@ simulate_serosurvey_age_and_time_model <- function( #' Simulate serosurvey data based on various FOI models. #' -#' This function generates binned serosurvey data based on either a time-varying FOI model, -#' an age-varying FOI model, or an age-and-time-varying FOI model. In all cases, it is possible -#' to optionally include seroreversion. This function allows construction of serosurveys -#' with binned age groups, and it generates uncertainty in the distribution of a sample size -#' within an age bin through multinomial sampling. +#' This function generates binned serosurvey data based on either a time-varying +#' FOI model, an age-varying FOI model, or an age-and-time-varying FOI model. +#' In all cases, it is possible to optionally include seroreversion. This +#' function allows construction of serosurveys with binned age groups, and it +#' generates uncertainty in the distribution of a sample size within an age bin +#' through multinomial sampling. #' -#' @param model A string specifying the model type which can be one of ['age', 'time', 'age-time']. +#' @param model A string specifying the model type which can be one of +#' ['age', 'time', 'age-time']. #' @param foi A dataframe containing the force of infection (FOI) values. -#' For time-varying models the columns should be ['year', 'foi']. -#' For age-varying models the columns should be ['age', 'foi']. -#' For age-and-time-varying models the columns should be ['age', 'time', 'foi']. -#' @param survey_features A dataframe containing information about the binned age groups and sample -#' sizes for each. It should contain columns: ['age_min', 'age_max', 'sample_size']. -#' @param seroreversion_rate A non-negative value determining the rate of seroreversion (per year). -#' Default is 0. -#' -#' @return A dataframe with simulated serosurvey data, including age group information, overall -#' sample sizes, the number of seropositive individuals, and other survey features. +#' For time-varying models the columns should be ['year', 'foi']. +#' For age-varying models the columns should be ['age', 'foi']. +#' For age-and-time-varying models the columns should be ['age', 'time', 'foi']. +#' @param survey_features A dataframe containing information about the binned +#' age groups and sample sizes for each. +#' It should contain columns: ['age_min', 'age_max', 'n_sample']. +#' @param seroreversion_rate A non-negative value determining the rate of +#' seroreversion (per year). Default is 0. +#' +#' @return A dataframe with simulated serosurvey data, including age group +#' information, overall sample sizes, the number of seropositive individuals, +#' and other survey features. #' @examples #' # time-varying model #' foi_df <- data.frame( @@ -711,7 +699,7 @@ simulate_serosurvey_age_and_time_model <- function( #' survey_features <- data.frame( #' age_min = c(1, 3, 15), #' age_max = c(2, 14, 20), -#' sample_size = c(1000, 2000, 1500)) +#' n_sample = c(1000, 2000, 1500)) #' serosurvey <- simulate_serosurvey( #' model = "time", #' foi = foi_df, @@ -725,7 +713,7 @@ simulate_serosurvey_age_and_time_model <- function( #' survey_features <- data.frame( #' age_min = c(1, 3, 15), #' age_max = c(2, 14, 20), -#' sample_size = c(1000, 2000, 1500)) +#' n_sample = c(1000, 2000, 1500)) #' serosurvey <- simulate_serosurvey( #' model = "age", #' foi = foi_df, @@ -740,36 +728,36 @@ simulate_serosurvey_age_and_time_model <- function( #' survey_features <- data.frame( #' age_min = c(1, 3, 15), #' age_max = c(2, 14, 20), -#' sample_size = c(1000, 2000, 1500)) +#' n_sample = c(1000, 2000, 1500)) #' serosurvey <- simulate_serosurvey( #' model = "age", #' foi = foi_df, #' survey_features = survey_features) #' @export simulate_serosurvey <- function( - model, - foi, - survey_features, - seroreversion_rate=0 + model, + foi, + survey_features, + seroreversion_rate = 0 ) { # don't advertise time-age in case people think this is something else - if(!model %in% c("age", "time", "age-time", "time-age")) + if (!model %in% c("age", "time", "age-time", "time-age")) stop("model must be one of 'age', 'time', or 'age-time'.") - if(model == "time") { + if (model == "time") { serosurvey <- simulate_serosurvey_time_model( foi, survey_features, seroreversion_rate ) - } else if(model == "age") { + } else if (model == "age") { serosurvey <- simulate_serosurvey_age_model( foi, survey_features, seroreversion_rate ) - } else if(model == "age-time" || model == "time-age") { + } else if (model == "age-time" || model == "time-age") { serosurvey <- simulate_serosurvey_age_and_time_model( foi, survey_features, @@ -782,26 +770,27 @@ simulate_serosurvey <- function( #' Simulate serosurvey data based on general serocatalytic model. #' -#' This simulation method assumes only that the model system can be written as a piecewise- -#' linear ordinary differential equation system. +#' This simulation method assumes only that the model system can be written as +#' a piecewise-linear ordinary differential equation system. #' #' @inheritParams probability_seropositive_general_model_by_age #' @inheritParams simulate_serosurvey #' -#' @return A dataframe with simulated serosurvey data, including age group information, overall -#' sample sizes, the number of seropositive individuals, and other survey features. +#' @return A dataframe with simulated serosurvey data, including age group +#' information, overall sample sizes, the number of seropositive individuals, +#' and other survey features. #' #' @export -simulate_serosurvey_general_model <- function( - construct_A_function, - calculate_seropositivity_function, - initial_conditions, - survey_features, - ... +simulate_serosurvey_general_model <- function( #nolint + construct_A_function, + calculate_seropositivity_function, #nolint + initial_conditions, + survey_features, + ... ) { # Input validation - validate_survey(survey_features) + validate_survey_features(survey_features) probability_serop_by_age <- probability_seropositive_general_model_by_age( construct_A_function, diff --git a/R/stanmodels.R b/R/stanmodels.R index e3529b84..7952d2b2 100644 --- a/R/stanmodels.R +++ b/R/stanmodels.R @@ -1,13 +1,17 @@ # Generated by rstantools. Do not edit by hand. # names of stan models -stanmodels <- c("av_normal", "constant", "tv_normal", "tv_normal_log") +stanmodels <- c("age_no_seroreversion", "age_seroreversion", "constant_no_seroreversion", "constant_seroreversion", "time_log_no_seroreversion", "time_log_seroreversion", "time_no_seroreversion", "time_seroreversion") # load each stan module -Rcpp::loadModule("stan_fit4av_normal_mod", what = TRUE) -Rcpp::loadModule("stan_fit4constant_mod", what = TRUE) -Rcpp::loadModule("stan_fit4tv_normal_mod", what = TRUE) -Rcpp::loadModule("stan_fit4tv_normal_log_mod", what = TRUE) +Rcpp::loadModule("stan_fit4age_no_seroreversion_mod", what = TRUE) +Rcpp::loadModule("stan_fit4age_seroreversion_mod", what = TRUE) +Rcpp::loadModule("stan_fit4constant_no_seroreversion_mod", what = TRUE) +Rcpp::loadModule("stan_fit4constant_seroreversion_mod", what = TRUE) +Rcpp::loadModule("stan_fit4time_log_no_seroreversion_mod", what = TRUE) +Rcpp::loadModule("stan_fit4time_log_seroreversion_mod", what = TRUE) +Rcpp::loadModule("stan_fit4time_no_seroreversion_mod", what = TRUE) +Rcpp::loadModule("stan_fit4time_seroreversion_mod", what = TRUE) # instantiate each stanmodel object stanmodels <- sapply(stanmodels, function(model_name) { diff --git a/R/summarise_seromodel.R b/R/summarise_seromodel.R new file mode 100644 index 00000000..08529cc2 --- /dev/null +++ b/R/summarise_seromodel.R @@ -0,0 +1,176 @@ +#' Extract specified loo estimate +#' +#' @inheritParams extract_central_estimates +#' @param par_loo_estimate Name of the loo estimate to be extracted. +#' Available options are: +#' \describe{ +#' \item{`"elpd_loo"`}{Expected log pointwise predictive density} +#' \item{`"p_loo"`}{Effective number of parameters} +#' \item{`"looic"`}{Leave-one-out cross-validation information criteria} +#' } +#' For additional information refer to [loo][loo::loo]. +#' @param loo_estimate_digits Number of loo estimate digits +#' @return Text summarising specified loo estimate +#' @export +summarise_loo_estimate <- function( + seromodel, + par_loo_estimate = "elpd_loo", + loo_estimate_digits = 2 +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + loo_fit <- loo::loo( + seromodel, + pars = c(parameter_name = "log_likelihood") + ) + loo_estimate <- loo_fit$estimates[par_loo_estimate, ] %>% + round(loo_estimate_digits) + + loo_estimate_summary <- paste0(loo_estimate[1], "(se=", loo_estimate[2], ")") + + return(loo_estimate_summary) +} + +#' Summarise central estimate +#' +#' @inheritParams extract_central_estimates +#' @param central_estimate_digits Number of central estimate digits +#' @return Text summarising specified central estimate +#' @export +summarise_central_estimate <- function( + seromodel, + serosurvey, + alpha, + par_name = "seroreversion_rate", + central_estimate_digits = 2 +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + central_estimates <- signif( + extract_central_estimates( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + par_name = par_name + ), + digits = 2 + ) + + central_estimate_summary <- paste0( + central_estimates$median, + "(", 100 * (1 - alpha), "% CI, ", + central_estimates$lower, "-", + central_estimates$upper, ")" + ) + + return(central_estimate_summary) +} + +#' Summarise specified model +#' +#' @inheritParams extract_central_estimates +#' @inheritParams summarise_loo_estimate +#' @inheritParams summarise_central_estimate +#' @return A list summarising the specified model +#' \describe{ +#' \item{`model_name`}{Name of the model} +#' \item{`elpd`}{elpd and its standard deviation} +#' \item{`foi`}{Estimated foi with credible interval (for 'constant' model)} +#' \item{`foi_rhat`}{foi rhat value (for 'constant' model)} +#' \item{`seroreversion_rate`}{Estimated seroreversion rate} +#' \item{`seroreversion_rate_rhat`}{Seroreversion rate rhat value} +#' } +#' @export +summarise_seromodel <- function( + seromodel, + serosurvey, + alpha = 0.05, + par_loo_estimate = "elpd_loo", + loo_estimate_digits = 1, + central_estimate_digits = 2, + rhat_digits = 2 +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + model_name <- seromodel@model_name + summary_list <- list(model_name = model_name) + + loo_estimate_summary <- summarise_loo_estimate( + seromodel = seromodel, + par_loo_estimate = par_loo_estimate, + loo_estimate_digits = loo_estimate_digits + ) + + summary_list[par_loo_estimate] <- loo_estimate_summary + + check_convergence <- NULL + if (startsWith(model_name, "constant")) { + foi_summary <- summarise_central_estimate( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + par_name = "foi", + central_estimate_digits = central_estimate_digits + ) + + foi_rhat <- bayesplot::rhat(seromodel, "foi") %>% + signif(rhat_digits) + + check_convergence <- c( + check_convergence, + foi_rhat < 1.01 + ) + + summary_list <- c( + summary_list, + list( + foi = foi_summary, + foi_rhat = foi_rhat + ) + ) + } else { + rhats <- bayesplot::rhat(seromodel, "foi_vector") + + check_convergence <- c( + check_convergence, + all(rhats < 1.01) + ) + } + + if (!endsWith(model_name, "no_seroreversion")) { + seroreversion_rate_summary <- summarise_central_estimate( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + par_name = "seroreversion_rate", + central_estimate_digits = central_estimate_digits + ) + + seroreversion_rate_rhat <- bayesplot::rhat( + seromodel, + "seroreversion_rate" + ) %>% + signif(rhat_digits) + + check_convergence <- c( + check_convergence, + seroreversion_rate_rhat < 1.01 + ) + + summary_list <- c( + summary_list, + list( + seroreversion_rate = seroreversion_rate_summary, + seroreversion_rate_rhat = seroreversion_rate_rhat + ) + ) + } + + if (all(check_convergence)) { + summary_list["converged"] <- "yes" + } else { + summary_list["converged"] <- "no" + } + + return(summary_list) +} diff --git a/R/validation.R b/R/validation.R new file mode 100644 index 00000000..c155db62 --- /dev/null +++ b/R/validation.R @@ -0,0 +1,109 @@ +validate_serosurvey <- function(serosurvey) { + # Check that necessary columns are present + col_types <- list( + age_min = "numeric", + age_max = "numeric", + n_sample = "numeric", + n_seropositive = "numeric" + ) + + checkmate::assert_names(names(serosurvey), must.include = names(col_types)) + + # Validates column types + error_messages <- list() + for (col in names(col_types)) { + valid_col_types <- as.list(col_types[[col]]) + + # Only validates column type if the column exists in the dataframe + if (col %in% colnames(serosurvey) && + !any(vapply(valid_col_types, function(type) { + do.call(sprintf("is.%s", type), list(serosurvey[[col]])) + }, logical(1)))) { + error_messages <- c( + error_messages, + sprintf( + "`%s` must be of any of these types: `%s`", + col, toString(col_types[[col]]) + ) + ) + } + } + if (length(error_messages) > 0) { + stop( + "The following columns in `serosurvey` have wrong types:\n", + toString(error_messages) + ) + } + + return(serosurvey) +} + +validate_survey_features <- function(survey_features) { + + if (!is.data.frame(survey_features) || + !all( + c("age_min", "age_max", "n_sample") %in% names(survey_features)) + ) { + stop( + "survey_features must be a dataframe with columns ", + "'age_min', 'age_max', and 'n_sample'." + ) + } + + # check that the age_max of a bin does not coincide with + # the age min of a different bin + is_age_ok <- check_age_constraints(survey_features) + if (!is_age_ok) + stop( + "Age bins in a survey are inclusive of both bounds, ", + "so the age_max of one bin cannot equal the age_min of another." + ) +} + +validate_foi_df <- function(foi_df, cnames_additional) { + if ( + !is.data.frame(foi_df) || + !all(cnames_additional %in% names(foi_df)) || + ncol(foi_df) != (1 + length(cnames_additional)) + ) { + if (length(cnames_additional) == 1) { + message_end <- paste0(" and ", cnames_additional, ".") + } else { + message_end <- paste0( + ", ", paste(cnames_additional, collapse = " and "), "." + ) + message_beginning <- "foi must be a dataframe with columns foi" + stop(glue::glue("{message_beginning}", "{message_end}")) + } + } +} + +validate_seroreversion_rate <- function(seroreversion_rate) { + if (!is.numeric(seroreversion_rate) || seroreversion_rate < 0) { + stop("seroreversion_rate must be a non-negative numeric value.") + } +} + +validate_survey_and_foi_consistency <- function( #nolint + survey_features, + foi_df +) { + max_age_foi_df <- nrow(foi_df) + if (max_age_foi_df > max(survey_features$age_max)) + stop( + "maximum age implicit in foi_df should ", + "not exceed max age in survey_features." + ) +} + +validate_survey_and_foi_consistency_age_time <- function( #nolint + survey_features, + foi_df +) { + max_age_foi_df <- max(foi_df$year) - min(foi_df$year) + 1 + if (max_age_foi_df > max(survey_features$age_max)) + stop( + "maximum age implicit in foi_df should ", + "not exceed max age in survey_features." + ) +} diff --git a/R/veev2012.R b/R/veev2012.R deleted file mode 100644 index 49e4d6fd..00000000 --- a/R/veev2012.R +++ /dev/null @@ -1,15 +0,0 @@ -#' Seroprevalence data on serofoi -#' -#' Data from a serological surveys -#' -#' @docType data -#' -#' @usage veev2012 -#' -#' @format An object of class `"cross"`; see [qtl::read.cross()]. -#' -#' @keywords datasets -#' -#' @examples -#' veev2012 -"veev2012" diff --git a/R/visualisation.R b/R/visualisation.R deleted file mode 100644 index 358d67c2..00000000 --- a/R/visualisation.R +++ /dev/null @@ -1,461 +0,0 @@ -#' Generate seropositivity plot from a raw serological -#' survey dataset -#' -#' @inheritParams prepare_serodata -#' @inheritParams get_prev_expanded -#' @param size_text Text size use in the theme of the graph returned by the -#' function. -#' @return A ggplot object containing the seropositivity-vs-age graph of the raw -#' data of a given seroprevalence survey with its corresponding binomial -#' confidence interval. -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' plot_seroprev(serodata, size_text = 15) -#' @export -plot_seroprev <- function(serodata, - size_text = 6, - bin_data = TRUE, - bin_step = 5) { - serodata <- validate_prepared_serodata(serodata = serodata) - if (bin_data) { - if (any(serodata$age_max - serodata$age_min > 2)) { - warn_msg <- paste0( - "Make sure `serodata` is already grouped by age. ", - "Skipping binning in seroprevalence plotting." - ) - warning(warn_msg) - bin_data <- FALSE - } else { - checkmate::assert_int(bin_step, lower = 2) - } - } - - if (bin_data) { - serodata <- prepare_bin_data( - serodata, - bin_step = bin_step - ) - } - - min_prev <- min(serodata$prev_obs_lower) - max_prev <- max(serodata$prev_obs_upper) - - seroprev_plot <- ggplot2::ggplot( - data = serodata, - ggplot2::aes(x = .data$age_mean_f) - ) + - ggplot2::geom_errorbar( - ggplot2::aes( - ymin = .data$prev_obs_lower, - ymax = .data$prev_obs_upper - ), - width = 0.1 - ) + - ggplot2::geom_point( - ggplot2::aes( - y = .data$prev_obs, - size = .data$total - ), - fill = "#7a0177", colour = "black", shape = 21 - ) + - ggplot2::coord_cartesian( - xlim = c(min(serodata$age_min), max(serodata$age_max)), - ylim = c(min_prev, max_prev) - ) + - ggplot2::theme_bw(size_text) + - ggplot2::theme(legend.position = "none") + - ggplot2::ylab("seropositivity") + - ggplot2::xlab("age") - - return(seroprev_plot) -} - -#' Generate seropositivity plot corresponding to the specified -#' fitted serological model -#' -#' This function generates a seropositivity plot of the specified serological -#' model object. This includes the original data grouped by age as well as the -#' obtained fitting from the model implementation. Age is located on the x axis -#' and seropositivity on the y axis with its corresponding confidence interval. -#' @inheritParams get_foi_central_estimates -#' @inheritParams run_seromodel -#' @inheritParams get_prev_expanded -#' @param size_text Text size of the graph returned by the function. -#' @return A ggplot object containing the seropositivity-vs-age graph including -#' the data, the fitted model and their corresponding confidence intervals. -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant", -#' iter = 1000 -#' ) -#' plot_seroprev_fitted(seromodel_object, -#' serodata = serodata, -#' size_text = 15 -#' ) -#' @export -plot_seroprev_fitted <- function(seromodel_object, - serodata, - size_text = 6, - bin_data = TRUE, - bin_step = 5, - alpha = 0.05 - ) { - checkmate::assert_class(seromodel_object, "stanfit", null.ok = TRUE) - validate_prepared_serodata(serodata) - - foi <- rstan::extract(seromodel_object, "foi", inc_warmup = FALSE)[[1]] - - prev_expanded <- get_prev_expanded( - foi, - serodata = serodata, - alpha = alpha, - bin_data = bin_data, - bin_step = bin_step - ) - prev_plot <- - ggplot2::ggplot(prev_expanded, ggplot2::aes(x = .data$age)) + - ggplot2::geom_ribbon( - ggplot2::aes( - ymin = .data$predicted_prev_lower, - ymax = .data$predicted_prev_upper - ), - fill = "#c994c7" - ) + - ggplot2::geom_line( - ggplot2::aes(y = .data$predicted_prev), - colour = "#7a0177" - ) + - ggplot2::geom_errorbar( - ggplot2::aes(ymin = .data$prev_obs_lower, ymax = .data$prev_obs_upper), - width = 0.1 - ) + - ggplot2::geom_point( - ggplot2::aes(y = .data$prev_obs, size = .data$total), - fill = "#7a0177", - colour = "black", - shape = 21 - ) + - ggplot2::theme_bw(size_text) + - ggplot2::coord_cartesian( - xlim = c(min(serodata$age_min), max(serodata$age_max)), - ylim = c( - min(prev_expanded$prev_obs_lower, prev_expanded$predicted_prev_lower), - max(prev_expanded$prev_obs_upper, prev_expanded$predicted_prev_upper) - ) - ) + - ggplot2::theme(legend.position = "none") + - ggplot2::ylab("seropositivity") + - ggplot2::xlab("age") - - return(prev_plot) -} - -# TODO Complete @param documentation - -#' Generate force-of-infection plot corresponding to the -#' specified fitted serological model -#' -#' This function generates a force-of-infection plot from the results obtained -#' by fitting a serological model. This includes the corresponding binomial -#' confidence interval. The x axis corresponds to the decades covered by the -#' survey the y axis to the force-of-infection. -#' @inheritParams get_foi_central_estimates -#' @param size_text Text size use in the theme of the graph returned by the -#' function. -#' @param max_lambda Upper `ylim`for force-of-infection plot -#' @param foi Data frame with the Force-of-infection trend to be plotted -#' alongside the estimated force-of-infection. Typically this corresponds to -#' the force-of-infection used to simulate the serosurvey used to model. -#' @return A ggplot2 object containing the force-of-infection vs time including -#' the corresponding confidence interval. -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant", -#' iter = 1000 -#' ) -#' cohort_ages <- get_cohort_ages(serodata) -#' plot_foi( -#' seromodel_object = seromodel_object, -#' cohort_ages = cohort_ages, -#' size_text = 15 -#' ) -#' @export -plot_foi <- function(seromodel_object, - serodata, - max_lambda = NA, - size_text = 25, - foi = NULL) { - checkmate::assert_class(seromodel_object, "stanfit", null.ok = TRUE) - - #-------- This bit is to get the actual length of the foi data - foi_data <- get_foi_central_estimates( - seromodel_object = seromodel_object, - serodata = serodata - ) - - if ( - seromodel_object@model_name %in% - c("constant", "tv_normal", "tv_normal_log") - ) { - xlab <- "year" - foi_plot <- - ggplot2::ggplot(foi_data, ggplot2::aes(x = .data$year)) - - if (!is.null(foi)) { - stopifnot( - "`year` must be present in `foi`" = "year" %in% colnames(foi) - ) - foi_data <- dplyr::left_join( - foi_data, - foi, - by = "year" - ) - foi_plot <- foi_plot + - ggplot2::geom_line( - data = foi_data, ggplot2::aes(y = foi), - colour = "#b30909", - size = size_text / 8 - ) - } - } else if (seromodel_object@model_name == "av_normal") { - xlab <- "age" - foi_plot <- - ggplot2::ggplot(foi_data, ggplot2::aes(x = .data$age)) - - if (!is.null(foi)) { - stopifnot( - "`age` must be present in `foi`" = "age" %in% colnames(foi) - ) - foi_data <- dplyr::left_join( - foi_data, - foi, - by = "age" - ) - foi_plot <- foi_plot + - ggplot2::geom_line( - data = foi_data, ggplot2::aes(y = foi), - colour = "#b30909", - size = size_text / 8 - ) - } - } - - foi_plot <- foi_plot + - ggplot2::geom_ribbon( - ggplot2::aes( - ymin = .data$lower, - ymax = .data$upper - ), - fill = "#41b6c4", - alpha = 0.5 - ) + - ggplot2::geom_line( - ggplot2::aes(y = .data$medianv), - colour = "#253494", - size = size_text / 8 - ) + - ggplot2::theme_bw(size_text) + - ggplot2::coord_cartesian(ylim = c(0, max_lambda)) + - ggplot2::ylab("Force-of-Infection") + - ggplot2::xlab(xlab) - - return(foi_plot) -} - -#' Generate plot of the R-hat estimates for the specified fitted -#' serological model -#' -#' This function generates a plot of the R-hat estimates obtained for a -#' specified fitted serological model `seromodel_object`. The x axis corresponds -#' to the decades covered by the survey and the y axis to the value of the -#' rhats. All rhats must be smaller than 1 to ensure convergence (for further -#' details check [rhat][bayesplot::rhat]). -#' @inheritParams get_foi_central_estimates -#' @param size_text Text size use in the theme of the graph returned by the -#' function. -#' @return The rhats-convergence plot of the selected model. -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant", -#' iter = 1000 -#' ) -#' cohort_ages <- get_cohort_ages(serodata = serodata) -#' plot_rhats(seromodel_object, -#' cohort_ages = cohort_ages, -#' size_text = 15 -#' ) -#' @export -plot_rhats <- function(seromodel_object, - cohort_ages, - size_text = 25) { - checkmate::assert_class(seromodel_object, "stanfit", null.ok = TRUE) - - rhats <- get_table_rhats( - seromodel_object = seromodel_object, - cohort_ages = cohort_ages - ) - - if ( - seromodel_object@model_name %in% - c("constant", "tv_normal", "tv_normal_log") - ) { - rhats_plot <- - ggplot2::ggplot(rhats, ggplot2::aes(.data$year, .data$rhat)) - } else if (seromodel_object@model_name == "av_normal") { - rhats_plot <- - ggplot2::ggplot(rhats, ggplot2::aes(.data$age, .data$rhat)) - } - - rhats_plot <- rhats_plot + - ggplot2::geom_line(colour = "purple") + - ggplot2::geom_point() + - ggplot2::coord_cartesian( - ylim = c( - min(1.0, min(rhats$rhat)), - max(1.02, max(rhats$rhat)) - ) - ) + - ggplot2::geom_hline( - yintercept = 1.01, - colour = "blue", - size = size_text / 12 - ) + - ggplot2::theme_bw(size_text) + - ggplot2::ylab("Convergence (R^)") - - return(rhats_plot) -} - -#' Generate vertical arrangement of plots showing a summary of a -#' model, the estimated seroprevalence, the force-of-infection fit and the R-hat -#' estimates plots. -#' -#' @inheritParams get_foi_central_estimates -#' @inheritParams run_seromodel -#' @inheritParams get_prev_expanded -#' @inheritParams plot_foi -#' @param size_text Text size use in the theme of the graph returned by the -#' function. -#' @return A ggplot object with a vertical arrange containing the -#' seropositivity, force of infection, and convergence plots. -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant", -#' iter = 1000 -#' ) -#' plot_seromodel(seromodel_object, -#' serodata = serodata, -#' size_text = 15 -#' ) -#' @export -plot_seromodel <- function(seromodel_object, - serodata, - alpha = 0.05, - max_lambda = NA, - size_text = 25, - bin_data = TRUE, - bin_step = 5, - foi = NULL) { - checkmate::assert_class(seromodel_object, "stanfit", null.ok = TRUE) - serodata <- validate_serodata(serodata) - - cohort_ages <- get_cohort_ages(serodata = serodata) - - prev_plot <- plot_seroprev_fitted( - seromodel_object = seromodel_object, - serodata = serodata, - alpha = alpha, - size_text = size_text, - bin_data = bin_data, - bin_step = bin_step - ) - - foi_plot <- plot_foi( - seromodel_object = seromodel_object, - serodata = serodata, - max_lambda = max_lambda, - size_text = size_text, - foi = foi - ) - - rhats_plot <- plot_rhats( - seromodel_object = seromodel_object, - cohort_ages = cohort_ages, - size_text = size_text - ) - - model_summary <- extract_seromodel_summary( - seromodel_object = seromodel_object, - serodata = serodata - ) - summary_table <- t( - dplyr::select( - model_summary, - c("foi_model", "dataset", "elpd", "se", "converged") - ) - ) - summary_plot <- - plot_info_table(summary_table, size_text = size_text) - - plot_arrange <- cowplot::plot_grid( - summary_plot, - prev_plot, - foi_plot, - rhats_plot, - ncol = 1, - nrow = 4, - rel_heights = c(0.5, 1, 1, 1) - ) - return(plot_arrange) -} - -#' Generate plot summarizing a given table -#' -#' @param info_table Table with the information to be summarised -#' @param size_text Text size of the graph returned by the function -#' @return ggplot object summarizing the information in `info_table` -#' @examples -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant", -#' iter = 1000 -#' ) -#' seromodel_summary <- extract_seromodel_summary( -#' seromodel_object = seromodel_object, -#' serodata = serodata -#' ) -#' info_table <- t(seromodel_summary) -#' plot_info_table(info_table, size_text = 15) -#' @export -plot_info_table <- function(info_table, size_text) { - dato <- data.frame( - y = NROW(info_table):seq_len(1), - text = paste0(rownames(info_table), ": ", info_table[, 1]) - ) - p <- ggplot2::ggplot(dato, ggplot2::aes(x = 1, y = .data$y)) + - ggplot2::scale_y_continuous( - limits = c(0, NROW(info_table) + 1), - breaks = NULL - ) + - ggplot2::theme_void() + - ggplot2::geom_text(ggplot2::aes(label = text), - size = size_text / 2.5, - fontface = "bold" - ) - - return(p) -} diff --git a/data/chagas2012.RData b/data/chagas2012.RData index 02353b01..7d8c7bc3 100644 Binary files a/data/chagas2012.RData and b/data/chagas2012.RData differ diff --git a/data/chik2015.RData b/data/chik2015.RData index f09c9dbb..016d33b1 100644 Binary files a/data/chik2015.RData and b/data/chik2015.RData differ diff --git a/data/veev2012.RData b/data/veev2012.RData index 0db4d9ee..eb288af6 100644 Binary files a/data/veev2012.RData and b/data/veev2012.RData differ diff --git a/inst/WORDLIST b/inst/WORDLIST index aa4b41e8..462eaa93 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,101 +1,102 @@ Aedes Aki -al Alphavirus Alphaviruses -alphaviruses -arboviruses Angulo -Basáñez +BMJ Bahia Bajaj -bayesian -binconf -binom -BMJ +Basáñez +Binom CMD -Carrera -Chagas -Chikungunya -chikungunya Codecov Conteh -cruzi -Cucunubá -Darien -de Dib -disaggregated Domínguez -each -elpd Epiverse -et Everlyn +FOI +FOIs FoI Gamez Garzón -ggplot -Gómez Gruson -https HMC Henao IgG IgM -Javeriana Lesong -lifecycle Liscano Mariscal María -mc Neira -Nicolás -org -packagetemplate +ORCID Panamá Parra Pavlich Pittí -Pontificia -Programmes Quevedo -ORCID -rbinom -refactorization +RStan +Refactorization +Serocatalytic +Seroreversion +Serosurvey +Serosurveys +Sumali +Triatomine +Trypanosoma +VEEV +Vehtari +Yaneth +Zulma +alphaviruses +bayesian +bmatrix +boldsymbol +cauchy +cdots +cruzi +dD +dI +dN +dP +dR +dS +dX +ddots +df +dt +elpd +eq +foi +forall +frac +ggplot +hiv +leq +lifecycle +mathcal +mc +numerating +org +packagetemplate +prob rhat -rhats +rightarrow rstan -RStan -Réunion +sd se -serodata -serological +serocatalytic seromodel -Seroprevalence -seropositive -seropositivity +seropositives +seropositivities +seroprev seroreversion serosurvey serosurveys -Serosurveys -seroprev -seroprevalence -seroreversion sim stan -Stanfit -Sumali -sur -TBD +textit triatomine -Triatomine -Trypanosoma -Torres -Universidad -VEEV -Vehtari -warmup -Yaneth -Zulma +u +vdots diff --git a/inst/extdata/chagas2012.RDS b/inst/extdata/chagas2012.RDS index 034c6e1e..eac4be93 100644 Binary files a/inst/extdata/chagas2012.RDS and b/inst/extdata/chagas2012.RDS differ diff --git a/inst/extdata/chik2015.RDS b/inst/extdata/chik2015.RDS index 815caec1..eac4be93 100644 Binary files a/inst/extdata/chik2015.RDS and b/inst/extdata/chik2015.RDS differ diff --git a/inst/extdata/config.yml b/inst/extdata/config.yml new file mode 100644 index 00000000..3826fbd2 --- /dev/null +++ b/inst/extdata/config.yml @@ -0,0 +1,18 @@ +default: + priors: + indexes: + uniform: 0 + normal: 1 + defaults: + prior_index: 1 + # uniform distribution + min: 0 + max: 10 + # normal distribution + mean: 0 + sd: 1 + # cauchy + location: 0 + scale: 1 + # init + init: 0.1 diff --git a/inst/extdata/serodata.RDS b/inst/extdata/serodata.RDS deleted file mode 100644 index 034c6e1e..00000000 Binary files a/inst/extdata/serodata.RDS and /dev/null differ diff --git a/inst/extdata/simdata_constant.RDS b/inst/extdata/simdata_constant.RDS deleted file mode 100644 index dc126030..00000000 Binary files a/inst/extdata/simdata_constant.RDS and /dev/null differ diff --git a/inst/extdata/simdata_large_epi.RDS b/inst/extdata/simdata_large_epi.RDS deleted file mode 100644 index 62a44fa7..00000000 Binary files a/inst/extdata/simdata_large_epi.RDS and /dev/null differ diff --git a/inst/extdata/simdata_sw_dec.RDS b/inst/extdata/simdata_sw_dec.RDS deleted file mode 100644 index 8827a280..00000000 Binary files a/inst/extdata/simdata_sw_dec.RDS and /dev/null differ diff --git a/inst/extdata/veev2012.RDS b/inst/extdata/veev2012.RDS index 53f7de17..daf1d10f 100644 Binary files a/inst/extdata/veev2012.RDS and b/inst/extdata/veev2012.RDS differ diff --git a/inst/stan/age_no_seroreversion.stan b/inst/stan/age_no_seroreversion.stan new file mode 100644 index 00000000..df4c07dd --- /dev/null +++ b/inst/stan/age_no_seroreversion.stan @@ -0,0 +1,62 @@ +functions { + #include functions/prob_infected_age.stan +} + +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan +} + +transformed data { + int n_foi = max(foi_index); +} + +parameters { + vector[n_foi] foi_vector; + real sigma; +} + +transformed parameters { + vector[n_observations] prob_infected; + + prob_infected = prob_infected_age_model( + age_groups, + n_observations, + foi_vector, + foi_index, + 0.0 + ); +} + +model { + n_seropositive ~ binomial(n_sample, prob_infected); + sigma ~ cauchy(foi_sigma_rw_sc, foi_sigma_rw_sc); + + // force of infection prior + if (foi_prior_index == 0) + foi_vector[1] ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi_vector[1] ~ normal(foi_mean, foi_sd); + + for(i in 2:n_foi) + foi_vector[i] ~ normal(foi_vector[i - 1], sigma); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_expanded; + + for(age in 1:age_max) { + foi_expanded[age] = foi_vector[foi_index[age]]; + } + + prob_infected_expanded = prob_infected_age_model( + ages, + age_max, + foi_vector, + foi_index, + 0.0 + ); +} diff --git a/inst/stan/age_seroreversion.stan b/inst/stan/age_seroreversion.stan new file mode 100644 index 00000000..025df41d --- /dev/null +++ b/inst/stan/age_seroreversion.stan @@ -0,0 +1,70 @@ +functions { + #include functions/prob_infected_age.stan +} + +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan + #include data/seroreversion_prior_data.stan +} + +transformed data { + int n_foi = max(foi_index); +} + +parameters { + vector[n_foi] foi_vector; + real seroreversion_rate; + real sigma; +} + +transformed parameters { + vector[n_observations] prob_infected; + + prob_infected = prob_infected_age_model( + age_groups, + n_observations, + foi_vector, + foi_index, + seroreversion_rate + ); +} + +model { + n_seropositive ~ binomial(n_sample, prob_infected); + sigma ~ cauchy(foi_sigma_rw_loc, foi_sigma_rw_sc); + + // force of infection prior + if (foi_prior_index == 0) + foi_vector[1] ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi_vector[1] ~ normal(foi_mean, foi_sd); + + for(i in 2:n_foi) + foi_vector[i] ~ normal(foi_vector[i - 1], sigma); + + // seroreversion prior + if (seroreversion_prior_index == 0) + seroreversion_rate ~ uniform(seroreversion_min, seroreversion_max); + if (seroreversion_prior_index == 1) + seroreversion_rate ~ normal(seroreversion_mean, seroreversion_sd); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_expanded; + + for(age in 1:age_max) { + foi_expanded[age] = foi_vector[foi_index[age]]; + } + + prob_infected_expanded = prob_infected_age_model( + ages, + age_max, + foi_vector, + foi_index, + seroreversion_rate + ); +} diff --git a/inst/stan/av_normal.stan b/inst/stan/av_normal.stan deleted file mode 100644 index 6414fb2c..00000000 --- a/inst/stan/av_normal.stan +++ /dev/null @@ -1,66 +0,0 @@ -functions { - #include functions/prob_infected_av.stan -} - -data { - int n_obs; - int age_max; - int n_pos[n_obs]; - int n_total[n_obs]; - int ages[n_obs]; - - // prior choices - int chunks[age_max]; - real foi_location; - real foi_scale; -} - -transformed data { - int n_chunks = max(chunks); -} - -parameters { - row_vector[n_chunks] fois; - real sigma; -} - -transformed parameters { - vector[n_chunks] fois_vector; - vector[n_obs] prob_infected; - - fois_vector = to_vector(fois); - - prob_infected = prob_infected_calculate( - fois_vector, - chunks, - ages, - n_obs, - 0 - ); -} - -model { - n_pos ~ binomial(n_total, prob_infected); - sigma ~ cauchy(0, 1); - - fois[1] ~ normal(foi_location, foi_scale); - for(i in 2:n_chunks) - fois[i] ~ normal(fois[i - 1], sigma); -} - -generated quantities{ - vector[n_obs] n_pos_sim; - vector[n_obs] P_sim; - vector[n_obs] logLikelihood; - vector[age_max] foi; - - for(i in 1:age_max) { - foi[i] = fois_vector[chunks[i]]; - } - - for(i in 1:n_obs){ - n_pos_sim[i] = binomial_rng(n_total[i], prob_infected[i]); - P_sim[i] = n_pos_sim[i] / n_total[i]; - logLikelihood[i] = binomial_lpmf(n_pos[i] | n_total[i], prob_infected[i]); - } -} diff --git a/inst/stan/constant.stan b/inst/stan/constant.stan deleted file mode 100644 index 5436fa1c..00000000 --- a/inst/stan/constant.stan +++ /dev/null @@ -1,47 +0,0 @@ -data { - int n_obs; - int age_max; - int n_pos[n_obs]; - int n_total[n_obs]; - matrix[n_obs, age_max] observation_exposure_matrix; - - // prior choices - real foi_a; - real foi_b; -} - -parameters { - real lambda0; -} - -transformed parameters { - real prob_infected[n_obs]; - real scalar_dot_product[n_obs]; - row_vector[age_max] foi; - - for (j in 1:age_max) { - foi[j] = lambda0; - } - - for (i in 1:n_obs){ - scalar_dot_product[i] = dot_product(observation_exposure_matrix[i,], foi); - prob_infected[i] = 1 - exp(-scalar_dot_product[i]); - } -} - -model { - for (i in 1:n_obs) - n_pos[i] ~ binomial(n_total[i], prob_infected[i]); - lambda0 ~ uniform (foi_a, foi_b); -} - -generated quantities{ - vector[n_obs] n_pos_sim; - vector[n_obs] P_sim; - vector[n_obs] logLikelihood; - for(i in 1:n_obs){ - n_pos_sim[i] = binomial_rng(n_total[i], prob_infected[i]); - P_sim[i] = n_pos_sim[i] / n_total[i]; - logLikelihood[i] = binomial_lpmf(n_pos[i] | n_total[i], prob_infected[i]); - } -} diff --git a/inst/stan/constant_no_seroreversion.stan b/inst/stan/constant_no_seroreversion.stan new file mode 100644 index 00000000..fb5c7b87 --- /dev/null +++ b/inst/stan/constant_no_seroreversion.stan @@ -0,0 +1,50 @@ +functions { + #include functions/prob_infected_constant.stan +} +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan +} + +parameters { + real foi; +} + +transformed parameters { + vector[n_observations] prob_infected; + + prob_infected = prob_infected_constant_model( + age_groups, + n_observations, + foi, + 0.0 + ); +} + +model { + n_seropositive ~ binomial(n_sample, prob_infected); + + // force of infection prior + if (foi_prior_index == 0) + foi ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi ~ normal(foi_mean, foi_sd); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_expanded; + + for(i in 1:age_max) { + foi_expanded[i] = foi; + } + + prob_infected_expanded = prob_infected_constant_model( + ages, + age_max, + foi, + 0.0 + ); +} diff --git a/inst/stan/constant_seroreversion.stan b/inst/stan/constant_seroreversion.stan new file mode 100644 index 00000000..3a78374b --- /dev/null +++ b/inst/stan/constant_seroreversion.stan @@ -0,0 +1,59 @@ +functions { + #include functions/prob_infected_constant.stan +} + +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan + #include data/seroreversion_prior_data.stan +} + +parameters { + real foi; + real seroreversion_rate; +} + +transformed parameters { + vector[n_observations] prob_infected; + + prob_infected = prob_infected_constant_model( + age_groups, + n_observations, + foi, + seroreversion_rate + ); +} + +model { + n_seropositive ~ binomial(n_sample, prob_infected); + + // force of infection prior + if (foi_prior_index == 0) + foi ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi ~ normal(foi_mean, foi_sd); + + // seroreversion prior + if (seroreversion_prior_index == 0) + seroreversion_rate ~ uniform(seroreversion_min, seroreversion_max); + if (seroreversion_prior_index == 1) + seroreversion_rate ~ normal(seroreversion_mean, seroreversion_sd); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_expanded; + + for(i in 1:age_max) { + foi_expanded[i] = foi; + } + + prob_infected_expanded = prob_infected_constant_model( + ages, + age_max, + foi, + seroreversion_rate + ); +} diff --git a/inst/stan/data/basic_data.stan b/inst/stan/data/basic_data.stan new file mode 100644 index 00000000..deac9f91 --- /dev/null +++ b/inst/stan/data/basic_data.stan @@ -0,0 +1,6 @@ +int n_observations; +int age_max; +int ages[age_max]; +int n_seropositive[n_observations]; +int n_sample[n_observations]; +int age_groups[n_observations]; diff --git a/inst/stan/data/foi_prior_data.stan b/inst/stan/data/foi_prior_data.stan new file mode 100644 index 00000000..2df33061 --- /dev/null +++ b/inst/stan/data/foi_prior_data.stan @@ -0,0 +1,13 @@ + // prior index + int foi_prior_index; + // foi indexes (chunks) + int foi_index[age_max]; + // uniform + real foi_min; + real foi_max; + // normal + real foi_mean; + real foi_sd; + // cauchy + real foi_sigma_rw_loc; + real foi_sigma_rw_sc; diff --git a/inst/stan/data/seroreversion_prior_data.stan b/inst/stan/data/seroreversion_prior_data.stan new file mode 100644 index 00000000..1b6962e3 --- /dev/null +++ b/inst/stan/data/seroreversion_prior_data.stan @@ -0,0 +1,8 @@ + // prior index + int seroreversion_prior_index; + // seroreversion prior choices + real seroreversion_min; + real seroreversion_max; + // normal + real seroreversion_mean; + real seroreversion_sd; diff --git a/inst/stan/functions/prob_infected_age.stan b/inst/stan/functions/prob_infected_age.stan new file mode 100644 index 00000000..48fd8e4b --- /dev/null +++ b/inst/stan/functions/prob_infected_age.stan @@ -0,0 +1,36 @@ +real prob_infected_age_model_single_age( + int age, + vector foi_vector, + int[] foi_index, + real seroreversion_rate +) { + real prob = 0.0; + for(j in 1:age){ + real foi = foi_vector[foi_index[j]]; + real lambda_over_both = foi / (foi + seroreversion_rate); + real e_lower = exp(-(foi + seroreversion_rate)); + + prob = lambda_over_both + e_lower * (prob - lambda_over_both); + } + return prob; +} + +vector prob_infected_age_model( + int[] ages, + int n_ages, + vector foi_vector, + int[] foi_index, + real seroreversion_rate +) { + vector[n_ages] prob_infected; + + for (i in 1:n_ages) { + prob_infected[i] = prob_infected_age_model_single_age( + ages[i], + foi_vector, + foi_index, + seroreversion_rate + ); + } + return prob_infected; +} diff --git a/inst/stan/functions/prob_infected_av.stan b/inst/stan/functions/prob_infected_av.stan deleted file mode 100644 index a9226728..00000000 --- a/inst/stan/functions/prob_infected_av.stan +++ /dev/null @@ -1,35 +0,0 @@ -real prob_infected_age_varying( - vector fois_vector, - int[] chunks, - int age, - real mu -) { - real prob = 0.0; - for(j in 1:age){ - real foi = fois_vector[chunks[j]]; - prob = (1/ (foi + mu)) * exp(-(foi + mu)) * (foi * (exp(foi + mu) - 1) + prob * (foi+mu)); - } - return prob; -} - -vector prob_infected_calculate( - vector fois_vector, - int[] chunks, - int[] ages, - int n_obs, - real mu -) { - vector[n_obs] prob_infected; - - for(i in 1:n_obs){ - int age = ages[i]; - prob_infected[i] = prob_infected_age_varying( - fois_vector, - chunks, - age, - mu - ); - } - - return prob_infected; -} diff --git a/inst/stan/functions/prob_infected_constant.stan b/inst/stan/functions/prob_infected_constant.stan new file mode 100644 index 00000000..6b7939d2 --- /dev/null +++ b/inst/stan/functions/prob_infected_constant.stan @@ -0,0 +1,32 @@ +real prob_infected_constant_model_single_age( + int age, + real foi, + real seroreversion_rate +) { + real foi_over_both = foi / (foi + seroreversion_rate); + real e_lower = exp(-(foi + seroreversion_rate)); + + real prob = 0.0; + for(i in 1:age) { + prob = foi_over_both + e_lower * (prob - foi_over_both); + } + return prob; +} + +vector prob_infected_constant_model( + int[] ages, + int n_ages, + real foi, + real seroreversion_rate +) { + vector[n_ages] prob_infected; + + for (i in 1:n_ages) { + prob_infected[i] = prob_infected_constant_model_single_age( + ages[i], + foi, + seroreversion_rate + ); + } + return prob_infected; +} diff --git a/inst/stan/functions/prob_infected_time.stan b/inst/stan/functions/prob_infected_time.stan new file mode 100644 index 00000000..8489c413 --- /dev/null +++ b/inst/stan/functions/prob_infected_time.stan @@ -0,0 +1,41 @@ +real prob_infected_time_model_single_age( + int age, + int age_max, + vector foi_vector, + int[] foi_index, + real seroreversion_rate +) { + real prob = 0.0; + int birth_year = age_max - age; + for(j in 1:age) { + real foi = foi_vector[foi_index[birth_year + j]]; + real lambda_over_both = foi / (foi + seroreversion_rate); + real e_lower = exp(-(foi + seroreversion_rate)); + + prob = lambda_over_both + e_lower * (prob - lambda_over_both); + } + return prob; +} + +vector prob_infected_time_model( + int[] ages, + int n_ages, + int age_max, + vector foi_vector, + int[] foi_index, + real seroreversion_rate +) { + vector[n_ages] prob_infected; + + for (i in 1:n_ages) { + int age = ages[i]; + prob_infected[i] = prob_infected_time_model_single_age( + age, + age_max, + foi_vector, + foi_index, + seroreversion_rate + ); + } + return prob_infected; +} diff --git a/inst/stan/functions/prob_infected_tv.stan b/inst/stan/functions/prob_infected_tv.stan deleted file mode 100644 index 02822545..00000000 --- a/inst/stan/functions/prob_infected_tv.stan +++ /dev/null @@ -1,41 +0,0 @@ -vector prob_infected_noseroreversion( - vector fois_vector, - int[] chunks, - matrix observation_exposure_matrix, - int n_obs, - int age_max - ) { - real scalar_dot_product; - vector[n_obs] prob_infected; - vector[age_max] foi_every_age = fois_vector[chunks]; - - for(i in 1:n_obs){ - scalar_dot_product = dot_product( - observation_exposure_matrix[i,], - foi_every_age - ); - prob_infected[i] = 1 - exp(-scalar_dot_product); - } - - return prob_infected; -} - -vector prob_infected_calculate( - vector fois_vector, - int[] chunks, - matrix observation_exposure_matrix, - int n_obs, - int age_max - ) { - vector[n_obs] prob_infected; - - prob_infected = prob_infected_noseroreversion( - fois_vector, - chunks, - observation_exposure_matrix, - n_obs, - age_max - ); - - return prob_infected; - } diff --git a/inst/stan/generated_quantities/log_likelihood.stan b/inst/stan/generated_quantities/log_likelihood.stan new file mode 100644 index 00000000..6daf64e9 --- /dev/null +++ b/inst/stan/generated_quantities/log_likelihood.stan @@ -0,0 +1,5 @@ +vector[n_observations] log_likelihood; + +for(i in 1:n_observations){ + log_likelihood[i] = binomial_lpmf(n_seropositive[i] | n_sample[i], prob_infected[i]); +} diff --git a/inst/stan/time_log_no_seroreversion.stan b/inst/stan/time_log_no_seroreversion.stan new file mode 100644 index 00000000..50e59166 --- /dev/null +++ b/inst/stan/time_log_no_seroreversion.stan @@ -0,0 +1,69 @@ +functions { + #include functions/prob_infected_time.stan +} + +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan +} + +transformed data { + int n_foi = max(foi_index); +} + +parameters { + vector[n_foi] log_foi_vector; + real sigma; +} + +transformed parameters { + vector[n_foi] foi_vector; + vector[n_observations] prob_infected; + + foi_vector = exp(log_foi_vector); + + prob_infected = prob_infected_time_model( + age_groups, + n_observations, + age_max, + foi_vector, + foi_index, + 0.0 + ); +} + +model { + n_seropositive ~ binomial(n_sample, prob_infected); + sigma ~ normal(foi_sigma_rw_loc, foi_sigma_rw_sc); + + // force of infection prior + if (foi_prior_index == 0) + foi_vector[1] ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi_vector[1] ~ normal(foi_mean, foi_sd); + target += log(exp(log_foi_vector[1])); // Jacobian term + + for(i in 2:n_foi) + log_foi_vector[i] ~ normal(log_foi_vector[i - 1], sigma); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_expanded; + + for(i in 1:age_max) { + int time_index = age_max - i + 1; + foi_expanded[i] = foi_vector[foi_index[i]]; + } + + prob_infected_expanded = prob_infected_time_model( + ages, + age_max, // number of ages + age_max, // number of years + foi_vector, + foi_index, + 0.0 + ); +} diff --git a/inst/stan/time_log_seroreversion.stan b/inst/stan/time_log_seroreversion.stan new file mode 100644 index 00000000..45d8fe0f --- /dev/null +++ b/inst/stan/time_log_seroreversion.stan @@ -0,0 +1,77 @@ +functions { + #include functions/prob_infected_time.stan +} + +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan + #include data/seroreversion_prior_data.stan +} + +transformed data { + int n_foi = max(foi_index); +} + +parameters { + vector[n_foi] log_foi_vector; + real seroreversion_rate; + real sigma; +} + +transformed parameters { + vector[n_foi] foi_vector; + vector[n_observations] prob_infected; + + foi_vector = exp(log_foi_vector); + + prob_infected = prob_infected_time_model( + age_groups, + n_observations, + age_max, + foi_vector, + foi_index, + seroreversion_rate + ); +} + +model { + n_seropositive ~ binomial(n_sample, prob_infected); + sigma ~ normal(foi_sigma_rw_loc, foi_sigma_rw_sc); + + // force of infection prior + if (foi_prior_index == 0) + foi_vector[1] ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi_vector[1] ~ normal(foi_mean, foi_sd); + target += log(exp(log_foi_vector[1])); // Jacobian term + + for(i in 2:n_foi) + log_foi_vector[i] ~ normal(log_foi_vector[i - 1], sigma); + + // seroreversion prior + if (seroreversion_prior_index == 0) + seroreversion_rate ~ uniform(seroreversion_min, seroreversion_max); + if (seroreversion_prior_index == 1) + seroreversion_rate ~ normal(seroreversion_mean, seroreversion_sd); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_expanded; + + for(i in 1:age_max) { + int time_index = age_max - i + 1; + foi_expanded[time_index] = foi_vector[foi_index[i]]; + } + + prob_infected_expanded = prob_infected_time_model( + ages, + age_max, // number of ages + age_max, // number of years + foi_vector, + foi_index, + seroreversion_rate + ); +} diff --git a/inst/stan/time_no_seroreversion.stan b/inst/stan/time_no_seroreversion.stan new file mode 100644 index 00000000..7276938c --- /dev/null +++ b/inst/stan/time_no_seroreversion.stan @@ -0,0 +1,65 @@ +functions { + #include functions/prob_infected_time.stan +} + +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan +} + +transformed data { + int n_foi = max(foi_index); +} + +parameters { + vector[n_foi] foi_vector; + real sigma; +} + +transformed parameters { + vector[n_observations] prob_infected; + + prob_infected = prob_infected_time_model( + age_groups, + n_observations, + age_max, + foi_vector, + foi_index, + 0.0 + ); +} + +model { + n_seropositive ~ binomial(n_sample, prob_infected); + sigma ~ cauchy(foi_sigma_rw_loc, foi_sigma_rw_sc); + + // force of infection prior + if (foi_prior_index == 0) + foi_vector[1] ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi_vector[1] ~ normal(foi_mean, foi_sd); + + for(i in 2:n_foi) + foi_vector[i] ~ normal(foi_vector[i - 1], sigma); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_expanded; + + for(i in 1:age_max) { + int time_index = age_max - i + 1; + foi_expanded[i] = foi_vector[foi_index[i]]; + } + + prob_infected_expanded = prob_infected_time_model( + ages, + age_max, // number of ages + age_max, // number of years + foi_vector, + foi_index, + 0.0 + ); +} diff --git a/inst/stan/time_seroreversion.stan b/inst/stan/time_seroreversion.stan new file mode 100644 index 00000000..00598c04 --- /dev/null +++ b/inst/stan/time_seroreversion.stan @@ -0,0 +1,73 @@ +functions { + #include functions/prob_infected_time.stan +} + +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan + #include data/seroreversion_prior_data.stan +} + +transformed data { + int n_foi = max(foi_index); +} + +parameters { + vector[n_foi] foi_vector; + real seroreversion_rate; + real sigma; +} + +transformed parameters { + vector[n_observations] prob_infected; + + prob_infected = prob_infected_time_model( + age_groups, + n_observations, + age_max, + foi_vector, + foi_index, + seroreversion_rate + ); +} + +model { + n_seropositive ~ binomial(n_sample, prob_infected); + sigma ~ cauchy(foi_sigma_rw_sc, foi_sigma_rw_sc); + + // force of infection prior + if (foi_prior_index == 0) + foi_vector[1] ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi_vector[1] ~ normal(foi_mean, foi_sd); + + for(i in 2:n_foi) + foi_vector[i] ~ normal(foi_vector[i - 1], sigma); + + // seroreversion prior + if (seroreversion_prior_index == 0) + seroreversion_rate ~ uniform(seroreversion_min, seroreversion_max); + if (seroreversion_prior_index == 1) + seroreversion_rate ~ normal(seroreversion_mean, seroreversion_sd); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_expanded; + + for(i in 1:age_max) { + int time_index = age_max - i + 1; + foi_expanded[time_index] = foi_vector[foi_index[i]]; + } + + prob_infected_expanded = prob_infected_time_model( + ages, + age_max, // number of ages + age_max, // number of years + foi_vector, + foi_index, + seroreversion_rate + ); +} diff --git a/inst/stan/tv_normal.stan b/inst/stan/tv_normal.stan deleted file mode 100644 index 43fcc66e..00000000 --- a/inst/stan/tv_normal.stan +++ /dev/null @@ -1,66 +0,0 @@ -functions { - #include functions/prob_infected_tv.stan -} - -data { - int n_obs; - int age_max; - int n_pos[n_obs]; - int n_total[n_obs]; - matrix[n_obs, age_max] observation_exposure_matrix; - - // prior choices - int chunks[age_max]; - real foi_location; - real foi_scale; -} - -transformed data { - int n_chunks = max(chunks); -} - -parameters { - row_vector[n_chunks] fois; - real sigma; -} - -transformed parameters { - vector[n_chunks] fois_vector; - vector[n_obs] prob_infected; - - fois_vector = to_vector(fois); - - prob_infected = prob_infected_calculate( - fois_vector, - chunks, - observation_exposure_matrix, - n_obs, - age_max - ); -} - -model { - n_pos ~ binomial(n_total, prob_infected); - sigma ~ cauchy(0, 1); - - fois[1] ~ normal(foi_location, foi_scale); - for(i in 2:n_chunks) - fois[i] ~ normal(fois[i - 1], sigma); -} - -generated quantities{ - vector[n_obs] n_pos_sim; - vector[n_obs] P_sim; - vector[n_obs] logLikelihood; - vector[age_max] foi; - - for(i in 1:age_max) { - foi[i] = fois_vector[chunks[i]]; - } - - for(i in 1:n_obs){ - n_pos_sim[i] = binomial_rng(n_total[i], prob_infected[i]); - P_sim[i] = n_pos_sim[i] / n_total[i]; - logLikelihood[i] = binomial_lpmf(n_pos[i] | n_total[i], prob_infected[i]); - } -} diff --git a/inst/stan/tv_normal_log.stan b/inst/stan/tv_normal_log.stan deleted file mode 100644 index 4b4d033b..00000000 --- a/inst/stan/tv_normal_log.stan +++ /dev/null @@ -1,69 +0,0 @@ -functions { - #include functions/prob_infected_tv.stan -} - -data { - int n_obs; - int age_max; - int n_pos[n_obs]; - int n_total[n_obs]; - matrix[n_obs, age_max] observation_exposure_matrix; - - // prior choices - int chunks[age_max]; - real foi_location; - real foi_scale; -} - -transformed data { - int n_chunks = max(chunks); -} - -parameters { - row_vector[n_chunks] log_fois; - real sigma; -} - -transformed parameters { - row_vector[n_chunks] fois; - vector[n_chunks] fois_vector; - vector[n_obs] prob_infected; - - for(i in 1:n_chunks) - fois[i] = exp(log_fois[i]); - fois_vector = to_vector(fois); - - prob_infected = prob_infected_calculate( - fois_vector, - chunks, - observation_exposure_matrix, - n_obs, - age_max - ); -} - -model { - n_pos ~ binomial(n_total, prob_infected); - sigma ~ cauchy(0, 1); - - log_fois[1] ~ normal(foi_location, foi_scale); - for(i in 2:n_chunks) - log_fois[i] ~ normal(log_fois[i - 1], sigma); - } - -generated quantities{ - vector[n_obs] n_pos_sim; - vector[n_obs] P_sim; - vector[n_obs] logLikelihood; - vector[age_max] foi; - - for(i in 1:age_max) { - foi[i] = fois_vector[chunks[i]]; - } - - for(i in 1:n_obs){ - n_pos_sim[i] = binomial_rng(n_total[i], prob_infected[i]); - P_sim[i] = n_pos_sim[i] / n_total[i]; - logLikelihood[i] = binomial_lpmf(n_pos[i] | n_total[i], prob_infected[i]); - } -} diff --git a/man/add_age_bins.Rd b/man/add_age_bins.Rd index 6ab75844..9caa676a 100644 --- a/man/add_age_bins.Rd +++ b/man/add_age_bins.Rd @@ -7,14 +7,15 @@ add_age_bins(survey_features) } \arguments{ -\item{survey_features}{A dataframe containing age_min and age_max columns representing -the minimum and maximum age boundaries for each group.} +\item{survey_features}{A dataframe containing age_min and age_max columns +representing the minimum and maximum age boundaries for each group.} } \value{ -A dataframe with an additional 'group' column representing the group interval -for each row based on the age_min and age_max columns. +A dataframe with an additional 'group' column representing the group +interval for each row based on the age_min and age_max columns. } \description{ -It generates a new column 'group' in the survey_features dataframe, representing -the group interval for each row based on the age_min and age_max columns. +It generates a new column 'group' in the survey_features dataframe, +representing the group interval for each row based on the age_min and +age_max columns. } diff --git a/man/add_age_group_to_serosurvey.Rd b/man/add_age_group_to_serosurvey.Rd new file mode 100644 index 00000000..634531ed --- /dev/null +++ b/man/add_age_group_to_serosurvey.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fit_seromodel.R +\name{add_age_group_to_serosurvey} +\alias{add_age_group_to_serosurvey} +\title{Adds age group marker to serosurvey} +\usage{ +add_age_group_to_serosurvey(serosurvey) +} +\arguments{ +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} +} +\description{ +Adds age group marker to serosurvey +} diff --git a/man/build_stan_data.Rd b/man/build_stan_data.Rd new file mode 100644 index 00000000..b07bc224 --- /dev/null +++ b/man/build_stan_data.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_stan_data.R +\name{build_stan_data} +\alias{build_stan_data} +\title{Builds stan data for sampling depending on the selected model} +\usage{ +build_stan_data( + serosurvey, + model_type = "constant", + foi_prior = sf_uniform(), + foi_index = NULL, + is_log_foi = FALSE, + foi_sigma_rw = sf_none(), + is_seroreversion = FALSE, + seroreversion_prior = sf_none() +) +} +\arguments{ +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{model_type}{Type of the model. Either "constant", "age" or "time"} + +\item{foi_prior}{Force-of-infection distribution specified by means of +the helper functions. Currently available options are: +\describe{ +\item{\link{sf_normal}}{Function to set normal distribution priors} +\item{\link{sf_uniform}}{Function to set uniform distribution priors} +}} + +\item{foi_index}{Integer vector specifying the age-groups for which +force-of-infection values will be estimated. It can be specified by +means of \link{get_foi_index}} + +\item{is_log_foi}{Boolean to set logarithmic scale in the FOI} + +\item{foi_sigma_rw}{Prior distribution for the standard deviation of the +force-of-infection. Currently available options are: +\describe{ +\item{\link{sf_normal}}{Function to set normal distribution prior. +Available for time models in the log-scale} +\item{\link{sf_cauchy}}{Function to set Cauchy distribution prior. +Available for time models in regular scale.} +}} + +\item{is_seroreversion}{Boolean specifying whether to include +seroreversion rate estimation in the model} + +\item{seroreversion_prior}{seroreversion distribution specified by means of +the helper functions. Currently available options are: +\describe{ +\item{\link{sf_normal}}{Function to set normal distribution priors} +\item{\link{sf_uniform}}{Function to set uniform distribution priors} +\item{\link{sf_none}}{Function to set no prior distribution} +}} +} +\value{ +List with necessary data for sampling the specified model +} +\description{ +Builds stan data for sampling depending on the selected model +} diff --git a/man/chagas2012.Rd b/man/chagas2012.Rd deleted file mode 100644 index 995fce31..00000000 --- a/man/chagas2012.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/chagas2012.R -\docType{data} -\name{chagas2012} -\alias{chagas2012} -\title{Seroprevalence data on serofoi} -\format{ -An object of class \code{"cross"}; see \code{\link[qtl:read.cross]{qtl::read.cross()}}. -} -\usage{ -chagas2012 -} -\description{ -Data from a serological surveys -} -\examples{ -chagas2012 -} -\keyword{datasets} diff --git a/man/chik2015.Rd b/man/chik2015.Rd deleted file mode 100644 index fa4991b3..00000000 --- a/man/chik2015.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/chik2015.R -\docType{data} -\name{chik2015} -\alias{chik2015} -\title{Seroprevalence data on serofoi} -\format{ -An object of class \code{"cross"}; see \code{\link[qtl:read.cross]{qtl::read.cross()}}. -} -\usage{ -chik2015 -} -\description{ -Data from a serological surveys -} -\examples{ -chik2015 -} -\keyword{datasets} diff --git a/man/extract_central_estimates.Rd b/man/extract_central_estimates.Rd new file mode 100644 index 00000000..e5c0ee4b --- /dev/null +++ b/man/extract_central_estimates.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_seromodel.R +\name{extract_central_estimates} +\alias{extract_central_estimates} +\title{Extracts central estimates from stan_fit object for specified parameter} +\usage{ +extract_central_estimates( + seromodel, + serosurvey, + alpha = 0.05, + par_name = "foi_vector" +) +} +\arguments{ +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} + +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{alpha}{1 - alpha indicates the credibility level to be used} + +\item{par_name}{String specifying the parameter to be extracted +from \code{seromodel}} +} +\value{ +A dataframe with the following columns +\describe{ +\item{\code{median}}{Median of the samples computed as the 0.5 quantile} +\item{\code{lower}}{Lower quantile \code{alpha}} +\item{\code{upper}}{Upper quantile \code{1 - alpha}} +} +} +\description{ +Extracts central estimates from stan_fit object for specified parameter +} diff --git a/man/extract_seromodel_summary.Rd b/man/extract_seromodel_summary.Rd deleted file mode 100644 index 6f7effda..00000000 --- a/man/extract_seromodel_summary.Rd +++ /dev/null @@ -1,63 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R -\name{extract_seromodel_summary} -\alias{extract_seromodel_summary} -\title{Function to extract a summary of the specified serological model object} -\usage{ -extract_seromodel_summary(seromodel_object, serodata) -} -\arguments{ -\item{seromodel_object}{Stanfit object containing the results of fitting a -model by means of \link{fit_seromodel}.} - -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} -} -\value{ -\code{model_summary}. Object with a summary of \code{seromodel_object} -containing the following: -\describe{ -\item{\code{foi_model}}{Name of the selected model.} -\item{\code{data_set}}{Seroprevalence survey label} -\item{\code{country}}{Name of the country were the survey was conducted in.} -\item{\code{year}}{Year in which the survey was conducted.} -\item{\code{test}}{Type of test of the survey.} -\item{\code{antibody}}{Antibody} -\item{\code{n_sample}}{Total number of samples in the survey.} -\item{\code{n_agec}}{Number of age groups considered.} -\item{\code{n_iter}}{Number of iterations by chain including warmup.} -\item{\code{elpd}}{elpd} -\item{\code{se}}{se} -\item{\code{converged}}{convergence} -} -} -\description{ -This function extracts a summary corresponding to a serological model object -containing information about the original serological survey data used to -fit the model, such as the year when the survey took place, the type of test -taken and the corresponding antibody, as well as information about the -convergence of the model, like the expected log pointwise predictive density -\code{elpd} and its corresponding standard deviation. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant" -) -extract_seromodel_summary(seromodel_object, - serodata = serodata -) -} diff --git a/man/fit_seromodel.Rd b/man/fit_seromodel.Rd index 2aad529e..34dc0ff7 100644 --- a/man/fit_seromodel.Rd +++ b/man/fit_seromodel.Rd @@ -1,101 +1,81 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R +% Please edit documentation in R/fit_seromodel.R \name{fit_seromodel} \alias{fit_seromodel} -\title{Fit selected model to the specified seroprevalence survey -data} +\title{Runs specified stan model for the force-of-infection} \usage{ fit_seromodel( - serodata, - foi_model = c("constant", "tv_normal_log", "tv_normal", "av_normal"), - foi_parameters = NULL, - chunks = NULL, - chunk_size = 1, - iter = 1000, - adapt_delta = 0.9, - max_treedepth = 10, - seed = 12345, + serosurvey, + model_type = "constant", + is_log_foi = FALSE, + foi_prior = sf_normal(), + foi_sigma_rw = sf_none(), + foi_index = NULL, + foi_init = NULL, + is_seroreversion = FALSE, + seroreversion_prior = sf_normal(), ... ) } \arguments{ -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} -\item{foi_model}{Name of the selected model. Current version provides three -options: +\item{model_type}{Type of the model. Either "constant", "age" or "time"} + +\item{is_log_foi}{Boolean to set logarithmic scale in the FOI} + +\item{foi_prior}{Force-of-infection distribution specified by means of +the helper functions. Currently available options are: \describe{ -\item{\code{"constant"}}{Runs a constant model} -\item{\code{"tv_normal"}}{Runs a normal model} -\item{\code{"tv_normal_log"}}{Runs a normal logarithmic model} +\item{\link{sf_normal}}{Function to set normal distribution priors} +\item{\link{sf_uniform}}{Function to set uniform distribution priors} }} -\item{foi_parameters}{List specifying the initial prior parameters of the -model \code{foi_model} to be specified as (e.g.): +\item{foi_sigma_rw}{Prior distribution for the standard deviation of the +force-of-infection. Currently available options are: \describe{ -\item{\code{"constant"}}{\code{list(foi_a = 0, foi_b = 2)}} -\item{\code{"tv_normal"}}{\code{list(foi_location = 0, foi_scale = 1)}} -\item{\code{"tv_normal_log"}}{\code{list(foi_location = -6, foi_scale = 4)}} +\item{\link{sf_normal}}{Function to set normal distribution prior. +Available for time models in the log-scale} +\item{\link{sf_cauchy}}{Function to set Cauchy distribution prior. +Available for time models in regular scale.} }} -\item{chunks}{Numeric list specifying the chunk structure of the time -interval from the birth year of the oldest age cohort -\code{min(serodata$age_mean_f)} to the time when the serosurvey was conducted -\code{t_sur}. If \code{NULL}, the time interval is divided in chunks of size -\code{chunk_size}.} - -\item{chunk_size}{Size of the chunks to be used in case that the chunk -structure \code{chunks} is not specified in \link{fit_seromodel}. -Default is 1, meaning that one force of infection value is to be estimated -for every year in the time interval spanned by the serosurvey. -If the length of the time interval is not exactly divisible by \code{chunk_size}, -the remainder years are included in the last chunk.} +\item{foi_index}{Integer vector specifying the age-groups for which +force-of-infection values will be estimated. It can be specified by +means of \link{get_foi_index}} -\item{iter}{Number of interactions for each chain including the warmup. -\code{iter} in \link[rstan:stanmodel-method-sampling]{sampling}.} +\item{is_seroreversion}{Boolean specifying whether to include +seroreversion rate estimation in the model} -\item{adapt_delta}{Real number between 0 and 1 that represents the target -average acceptance probability. Increasing the value of \code{adapt_delta} will -result in a smaller step size and fewer divergences. For further details -refer to the \code{control} parameter in \link[rstan:stanmodel-method-sampling]{sampling} or -\href{https://mc-stan.org/rstanarm/reference/adapt_delta.html}{here}.} - -\item{max_treedepth}{Maximum tree depth for the binary tree used in the NUTS -stan sampler. For further details refer to the \code{control} parameter in -\link[rstan:stanmodel-method-sampling]{sampling}.} - -\item{seed}{For further details refer to the \code{seed} parameter in -\link[rstan:stanmodel-method-sampling]{sampling}.} +\item{seroreversion_prior}{seroreversion distribution specified by means of +the helper functions. Currently available options are: +\describe{ +\item{\link{sf_normal}}{Function to set normal distribution priors} +\item{\link{sf_uniform}}{Function to set uniform distribution priors} +\item{\link{sf_none}}{Function to set no prior distribution} +}} -\item{...}{Additional parameters for \link[rstan:stanmodel-method-sampling]{sampling}.} +\item{...}{Additional parameters for \link[rstan:stanmodel-method-sampling]{rstan}} } \value{ -\code{seromodel_object}. \code{stanfit} object returned by the function -\link[rstan:stanmodel-method-sampling]{sampling} +stan_fit object with force-of-infection and seroreversion +(when applicable) samples } \description{ -This function fits the specified model \code{foi_model} to the serological survey -data \code{serodata} by means of \link[rstan:stanmodel-method-sampling]{sampling}. The -function determines whether the corresponding stan model object needs to be -compiled by rstan. +Runs specified stan model for the force-of-infection } \examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_fit <- fit_seromodel( - serodata = serodata, - foi_model = "constant" +data(veev2012) +seromodel <- fit_seromodel( +serosurvey = veev2012, + model_type = "time", + foi_index = get_foi_index(veev2012, group_size = 30) ) - } diff --git a/man/generate_random_sample_sizes.Rd b/man/generate_random_sample_sizes.Rd index dc8b7039..6f5cfc8d 100644 --- a/man/generate_random_sample_sizes.Rd +++ b/man/generate_random_sample_sizes.Rd @@ -7,14 +7,16 @@ generate_random_sample_sizes(survey_df_long) } \arguments{ -\item{survey_df_long}{A dataframe with columns 'age', 'group' and 'overall_sample_size'.} +\item{survey_df_long}{A dataframe with columns 'age', 'group' and +'overall_sample_size'.} } \value{ -A dataframe with random sample sizes generated for each age based on the overall -sample size. +A dataframe with random sample sizes generated for each age based on +the overall sample size. } \description{ -This function generates random sample sizes for each age group based on the overall sample size -and the distribution of individuals across age groups. It uses multinomial sampling to allocate -the total sample size to each age group proportionally. +This function generates random sample sizes for each age group based on the +overall sample size and the distribution of individuals across age groups. +It uses multinomial sampling to allocate the total sample size to each age +group proportionally. } diff --git a/man/get_age_intervals.Rd b/man/get_age_intervals.Rd new file mode 100644 index 00000000..d68b35a2 --- /dev/null +++ b/man/get_age_intervals.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_seromodel.R +\name{get_age_intervals} +\alias{get_age_intervals} +\title{Construct age-group variable from age column} +\usage{ +get_age_intervals(serosurvey, step) +} +\arguments{ +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{step}{step used to split the age interval} +} +\value{ +Serosurvey with addition factor variable grouping \code{age_intervals}. +The interval is taken as closed to the right and to the left. +} +\description{ +Generates age intervals of length step in the interval spanned by +\code{age_min} and \code{age_max} in a serosurvey. +In cases where \code{max(age_max)\%\%(step+1)!=0}, the last age interval is +truncated and will have a different length than the others. +} diff --git a/man/get_chunk_structure.Rd b/man/get_chunk_structure.Rd deleted file mode 100644 index c2e171d9..00000000 --- a/man/get_chunk_structure.Rd +++ /dev/null @@ -1,41 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R -\name{get_chunk_structure} -\alias{get_chunk_structure} -\title{Generate list containing the chunk structure to be used in the retrospective -estimation of the force of infection.} -\usage{ -get_chunk_structure(serodata, chunk_size) -} -\arguments{ -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} - -\item{chunk_size}{Size of the chunks to be used in case that the chunk -structure \code{chunks} is not specified in \link{fit_seromodel}. -Default is 1, meaning that one force of infection value is to be estimated -for every year in the time interval spanned by the serosurvey. -If the length of the time interval is not exactly divisible by \code{chunk_size}, -the remainder years are included in the last chunk.} -} -\description{ -This function generates a numeric list specifying the chunk structure of the -time interval spanning from the year of birth of the oldest age cohort up to -the time when the serosurvey was conducted. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(serodata = chagas2012, alpha = 0.05) -cohort_ages <- get_cohort_ages(serodata = serodata) -} diff --git a/man/get_cohort_ages.Rd b/man/get_cohort_ages.Rd deleted file mode 100644 index 148abc55..00000000 --- a/man/get_cohort_ages.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R -\name{get_cohort_ages} -\alias{get_cohort_ages} -\title{Generate data frame containing the age of each cohort -corresponding to each birth year excluding the year of the survey.} -\usage{ -get_cohort_ages(serodata) -} -\arguments{ -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} -} -\value{ -\code{cohort_ages}. A data frame containing the age of each cohort -corresponding to each birth year -} -\description{ -This function generates a data frame containing the age of each cohort -corresponding to each \code{birth_year} excluding the year of the survey, for -which the cohort age is still 0. specified serological survey data \code{serodata} -excluding the year of the survey. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(serodata = chagas2012, alpha = 0.05) -cohort_ages <- get_cohort_ages(serodata = serodata) -} diff --git a/man/get_foi_central_estimates.Rd b/man/get_foi_central_estimates.Rd deleted file mode 100644 index cbbcc63b..00000000 --- a/man/get_foi_central_estimates.Rd +++ /dev/null @@ -1,56 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R -\name{get_foi_central_estimates} -\alias{get_foi_central_estimates} -\title{Extract central estimates for the fitted forced FoI} -\usage{ -get_foi_central_estimates( - seromodel_object, - serodata, - lower_quantile = 0.05, - upper_quantile = 0.95 -) -} -\arguments{ -\item{seromodel_object}{Stanfit object containing the results of fitting a -model by means of \link{fit_seromodel}.} - -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} - -\item{lower_quantile}{Lower quantile used to compute the credible interval of -the fitted force-of-infection.} - -\item{upper_quantile}{Lower quantile used to compute the credible interval of -the fitted force-of-infection.} -} -\value{ -\code{foi_central_estimates}. Central estimates for the fitted forced FoI -} -\description{ -Extract central estimates for the fitted forced FoI -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant" -) -cohort_ages <- get_cohort_ages(serodata = serodata) -foi_central_estimates <- get_foi_central_estimates( - seromodel_object = seromodel_object, - serodata = serodata -) -} diff --git a/man/get_foi_index.Rd b/man/get_foi_index.Rd new file mode 100644 index 00000000..10449090 --- /dev/null +++ b/man/get_foi_index.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_stan_data.R +\name{get_foi_index} +\alias{get_foi_index} +\title{Generates force-of-infection indexes for heterogeneous age groups} +\usage{ +get_foi_index(serosurvey, group_size) +} +\arguments{ +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{group_size}{Age groups size} +} +\value{ +Integer vector with the indexes numerating each year/age +(depending on the model). +} +\description{ +Generates a list of integers indexing together the time/age intervals +for which FOI values will be estimated in \link{fit_seromodel}. +The max value in \code{foi_index} correspond to the number of FOI values to +be estimated when sampling. +} +\examples{ +data(chagas2012) +foi_index <- get_foi_index(chagas2012, group_size = 25) +} diff --git a/man/get_prev_expanded.Rd b/man/get_prev_expanded.Rd deleted file mode 100644 index 4ee85cb7..00000000 --- a/man/get_prev_expanded.Rd +++ /dev/null @@ -1,56 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R -\name{get_prev_expanded} -\alias{get_prev_expanded} -\title{Generate data frame containing the confidence interval based on -a force-of-infection fitting} -\usage{ -get_prev_expanded(foi, serodata, alpha = 0.05, bin_data = FALSE, bin_step = 5) -} -\arguments{ -\item{foi}{Object containing the information of the force-of-infection. It is -obtained from \code{rstan::extract(seromodel_object$seromodel, "foi", inc_warmup = FALSE)[[1]]}.} - -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} - -\item{alpha}{Probability threshold for statistical significance used for both -the binomial confidence interval, and the lower and upper quantiles of the -estimated prevalence.} - -\item{bin_data}{If \code{TRUE}, \code{serodata} is binned by means of -\code{prepare_bin_data}. Otherwise, age groups are kept as originally input.} - -\item{bin_step}{Integer specifying the age groups bin size to be used when -\code{bin_data} is set to \code{TRUE}.} -} -\value{ -\code{prev_final}. The expanded prevalence data. This is used for plotting -purposes in the \code{visualization} module. -} -\description{ -This function computes the corresponding binomial confidence intervals for -the obtained prevalence based on a fitting of the force-of-infection \code{foi} -for plotting an analysis purposes. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant" -) -foi <- rstan::extract(seromodel_object, "foi")[[1]] -get_prev_expanded(foi, serodata) -} diff --git a/man/get_table_rhats.Rd b/man/get_table_rhats.Rd deleted file mode 100644 index fe6e9e1b..00000000 --- a/man/get_table_rhats.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/model_comparison.R -\name{get_table_rhats} -\alias{get_table_rhats} -\title{Build dataframe containing the R-hat estimates for a given -serological model} -\usage{ -get_table_rhats(seromodel_object, cohort_ages) -} -\arguments{ -\item{seromodel_object}{Stanfit object containing the results of fitting a -model by means of \link{fit_seromodel}.} -} -\value{ -rhats table -} -\description{ -This function relies on \link[bayesplot:bayesplot-extractors]{rhat} to extract the -R-hat estimates of the serological model object \code{seromodel_object} and -returns a table a dataframe with the estimates for each year of birth. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(serodata = chagas2012) -model_constant <- fit_seromodel( - serodata = serodata, - foi_model = "constant", - iter = 1500 -) -cohort_ages <- get_cohort_ages(serodata) -get_table_rhats( - seromodel_object = model_constant, - cohort_ages = cohort_ages -) -} diff --git a/man/multinomial_sampling_group.Rd b/man/multinomial_sampling_group.Rd index 42c5681a..5aa05989 100644 --- a/man/multinomial_sampling_group.Rd +++ b/man/multinomial_sampling_group.Rd @@ -4,10 +4,10 @@ \alias{multinomial_sampling_group} \title{Generate random sample sizes using multinomial sampling.} \usage{ -multinomial_sampling_group(sample_size, n_ages) +multinomial_sampling_group(n_sample, n_ages) } \arguments{ -\item{sample_size}{The total sample size to be distributed among age groups.} +\item{n_sample}{The total sample size to be distributed among age groups.} \item{n_ages}{The number of age groups.} } @@ -15,7 +15,7 @@ multinomial_sampling_group(sample_size, n_ages) A vector containing random sample sizes for each age group. } \description{ -This function generates random sample sizes for each age group using multinomial sampling. -It takes the total sample size and the number of age groups as input and returns a vector -of sample sizes for each age group. +This function generates random sample sizes for each age group using +multinomial sampling. It takes the total sample size and the number of age +groups as input and returns a vector of sample sizes for each age group. } diff --git a/man/plot_foi.Rd b/man/plot_foi.Rd deleted file mode 100644 index 6033a711..00000000 --- a/man/plot_foi.Rd +++ /dev/null @@ -1,67 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visualisation.R -\name{plot_foi} -\alias{plot_foi} -\title{Generate force-of-infection plot corresponding to the -specified fitted serological model} -\usage{ -plot_foi( - seromodel_object, - serodata, - max_lambda = NA, - size_text = 25, - foi = NULL -) -} -\arguments{ -\item{seromodel_object}{Stanfit object containing the results of fitting a -model by means of \link{fit_seromodel}.} - -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} - -\item{max_lambda}{Upper \code{ylim}for force-of-infection plot} - -\item{size_text}{Text size use in the theme of the graph returned by the -function.} - -\item{foi}{Data frame with the Force-of-infection trend to be plotted -alongside the estimated force-of-infection. Typically this corresponds to -the force-of-infection used to simulate the serosurvey used to model.} -} -\value{ -A ggplot2 object containing the force-of-infection vs time including -the corresponding confidence interval. -} -\description{ -This function generates a force-of-infection plot from the results obtained -by fitting a serological model. This includes the corresponding binomial -confidence interval. The x axis corresponds to the decades covered by the -survey the y axis to the force-of-infection. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant", - iter = 1000 -) -cohort_ages <- get_cohort_ages(serodata) -plot_foi( - seromodel_object = seromodel_object, - cohort_ages = cohort_ages, - size_text = 15 -) -} diff --git a/man/plot_foi_estimates.Rd b/man/plot_foi_estimates.Rd new file mode 100644 index 00000000..e653b7ee --- /dev/null +++ b/man/plot_foi_estimates.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_seromodel.R +\name{plot_foi_estimates} +\alias{plot_foi_estimates} +\title{Plots force-of-infection central estimates} +\usage{ +plot_foi_estimates( + seromodel, + serosurvey, + alpha = 0.05, + foi_df = NULL, + foi_max = NULL, + size_text = 11 +) +} +\arguments{ +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} + +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{alpha}{1 - alpha indicates the credibility level to be used} + +\item{foi_df}{Dataframe with columns +\describe{ +\item{\code{year}/\code{age}}{Year/Age (depending on the model)} +\item{\code{foi}}{Force-of-infection values by year/age} +}} + +\item{foi_max}{Max force-of-infection value for plotting} + +\item{size_text}{Size of text for plotting (\code{base_size} in +\link[ggplot2:ggtheme]{ggplot2})} +} +\value{ +ggplot object with estimated force-of-infection +} +\description{ +Plots force-of-infection central estimates +} diff --git a/man/plot_info_table.Rd b/man/plot_info_table.Rd deleted file mode 100644 index 84c5cbb5..00000000 --- a/man/plot_info_table.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visualisation.R -\name{plot_info_table} -\alias{plot_info_table} -\title{Generate plot summarizing a given table} -\usage{ -plot_info_table(info_table, size_text) -} -\arguments{ -\item{info_table}{Table with the information to be summarised} - -\item{size_text}{Text size of the graph returned by the function} -} -\value{ -ggplot object summarizing the information in \code{info_table} -} -\description{ -Generate plot summarizing a given table -} -\examples{ -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant", - iter = 1000 -) -seromodel_summary <- extract_seromodel_summary( - seromodel_object = seromodel_object, - serodata = serodata -) -info_table <- t(seromodel_summary) -plot_info_table(info_table, size_text = 15) -} diff --git a/man/plot_rhats.Rd b/man/plot_rhats.Rd index 7effa44d..507f7eeb 100644 --- a/man/plot_rhats.Rd +++ b/man/plot_rhats.Rd @@ -1,40 +1,34 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visualisation.R +% Please edit documentation in R/plot_seromodel.R \name{plot_rhats} \alias{plot_rhats} -\title{Generate plot of the R-hat estimates for the specified fitted -serological model} +\title{Plot r-hats convergence criteria for the specified model} \usage{ -plot_rhats(seromodel_object, cohort_ages, size_text = 25) +plot_rhats(seromodel, serosurvey, par_name = "foi_expanded", size_text = 11) } \arguments{ -\item{seromodel_object}{Stanfit object containing the results of fitting a -model by means of \link{fit_seromodel}.} +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} -\item{size_text}{Text size use in the theme of the graph returned by the -function.} +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{par_name}{String specifying the parameter to be extracted +from \code{seromodel}} + +\item{size_text}{Size of text for plotting (\code{base_size} in +\link[ggplot2:ggtheme]{ggplot2})} } \value{ -The rhats-convergence plot of the selected model. +ggplot object showing the r-hats of the model to be compared with the +convergence criteria (horizontal dashed line) } \description{ -This function generates a plot of the R-hat estimates obtained for a -specified fitted serological model \code{seromodel_object}. The x axis corresponds -to the decades covered by the survey and the y axis to the value of the -rhats. All rhats must be smaller than 1 to ensure convergence (for further -details check \link[bayesplot:bayesplot-extractors]{rhat}). -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant", - iter = 1000 -) -cohort_ages <- get_cohort_ages(serodata = serodata) -plot_rhats(seromodel_object, - cohort_ages = cohort_ages, - size_text = 15 -) +Plot r-hats convergence criteria for the specified model } diff --git a/man/plot_seromodel.Rd b/man/plot_seromodel.Rd index 7fcb0562..59344530 100644 --- a/man/plot_seromodel.Rd +++ b/man/plot_seromodel.Rd @@ -1,77 +1,64 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visualisation.R +% Please edit documentation in R/plot_seromodel.R \name{plot_seromodel} \alias{plot_seromodel} -\title{Generate vertical arrangement of plots showing a summary of a -model, the estimated seroprevalence, the force-of-infection fit and the R-hat -estimates plots.} +\title{Visualise results of the provided model} \usage{ plot_seromodel( - seromodel_object, - serodata, + seromodel, + serosurvey, alpha = 0.05, - max_lambda = NA, - size_text = 25, - bin_data = TRUE, + bin_serosurvey = FALSE, bin_step = 5, - foi = NULL + foi_df = NULL, + foi_max = NULL, + loo_estimate_digits = 1, + central_estimate_digits = 2, + seroreversion_digits = 2, + rhat_digits = 2, + size_text = 11 ) } \arguments{ -\item{seromodel_object}{Stanfit object containing the results of fitting a -model by means of \link{fit_seromodel}.} +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} -\item{alpha}{Probability threshold for statistical significance used for both -the binomial confidence interval, and the lower and upper quantiles of the -estimated prevalence.} +\item{alpha}{1 - alpha indicates the credibility level to be used} -\item{max_lambda}{Upper \code{ylim}for force-of-infection plot} +\item{bin_serosurvey}{If \code{TRUE}, \code{serodata} is binned by means of +\code{prepare_bin_serosurvey}. +Otherwise, age groups are kept as originally input.} -\item{size_text}{Text size use in the theme of the graph returned by the -function.} +\item{bin_step}{Integer specifying the age groups bin size to be used when +\code{bin_serosurvey} is set to \code{TRUE}.} -\item{bin_data}{If \code{TRUE}, \code{serodata} is binned by means of -\code{prepare_bin_data}. Otherwise, age groups are kept as originally input.} +\item{foi_df}{Dataframe with columns +\describe{ +\item{\code{year}/\code{age}}{Year/Age (depending on the model)} +\item{\code{foi}}{Force-of-infection values by year/age} +}} -\item{bin_step}{Integer specifying the age groups bin size to be used when -\code{bin_data} is set to \code{TRUE}.} +\item{foi_max}{Max force-of-infection value for plotting} -\item{foi}{Object containing the information of the force-of-infection. It is -obtained from \code{rstan::extract(seromodel_object$seromodel, "foi", inc_warmup = FALSE)[[1]]}.} +\item{loo_estimate_digits}{Number of loo estimate digits} + +\item{central_estimate_digits}{Number of central estimate digits} + +\item{size_text}{Size of text for plotting (\code{base_size} in +\link[ggplot2:ggtheme]{ggplot2})} } \value{ -A ggplot object with a vertical arrange containing the -seropositivity, force of infection, and convergence plots. +seromodel summary plot } \description{ -Generate vertical arrangement of plots showing a summary of a -model, the estimated seroprevalence, the force-of-infection fit and the R-hat -estimates plots. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant", - iter = 1000 -) -plot_seromodel(seromodel_object, - serodata = serodata, - size_text = 15 -) +Visualise results of the provided model } diff --git a/man/plot_seroprev.Rd b/man/plot_seroprev.Rd deleted file mode 100644 index e123b697..00000000 --- a/man/plot_seroprev.Rd +++ /dev/null @@ -1,51 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visualisation.R -\name{plot_seroprev} -\alias{plot_seroprev} -\title{Generate seropositivity plot from a raw serological -survey dataset} -\usage{ -plot_seroprev(serodata, size_text = 6, bin_data = TRUE, bin_step = 5) -} -\arguments{ -\item{serodata}{A data frame containing the data from a serological survey. -This data frame must contain the following columns: -\describe{ -\item{\code{survey}}{survey Label of the current survey} -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{age_min}}{age_min} -\item{\code{age_max}}{age_max} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{country}}{The country where the survey took place} -\item{\code{test}}{The type of test taken} -\item{\code{antibody}}{antibody} -} -Alternatively to \code{age_min} and \code{age_max}, the dataset could already include -the age group marker \code{age_mean_f}, representing the middle point between -\code{age_min} and \code{age_max}. If \code{afe_mean_f} is missing, it will be generated -by the function.} - -\item{size_text}{Text size use in the theme of the graph returned by the -function.} - -\item{bin_data}{If \code{TRUE}, \code{serodata} is binned by means of -\code{prepare_bin_data}. Otherwise, age groups are kept as originally input.} - -\item{bin_step}{Integer specifying the age groups bin size to be used when -\code{bin_data} is set to \code{TRUE}.} -} -\value{ -A ggplot object containing the seropositivity-vs-age graph of the raw -data of a given seroprevalence survey with its corresponding binomial -confidence interval. -} -\description{ -Generate seropositivity plot from a raw serological -survey dataset -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -plot_seroprev(serodata, size_text = 15) -} diff --git a/man/plot_seroprev_fitted.Rd b/man/plot_seroprev_fitted.Rd deleted file mode 100644 index 6e5d0313..00000000 --- a/man/plot_seroprev_fitted.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visualisation.R -\name{plot_seroprev_fitted} -\alias{plot_seroprev_fitted} -\title{Generate seropositivity plot corresponding to the specified -fitted serological model} -\usage{ -plot_seroprev_fitted( - seromodel_object, - serodata, - size_text = 6, - bin_data = TRUE, - bin_step = 5, - alpha = 0.05 -) -} -\arguments{ -\item{seromodel_object}{Stanfit object containing the results of fitting a -model by means of \link{fit_seromodel}.} - -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} - -\item{size_text}{Text size of the graph returned by the function.} - -\item{bin_data}{If \code{TRUE}, \code{serodata} is binned by means of -\code{prepare_bin_data}. Otherwise, age groups are kept as originally input.} - -\item{bin_step}{Integer specifying the age groups bin size to be used when -\code{bin_data} is set to \code{TRUE}.} - -\item{alpha}{Probability threshold for statistical significance used for both -the binomial confidence interval, and the lower and upper quantiles of the -estimated prevalence.} -} -\value{ -A ggplot object containing the seropositivity-vs-age graph including -the data, the fitted model and their corresponding confidence intervals. -} -\description{ -This function generates a seropositivity plot of the specified serological -model object. This includes the original data grouped by age as well as the -obtained fitting from the model implementation. Age is located on the x axis -and seropositivity on the y axis with its corresponding confidence interval. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant", - iter = 1000 -) -plot_seroprev_fitted(seromodel_object, - serodata = serodata, - size_text = 15 -) -} diff --git a/man/plot_seroprevalence_estimates.Rd b/man/plot_seroprevalence_estimates.Rd new file mode 100644 index 00000000..d81cbd22 --- /dev/null +++ b/man/plot_seroprevalence_estimates.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_seromodel.R +\name{plot_seroprevalence_estimates} +\alias{plot_seroprevalence_estimates} +\title{Plot seroprevalence estimates on top of the serosurvey} +\usage{ +plot_seroprevalence_estimates( + seromodel, + serosurvey, + alpha = 0.05, + size_text = 11, + bin_serosurvey = FALSE, + bin_step = 5 +) +} +\arguments{ +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} + +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{alpha}{1 - alpha indicates the credibility level to be used} + +\item{size_text}{Size of text for plotting (\code{base_size} in +\link[ggplot2:ggtheme]{ggplot2})} + +\item{bin_serosurvey}{If \code{TRUE}, \code{serodata} is binned by means of +\code{prepare_bin_serosurvey}. +Otherwise, age groups are kept as originally input.} + +\item{bin_step}{Integer specifying the age groups bin size to be used when +\code{bin_serosurvey} is set to \code{TRUE}.} +} +\value{ +ggplot object with seroprevalence estimates and serosurveys plots +} +\description{ +Plot seroprevalence estimates on top of the serosurvey +} diff --git a/man/plot_serosurvey.Rd b/man/plot_serosurvey.Rd new file mode 100644 index 00000000..7ab7d945 --- /dev/null +++ b/man/plot_serosurvey.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_seromodel.R +\name{plot_serosurvey} +\alias{plot_serosurvey} +\title{Plots seroprevalence from the given serosurvey} +\usage{ +plot_serosurvey( + serosurvey, + size_text = 11, + bin_serosurvey = FALSE, + bin_step = 5 +) +} +\arguments{ +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{size_text}{Size of text for plotting (\code{base_size} in +\link[ggplot2:ggtheme]{ggplot2})} + +\item{bin_serosurvey}{If \code{TRUE}, \code{serodata} is binned by means of +\code{prepare_bin_serosurvey}. +Otherwise, age groups are kept as originally input.} + +\item{bin_step}{Integer specifying the age groups bin size to be used when +\code{bin_serosurvey} is set to \code{TRUE}.} +} +\value{ +ggplot object with seroprevalence plot +} +\description{ +Plots seroprevalence from the given serosurvey +} +\examples{ +# Chikungunya example serosurvey +data(chik2015) +plot_serosurvey(chik2015) + +# VEEV example serosurvey +data(veev2012) +plot_serosurvey(veev2012) + +# Chagas disease example serosurvey +data(chagas2012) +plot_serosurvey(chagas2012, bin_serosurvey = TRUE) +} diff --git a/man/plot_summary.Rd b/man/plot_summary.Rd new file mode 100644 index 00000000..7d0b3359 --- /dev/null +++ b/man/plot_summary.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_seromodel.R +\name{plot_summary} +\alias{plot_summary} +\title{Plots model summary} +\usage{ +plot_summary( + seromodel, + serosurvey, + loo_estimate_digits = 1, + central_estimate_digits = 2, + rhat_digits = 2, + size_text = 11 +) +} +\arguments{ +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} + +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{loo_estimate_digits}{Number of loo estimate digits} + +\item{central_estimate_digits}{Number of central estimate digits} + +\item{size_text}{Size of text for plotting (\code{base_size} in +\link[ggplot2:ggtheme]{ggplot2})} +} +\value{ +ggplot object with a summary of the specified model +} +\description{ +Plots model summary +} diff --git a/man/prepare_serodata.Rd b/man/prepare_serodata.Rd deleted file mode 100644 index 4616124a..00000000 --- a/man/prepare_serodata.Rd +++ /dev/null @@ -1,54 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/seroprevalence_data.R -\name{prepare_serodata} -\alias{prepare_serodata} -\title{Prepare data from a serological survey for modelling} -\usage{ -prepare_serodata(serodata = serodata, alpha = 0.05) -} -\arguments{ -\item{serodata}{A data frame containing the data from a serological survey. -This data frame must contain the following columns: -\describe{ -\item{\code{survey}}{survey Label of the current survey} -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{age_min}}{age_min} -\item{\code{age_max}}{age_max} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{country}}{The country where the survey took place} -\item{\code{test}}{The type of test taken} -\item{\code{antibody}}{antibody} -} -Alternatively to \code{age_min} and \code{age_max}, the dataset could already include -the age group marker \code{age_mean_f}, representing the middle point between -\code{age_min} and \code{age_max}. If \code{afe_mean_f} is missing, it will be generated -by the function.} - -\item{alpha}{probability of a type I error. For further details refer to -\link[Hmisc:binconf]{binconf}.} -} -\value{ -serodata with additional columns necessary for the analysis. These -columns are: -\describe{ -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max -for the age groups delimited by \code{age_min} and \code{age_max}} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{Years in which the subject was born according to the -age group marker \code{age_mean_f}} -\item{\code{prev_obs}}{Observed prevalence} -\item{\code{prev_obs_lower}}{Lower limit of the confidence interval for the -observed prevalence} -\item{\code{prev_obs_upper}}{Upper limit of the confidence interval for the -observed prevalence} -} -} -\description{ -This function adds the necessary additional variables to the given dataset -\code{serodata} corresponding to a serological survey. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -} diff --git a/man/prepare_serosurvey_for_plotting.Rd b/man/prepare_serosurvey_for_plotting.Rd new file mode 100644 index 00000000..06c8bccf --- /dev/null +++ b/man/prepare_serosurvey_for_plotting.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_seromodel.R +\name{prepare_serosurvey_for_plotting} +\alias{prepare_serosurvey_for_plotting} +\title{Prepares serosurvey for plotting} +\usage{ +prepare_serosurvey_for_plotting(serosurvey, alpha = 0.05) +} +\arguments{ +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{alpha}{1 - alpha indicates the confidence level to be used} +} +\value{ +serosurvey with additional columns: +\describe{ +\item{seroprev}{Seroprevalence computed as the proportion of positive +cases \code{n_seropositive} in the number of samples +\code{n_sample} for each age group} +\item{seroprev_lower}{Lower limit of the binomial confidence interval +of \code{seroprev}} +\item{seroprev_upper}{Upper limit of the binomial confidence interval +of \code{seroprev}} +} +} +\description{ +Adds seroprevalence values with corresponding binomial confidence interval +} diff --git a/man/probability_exact_time_varying.Rd b/man/probability_exact_time_varying.Rd index 36e7da06..2a923288 100644 --- a/man/probability_exact_time_varying.Rd +++ b/man/probability_exact_time_varying.Rd @@ -7,7 +7,8 @@ probability_exact_time_varying(years, fois, seroreversion_rate = 0) } \arguments{ -\item{years}{Integer indicating the years covering the birth ages of the sample} +\item{years}{Integer indicating the years covering the birth ages of the +sample} \item{fois}{Numeric atomic vector corresponding to the age-varying force-of-infection to simulate from} diff --git a/man/probability_seropositive_age_and_time_model_by_age.Rd b/man/probability_seropositive_age_and_time_model_by_age.Rd index 2c9e71ad..3c32fe9e 100644 --- a/man/probability_seropositive_age_and_time_model_by_age.Rd +++ b/man/probability_seropositive_age_and_time_model_by_age.Rd @@ -2,20 +2,23 @@ % Please edit documentation in R/simulate_serosurvey.R \name{probability_seropositive_age_and_time_model_by_age} \alias{probability_seropositive_age_and_time_model_by_age} -\title{Generate probabilities of seropositivity by age based on an age-and-time varying FOI model.} +\title{Generate probabilities of seropositivity by age based on an age-and-time +varying FOI model.} \usage{ probability_seropositive_age_and_time_model_by_age(foi, seroreversion_rate) } \arguments{ -\item{foi}{A dataframe containing the force of infection (FOI) values for different ages. -It should have three columns: 'year', 'age' and 'foi'.} +\item{foi}{A dataframe containing the force of infection (FOI) values +for different ages. It should have three columns: 'year', 'age' and 'foi'.} -\item{seroreversion_rate}{A non-negative numeric value representing the rate of seroreversion.} +\item{seroreversion_rate}{A non-negative numeric value representing +the rate of seroreversion.} } \value{ A dataframe with columns 'age' and 'seropositivity'. } \description{ -This function calculates the probabilities of seropositivity by age based on an age-and-time-varying FOI model. +This function calculates the probabilities of seropositivity by age based on +an age-and-time-varying FOI model. It takes into account the FOI and the rate of seroreversion. } diff --git a/man/probability_seropositive_age_model_by_age.Rd b/man/probability_seropositive_age_model_by_age.Rd index 24c93102..85f52bdb 100644 --- a/man/probability_seropositive_age_model_by_age.Rd +++ b/man/probability_seropositive_age_model_by_age.Rd @@ -2,20 +2,23 @@ % Please edit documentation in R/simulate_serosurvey.R \name{probability_seropositive_age_model_by_age} \alias{probability_seropositive_age_model_by_age} -\title{Generate probabilities of seropositivity by age based on an age-varying FOI model.} +\title{Generate probabilities of seropositivity by age based on an age-varying +FOI model.} \usage{ probability_seropositive_age_model_by_age(foi, seroreversion_rate) } \arguments{ -\item{foi}{A dataframe containing the force of infection (FOI) values for different ages. -It should have two columns: 'age' and 'foi'.} +\item{foi}{A dataframe containing the force of infection (FOI) values for +different ages. It should have two columns: 'age' and 'foi'.} -\item{seroreversion_rate}{A non-negative numeric value representing the rate of seroreversion.} +\item{seroreversion_rate}{A non-negative numeric value representing the rate +of seroreversion.} } \value{ A dataframe with columns 'age' and 'seropositivity'. } \description{ -This function calculates the probabilities of seropositivity by age based on an age-varying FOI model. +This function calculates the probabilities of seropositivity by age based on +an age-varying FOI model. It takes into account the FOI and the rate of seroreversion. } diff --git a/man/probability_seropositive_by_age.Rd b/man/probability_seropositive_by_age.Rd index e416a8b4..f2910a5b 100644 --- a/man/probability_seropositive_by_age.Rd +++ b/man/probability_seropositive_by_age.Rd @@ -7,15 +7,17 @@ probability_seropositive_by_age(model, foi, seroreversion_rate = 0) } \arguments{ -\item{model}{A string specifying the model type which can be one of \link{'age', 'time', 'age-time'}.} +\item{model}{A string specifying the model type which can be one of +\link{'age', 'time', 'age-time'}.} \item{foi}{A dataframe containing the force of infection (FOI) values. For time-varying models the columns should be \link{'year', 'foi'}. For age-varying models the columns should be \link{'age', 'foi'}. -For age-and-time-varying models the columns should be \link{'age', 'time', 'foi'}.} +For age-and-time-varying models the columns should be +\link{'age', 'time', 'foi'}.} -\item{seroreversion_rate}{A non-negative value determining the rate of seroreversion (per year). -Default is 0.} +\item{seroreversion_rate}{A non-negative value determining the rate of +seroreversion (per year). Default is 0.} } \value{ A dataframe with columns 'age' and 'seropositivity'. diff --git a/man/probability_seropositive_general_model_by_age.Rd b/man/probability_seropositive_general_model_by_age.Rd index d80e3d8a..db451eb8 100644 --- a/man/probability_seropositive_general_model_by_age.Rd +++ b/man/probability_seropositive_general_model_by_age.Rd @@ -5,7 +5,7 @@ \title{Generate probabilities of seropositivity by age based on a general FOI model.} \usage{ probability_seropositive_general_model_by_age( - construct_A_function, + construct_A_fn, calculate_seropositivity_function, initial_conditions, max_age, @@ -13,15 +13,16 @@ probability_seropositive_general_model_by_age( ) } \arguments{ -\item{calculate_seropositivity_function}{A function which takes the state vector -and returns the seropositive fraction.} +\item{construct_A_fn}{A function that constructs a matrix that defines the +multiplier term in the linear ODE system.} -\item{initial_conditions}{The initial state vector proportions for each birth cohort.} +\item{calculate_seropositivity_function}{A function which takes the state +vector and returns the seropositive fraction.} -\item{max_age}{The maximum age to simulate seropositivity for.} +\item{initial_conditions}{The initial state vector proportions for each +birth cohort.} -\item{construct_A_fn}{A function that constructs a matrix that defines the multiplier -term in the linear ODE system.} +\item{max_age}{The maximum age to simulate seropositivity for.} } \value{ A dataframe with columns 'age' and 'seropositivity'. diff --git a/man/probability_seropositive_time_model_by_age.Rd b/man/probability_seropositive_time_model_by_age.Rd index 080f6323..cba7321c 100644 --- a/man/probability_seropositive_time_model_by_age.Rd +++ b/man/probability_seropositive_time_model_by_age.Rd @@ -2,20 +2,23 @@ % Please edit documentation in R/simulate_serosurvey.R \name{probability_seropositive_time_model_by_age} \alias{probability_seropositive_time_model_by_age} -\title{Generate probabilities of seropositivity by age based on a time-varying FOI model.} +\title{Generate probabilities of seropositivity by age based on a time-varying FOI +model.} \usage{ probability_seropositive_time_model_by_age(foi, seroreversion_rate) } \arguments{ -\item{foi}{A dataframe containing the force of infection (FOI) values for different years. -It should have two columns: 'year' and 'foi'.} +\item{foi}{A dataframe containing the force of infection (FOI) values +for different years. It should have two columns: 'year' and 'foi'.} -\item{seroreversion_rate}{A non-negative numeric value representing the rate of seroreversion.} +\item{seroreversion_rate}{A non-negative numeric value representing the +rate of seroreversion.} } \value{ A dataframe with columns 'age' and 'seropositivity'. } \description{ -This function calculates the probabilities of seropositivity by age based on a time-varying FOI model. +This function calculates the probabilities of seropositivity by age based on +a time-varying FOI model. It takes into account the FOI and the rate of seroreversion. } diff --git a/man/run_seromodel.Rd b/man/run_seromodel.Rd deleted file mode 100644 index f564ca8e..00000000 --- a/man/run_seromodel.Rd +++ /dev/null @@ -1,108 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R -\name{run_seromodel} -\alias{run_seromodel} -\title{Run specified stan model for the force-of-infection and -estimate the seroprevalence based on the result of the fit} -\usage{ -run_seromodel( - serodata, - foi_model = c("constant", "tv_normal_log", "tv_normal"), - foi_parameters = NULL, - chunks = NULL, - chunk_size = 1, - iter = 1000, - adapt_delta = 0.9, - max_treedepth = 10, - seed = 12345, - print_summary = TRUE, - ... -) -} -\arguments{ -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} - -\item{foi_model}{Name of the selected model. Current version provides three -options: -\describe{ -\item{\code{"constant"}}{Runs a constant model} -\item{\code{"tv_normal"}}{Runs a normal model} -\item{\code{"tv_normal_log"}}{Runs a normal logarithmic model} -}} - -\item{foi_parameters}{List specifying the initial prior parameters of the -model \code{foi_model} to be specified as (e.g.): -\describe{ -\item{\code{"constant"}}{\code{list(foi_a = 0, foi_b = 2)}} -\item{\code{"tv_normal"}}{\code{list(foi_location = 0, foi_scale = 1)}} -\item{\code{"tv_normal_log"}}{\code{list(foi_location = -6, foi_scale = 4)}} -}} - -\item{chunks}{Numeric list specifying the chunk structure of the time -interval from the birth year of the oldest age cohort -\code{min(serodata$age_mean_f)} to the time when the serosurvey was conducted -\code{t_sur}. If \code{NULL}, the time interval is divided in chunks of size -\code{chunk_size}.} - -\item{chunk_size}{Size of the chunks to be used in case that the chunk -structure \code{chunks} is not specified in \link{fit_seromodel}. -Default is 1, meaning that one force of infection value is to be estimated -for every year in the time interval spanned by the serosurvey. -If the length of the time interval is not exactly divisible by \code{chunk_size}, -the remainder years are included in the last chunk.} - -\item{iter}{Number of interactions for each chain including the warmup. -\code{iter} in \link[rstan:stanmodel-method-sampling]{sampling}.} - -\item{adapt_delta}{Real number between 0 and 1 that represents the target -average acceptance probability. Increasing the value of \code{adapt_delta} will -result in a smaller step size and fewer divergences. For further details -refer to the \code{control} parameter in \link[rstan:stanmodel-method-sampling]{sampling} or -\href{https://mc-stan.org/rstanarm/reference/adapt_delta.html}{here}.} - -\item{max_treedepth}{Maximum tree depth for the binary tree used in the NUTS -stan sampler. For further details refer to the \code{control} parameter in -\link[rstan:stanmodel-method-sampling]{sampling}.} - -\item{seed}{For further details refer to the \code{seed} parameter in -\link[rstan:stanmodel-method-sampling]{sampling}.} - -\item{print_summary}{Boolean. If \code{TRUE}, a table summarizing modelling -results is printed.} - -\item{...}{Additional parameters for \link[rstan:stanmodel-method-sampling]{sampling}.} -} -\value{ -\code{seromodel_object}. An object containing relevant information about -the implementation of the model. For further details refer to -\link{fit_seromodel}. -} -\description{ -Starting on v.0.1.0, this function will be DEPRECATED. Use \code{fit_seromodel} -instead. -This function runs the specified model for the force-of-infection \code{foi_model} -using the data from a seroprevalence survey \code{serodata} as the input data. See -\link{fit_seromodel} for further details. -} -\examples{ -\dontrun{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -fit_seromodel( - serodata, - foi_model = "constant" -) -} -} diff --git a/man/sample_size_by_individual_age_random.Rd b/man/sample_size_by_individual_age_random.Rd index cb046c9e..117284c5 100644 --- a/man/sample_size_by_individual_age_random.Rd +++ b/man/sample_size_by_individual_age_random.Rd @@ -2,22 +2,24 @@ % Please edit documentation in R/simulate_serosurvey.R \name{sample_size_by_individual_age_random} \alias{sample_size_by_individual_age_random} -\title{Generate random sample sizes for each individual age based on survey features.} +\title{Generate random sample sizes for each individual age based on survey +features.} \usage{ sample_size_by_individual_age_random(survey_features) } \arguments{ -\item{survey_features}{A dataframe containing information about individuals' age ranges and -sample sizes.} +\item{survey_features}{A dataframe containing information about individuals' +age ranges and sample sizes.} } \value{ -A dataframe with random sample sizes generated for each individual age based on the -provided survey features. +A dataframe with random sample sizes generated for each individual +age based on the provided survey features. } \description{ -This function generates random sample sizes for each individual age based on the provided -survey features. It first creates age bins, assigns each individual in the survey features -to an age bin, calculates the overall sample size by group, and then generates random sample -sizes for each age group. Finally, it returns a dataframe with the random sample sizes for -each individual age. +This function generates random sample sizes for each individual age based on +the provided survey features. It first creates age bins, assigns each +individual in the survey features to an age bin, calculates the overall +sample size by group, and then generates random sample sizes for each age +group. Finally, it returns a dataframe with the random sample sizes for each +individual age. } diff --git a/man/serofoi-package.Rd b/man/serofoi-package.Rd index f2c8e932..8fcb4efa 100644 --- a/man/serofoi-package.Rd +++ b/man/serofoi-package.Rd @@ -11,4 +11,24 @@ A DESCRIPTION OF THE PACKAGE \references{ Stan Development Team (NA). RStan: the R interface to Stan. R package version 2.26.22. https://mc-stan.org + +#' @keywords internal +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://trace-lac.github.io/serofoi/} +} + +} +\author{ +\strong{Maintainer}: Zulma M. Cucunubá \email{zulma.cucunuba@javeriana.edu.co} (\href{https://orcid.org/0000-0002-8165-3198}{ORCID}) + +Authors: +\itemize{ + \item Nicolás T. Domínguez \email{ex-ntorres@javeriana.edu.co} (\href{https://orcid.org/0009-0002-8484-1298}{ORCID}) + \item Ben Lambert + \item Pierre Nouvellet +} + } diff --git a/man/set_foi_init.Rd b/man/set_foi_init.Rd new file mode 100644 index 00000000..d62452db --- /dev/null +++ b/man/set_foi_init.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fit_seromodel.R +\name{set_foi_init} +\alias{set_foi_init} +\title{Sets initialization function for sampling} +\usage{ +set_foi_init(foi_init, is_log_foi, foi_index) +} +\arguments{ +\item{is_log_foi}{Boolean to set logarithmic scale in the FOI} + +\item{foi_index}{Integer vector specifying the age-groups for which +force-of-infection values will be estimated. It can be specified by +means of \link{get_foi_index}} +} +\description{ +Sets initialization function for sampling +} diff --git a/man/set_stan_data_defaults.Rd b/man/set_stan_data_defaults.Rd new file mode 100644 index 00000000..c909c154 --- /dev/null +++ b/man/set_stan_data_defaults.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_stan_data.R +\name{set_stan_data_defaults} +\alias{set_stan_data_defaults} +\title{Set stan data defaults for sampling} +\usage{ +set_stan_data_defaults(stan_data, is_log_foi = FALSE, is_seroreversion = FALSE) +} +\arguments{ +\item{stan_data}{List to be passed to \link[rstan:stanmodel-method-sampling]{rstan}} + +\item{is_log_foi}{Boolean to set logarithmic scale in the FOI} + +\item{is_seroreversion}{Boolean specifying whether to include +seroreversion rate estimation in the model} +} +\value{ +List with default values of stan data for sampling +} +\description{ +Set stan data defaults for sampling +} diff --git a/man/sf_cauchy.Rd b/man/sf_cauchy.Rd new file mode 100644 index 00000000..4d89439a --- /dev/null +++ b/man/sf_cauchy.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_stan_data.R +\name{sf_cauchy} +\alias{sf_cauchy} +\title{Sets Cauchy distribution parameters for sampling} +\usage{ +sf_cauchy(location = 0, scale = 1) +} +\arguments{ +\item{scale}{Scale +of the normal distribution} + +\item{sd}{Standard deviation of the normal distribution} +} +\value{ +List with specified statistics and name of the model +} +\description{ +Sets Cauchy distribution parameters for sampling +} diff --git a/man/sf_none.Rd b/man/sf_none.Rd new file mode 100644 index 00000000..f101d850 --- /dev/null +++ b/man/sf_none.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_stan_data.R +\name{sf_none} +\alias{sf_none} +\title{Sets empty distribution} +\usage{ +sf_none() +} +\description{ +Sets empty distribution +} diff --git a/man/sf_normal.Rd b/man/sf_normal.Rd new file mode 100644 index 00000000..32977287 --- /dev/null +++ b/man/sf_normal.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_stan_data.R +\name{sf_normal} +\alias{sf_normal} +\title{Sets normal distribution parameters for sampling} +\usage{ +sf_normal(mean = 0, sd = 1) +} +\arguments{ +\item{mean}{Mean of the normal distribution} + +\item{sd}{Standard deviation of the normal distribution} +} +\value{ +List with specified statistics and name of the model +} +\description{ +Sets normal distribution parameters for sampling +} diff --git a/man/sf_uniform.Rd b/man/sf_uniform.Rd new file mode 100644 index 00000000..a2ef7279 --- /dev/null +++ b/man/sf_uniform.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_stan_data.R +\name{sf_uniform} +\alias{sf_uniform} +\title{Sets uniform distribution parameters for sampling} +\usage{ +sf_uniform(min = 0, max = 10) +} +\arguments{ +\item{min}{Minimum value of the random variable of the uniform distribution} + +\item{max}{Maximum value of the random variable of the uniform distribution} +} +\value{ +List with specified statistics and name of the model +} +\description{ +Sets uniform distribution parameters for sampling +} diff --git a/man/simulate_serosurvey.Rd b/man/simulate_serosurvey.Rd index 2e250102..76dc69c2 100644 --- a/man/simulate_serosurvey.Rd +++ b/man/simulate_serosurvey.Rd @@ -7,29 +7,33 @@ simulate_serosurvey(model, foi, survey_features, seroreversion_rate = 0) } \arguments{ -\item{model}{A string specifying the model type which can be one of \link{'age', 'time', 'age-time'}.} +\item{model}{A string specifying the model type which can be one of +\link{'age', 'time', 'age-time'}.} \item{foi}{A dataframe containing the force of infection (FOI) values. For time-varying models the columns should be \link{'year', 'foi'}. For age-varying models the columns should be \link{'age', 'foi'}. For age-and-time-varying models the columns should be \link{'age', 'time', 'foi'}.} -\item{survey_features}{A dataframe containing information about the binned age groups and sample -sizes for each. It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} +\item{survey_features}{A dataframe containing information about the binned +age groups and sample sizes for each. +It should contain columns: \link{'age_min', 'age_max', 'n_sample'}.} -\item{seroreversion_rate}{A non-negative value determining the rate of seroreversion (per year). -Default is 0.} +\item{seroreversion_rate}{A non-negative value determining the rate of +seroreversion (per year). Default is 0.} } \value{ -A dataframe with simulated serosurvey data, including age group information, overall -sample sizes, the number of seropositive individuals, and other survey features. +A dataframe with simulated serosurvey data, including age group +information, overall sample sizes, the number of seropositive individuals, +and other survey features. } \description{ -This function generates binned serosurvey data based on either a time-varying FOI model, -an age-varying FOI model, or an age-and-time-varying FOI model. In all cases, it is possible -to optionally include seroreversion. This function allows construction of serosurveys -with binned age groups, and it generates uncertainty in the distribution of a sample size -within an age bin through multinomial sampling. +This function generates binned serosurvey data based on either a time-varying +FOI model, an age-varying FOI model, or an age-and-time-varying FOI model. +In all cases, it is possible to optionally include seroreversion. This +function allows construction of serosurveys with binned age groups, and it +generates uncertainty in the distribution of a sample size within an age bin +through multinomial sampling. } \examples{ # time-varying model @@ -40,7 +44,7 @@ mutate(foi = rnorm(length(year), 0.1, 0.01)) survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500)) + n_sample = c(1000, 2000, 1500)) serosurvey <- simulate_serosurvey( model = "time", foi = foi_df, @@ -54,7 +58,7 @@ mutate(foi = rnorm(length(year), 0.1, 0.01)) survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500)) + n_sample = c(1000, 2000, 1500)) serosurvey <- simulate_serosurvey( model = "age", foi = foi_df, @@ -69,7 +73,7 @@ mutate(foi = rnorm(20 * 20, 0.1, 0.01)) survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500)) + n_sample = c(1000, 2000, 1500)) serosurvey <- simulate_serosurvey( model = "age", foi = foi_df, diff --git a/man/simulate_serosurvey_age_and_time_model.Rd b/man/simulate_serosurvey_age_and_time_model.Rd index 4a3f761a..d853122e 100644 --- a/man/simulate_serosurvey_age_and_time_model.Rd +++ b/man/simulate_serosurvey_age_and_time_model.Rd @@ -11,24 +11,27 @@ simulate_serosurvey_age_and_time_model( ) } \arguments{ -\item{foi}{A dataframe containing the force of infection (FOI) values for different ages. -It should have two columns: 'year', 'age' and 'foi'.} +\item{foi}{A dataframe containing the force of infection (FOI) values for +different ages. It should have two columns: 'year', 'age' and 'foi'.} -\item{survey_features}{A dataframe containing information about the binned age groups and sample -sizes for each. It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} +\item{survey_features}{A dataframe containing information about the binned +age groups and sample sizes for each. It should contain columns: +\link{'age_min', 'age_max', 'n_sample'}.} -\item{seroreversion_rate}{A non-negative value determining the rate of seroreversion (per year). -Default is 0.} +\item{seroreversion_rate}{A non-negative value determining the rate of +seroreversion (per year). Default is 0.} } \value{ -A dataframe with simulated serosurvey data, including age group information, overall -sample sizes, the number of seropositive individuals, and other survey features. +A dataframe with simulated serosurvey data, including age group +information, overall sample sizes, the number of seropositive individuals, +and other survey features. } \description{ -This function generates binned serosurvey data based on an age-and-time-varying FOI model, -optionally including seroreversion. This function allows construction of serosurveys -with binned age groups, and it generates uncertainty in the distribution of a sample size -within an age bin through multinomial sampling. +This function generates binned serosurvey data based on an +age-and-time-varying FOI model, optionally including seroreversion. +This function allows construction of serosurveys with binned age groups, and +it generates uncertainty in the distribution of a sample size within an age +bin through multinomial sampling. } \examples{ # specify FOIs for each year @@ -40,7 +43,7 @@ mutate(foi = rnorm(20 * 20, 0.1, 0.01)) survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500)) + n_sample = c(1000, 2000, 1500)) serosurvey <- simulate_serosurvey_age_and_time_model( foi_df, survey_features) } diff --git a/man/simulate_serosurvey_age_model.Rd b/man/simulate_serosurvey_age_model.Rd index 2302aecc..73f5342a 100644 --- a/man/simulate_serosurvey_age_model.Rd +++ b/man/simulate_serosurvey_age_model.Rd @@ -7,24 +7,26 @@ simulate_serosurvey_age_model(foi, survey_features, seroreversion_rate = 0) } \arguments{ -\item{foi}{A dataframe containing the force of infection (FOI) values for different ages. -It should have two columns: 'age' and 'foi'.} +\item{foi}{A dataframe containing the force of infection (FOI) values for +different ages. It should have two columns: 'age' and 'foi'.} -\item{survey_features}{A dataframe containing information about the binned age groups and sample -sizes for each. It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} +\item{survey_features}{A dataframe containing information about the binned +age groups and sample sizes for each. It should contain columns: +\link{'age_min', 'age_max', 'n_sample'}.} -\item{seroreversion_rate}{A non-negative value determining the rate of seroreversion (per year). -Default is 0.} +\item{seroreversion_rate}{A non-negative value determining the rate of +seroreversion (per year). Default is 0.} } \value{ -A dataframe with simulated serosurvey data, including age group information, overall -sample sizes, the number of seropositive individuals, and other survey features. +A dataframe with simulated serosurvey data, including age group +information, overall sample sizes, the number of seropositive individuals, +and other survey features. } \description{ -This function generates binned serosurvey data based on an age-varying FOI model, -optionally including seroreversion. This function allows construction of serosurveys -with binned age groups, and it generates uncertainty in the distribution of a sample size -within an age bin through multinomial sampling. +This function generates binned serosurvey data based on an age-varying FOI +model, optionally including seroreversion. This function allows construction +of serosurveys with binned age groups, and it generates uncertainty in the +distribution of a sample size within an age bin through multinomial sampling. } \examples{ # specify FOIs for each year @@ -35,7 +37,7 @@ mutate(foi = rnorm(length(year), 0.1, 0.01)) survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500)) + n_sample = c(1000, 2000, 1500)) serosurvey <- simulate_serosurvey_age_model( foi_df, survey_features) } diff --git a/man/simulate_serosurvey_general_model.Rd b/man/simulate_serosurvey_general_model.Rd index 0d7ca514..2012a683 100644 --- a/man/simulate_serosurvey_general_model.Rd +++ b/man/simulate_serosurvey_general_model.Rd @@ -13,19 +13,22 @@ simulate_serosurvey_general_model( ) } \arguments{ -\item{calculate_seropositivity_function}{A function which takes the state vector -and returns the seropositive fraction.} +\item{calculate_seropositivity_function}{A function which takes the state +vector and returns the seropositive fraction.} -\item{initial_conditions}{The initial state vector proportions for each birth cohort.} +\item{initial_conditions}{The initial state vector proportions for each +birth cohort.} -\item{survey_features}{A dataframe containing information about the binned age groups and sample -sizes for each. It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} +\item{survey_features}{A dataframe containing information about the binned +age groups and sample sizes for each. +It should contain columns: \link{'age_min', 'age_max', 'n_sample'}.} } \value{ -A dataframe with simulated serosurvey data, including age group information, overall -sample sizes, the number of seropositive individuals, and other survey features. +A dataframe with simulated serosurvey data, including age group +information, overall sample sizes, the number of seropositive individuals, +and other survey features. } \description{ -This simulation method assumes only that the model system can be written as a piecewise- -linear ordinary differential equation system. +This simulation method assumes only that the model system can be written as +a piecewise-linear ordinary differential equation system. } diff --git a/man/simulate_serosurvey_time_model.Rd b/man/simulate_serosurvey_time_model.Rd index 7b965831..a5a15094 100644 --- a/man/simulate_serosurvey_time_model.Rd +++ b/man/simulate_serosurvey_time_model.Rd @@ -7,24 +7,26 @@ simulate_serosurvey_time_model(foi, survey_features, seroreversion_rate = 0) } \arguments{ -\item{foi}{A dataframe containing the force of infection (FOI) values for different years. -It should have two columns: 'year' and 'foi'.} +\item{foi}{A dataframe containing the force of infection (FOI) values for +different years. It should have two columns: 'year' and 'foi'.} -\item{survey_features}{A dataframe containing information about the binned age groups and sample -sizes for each. It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} +\item{survey_features}{A dataframe containing information about the binned +age groups and sample sizes for each. It should contain columns: +\link{'age_min', 'age_max', 'n_sample'}.} -\item{seroreversion_rate}{A non-negative value determining the rate of seroreversion (per year). -Default is 0.} +\item{seroreversion_rate}{A non-negative value determining the rate of +seroreversion (per year). Default is 0.} } \value{ -A dataframe with simulated serosurvey data, including age group information, overall -sample sizes, the number of seropositive individuals, and other survey features. +A dataframe with simulated serosurvey data, including age group +information, overall sample sizes, the number of seropositive individuals, +and other survey features. } \description{ -This function generates binned serosurvey data based on a time-varying FOI model, -optionally including seroreversion. This function allows construction of serosurveys -with binned age groups, and it generates uncertainty in the distribution of a sample size -within an age bin through multinomial sampling. +This function generates binned serosurvey data based on a time-varying FOI +model, optionally including seroreversion. This function allows construction +of serosurveys with binned age groups, and it generates uncertainty in the +distribution of a sample size within an age bin through multinomial sampling. } \examples{ # specify FOIs for each year @@ -35,7 +37,7 @@ mutate(foi = rnorm(length(year), 0.1, 0.01)) survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500)) + n_sample = c(1000, 2000, 1500)) serosurvey <- simulate_serosurvey_time_model( foi_df, survey_features) } diff --git a/man/summarise_central_estimate.Rd b/man/summarise_central_estimate.Rd new file mode 100644 index 00000000..5a26b682 --- /dev/null +++ b/man/summarise_central_estimate.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarise_seromodel.R +\name{summarise_central_estimate} +\alias{summarise_central_estimate} +\title{Summarise central estimate} +\usage{ +summarise_central_estimate( + seromodel, + serosurvey, + alpha, + par_name = "seroreversion_rate", + central_estimate_digits = 2 +) +} +\arguments{ +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} + +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{alpha}{1 - alpha indicates the credibility level to be used} + +\item{par_name}{String specifying the parameter to be extracted +from \code{seromodel}} + +\item{central_estimate_digits}{Number of central estimate digits} +} +\value{ +Text summarising specified central estimate +} +\description{ +Summarise central estimate +} diff --git a/man/summarise_loo_estimate.Rd b/man/summarise_loo_estimate.Rd new file mode 100644 index 00000000..456da0ac --- /dev/null +++ b/man/summarise_loo_estimate.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarise_seromodel.R +\name{summarise_loo_estimate} +\alias{summarise_loo_estimate} +\title{Extract specified loo estimate} +\usage{ +summarise_loo_estimate( + seromodel, + par_loo_estimate = "elpd_loo", + loo_estimate_digits = 2 +) +} +\arguments{ +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} + +\item{par_loo_estimate}{Name of the loo estimate to be extracted. +Available options are: +\describe{ +\item{\code{"elpd_loo"}}{Expected log pointwise predictive density} +\item{\code{"p_loo"}}{Effective number of parameters} +\item{\code{"looic"}}{Leave-one-out cross-validation information criteria} +} +For additional information refer to \link[loo:loo]{loo}.} + +\item{loo_estimate_digits}{Number of loo estimate digits} +} +\value{ +Text summarising specified loo estimate +} +\description{ +Extract specified loo estimate +} diff --git a/man/summarise_seromodel.Rd b/man/summarise_seromodel.Rd new file mode 100644 index 00000000..7f0245ff --- /dev/null +++ b/man/summarise_seromodel.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarise_seromodel.R +\name{summarise_seromodel} +\alias{summarise_seromodel} +\title{Summarise specified model} +\usage{ +summarise_seromodel( + seromodel, + serosurvey, + alpha = 0.05, + par_loo_estimate = "elpd_loo", + loo_estimate_digits = 1, + central_estimate_digits = 2, + rhat_digits = 2 +) +} +\arguments{ +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} + +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{n_sample}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{alpha}{1 - alpha indicates the credibility level to be used} + +\item{par_loo_estimate}{Name of the loo estimate to be extracted. +Available options are: +\describe{ +\item{\code{"elpd_loo"}}{Expected log pointwise predictive density} +\item{\code{"p_loo"}}{Effective number of parameters} +\item{\code{"looic"}}{Leave-one-out cross-validation information criteria} +} +For additional information refer to \link[loo:loo]{loo}.} + +\item{loo_estimate_digits}{Number of loo estimate digits} + +\item{central_estimate_digits}{Number of central estimate digits} +} +\value{ +A list summarising the specified model +\describe{ +\item{\code{model_name}}{Name of the model} +\item{\code{elpd}}{elpd and its standard deviation} +\item{\code{foi}}{Estimated foi with credible interval (for 'constant' model)} +\item{\code{foi_rhat}}{foi rhat value (for 'constant' model)} +\item{\code{seroreversion_rate}}{Estimated seroreversion rate} +\item{\code{seroreversion_rate_rhat}}{Seroreversion rate rhat value} +} +} +\description{ +Summarise specified model +} diff --git a/man/survey_by_individual_age.Rd b/man/survey_by_individual_age.Rd index 6b2a0af7..a9b4b47e 100644 --- a/man/survey_by_individual_age.Rd +++ b/man/survey_by_individual_age.Rd @@ -7,12 +7,14 @@ survey_by_individual_age(survey_features, age_df) } \arguments{ -\item{survey_features}{A dataframe containing information about age groups and sample sizes.} +\item{survey_features}{A dataframe containing information about age groups +and sample sizes.} \item{age_df}{A dataframe containing 'age' and 'group'.} } \value{ -A dataframe with overall sample sizes calculated by joining survey_features and age_df. +A dataframe with overall sample sizes calculated by joining +survey_features and age_df. This dataframe has columns including 'age' and 'overall_sample_size'. } \description{ diff --git a/man/veev2012.Rd b/man/veev2012.Rd deleted file mode 100644 index 65899a42..00000000 --- a/man/veev2012.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/veev2012.R -\docType{data} -\name{veev2012} -\alias{veev2012} -\title{Seroprevalence data on serofoi} -\format{ -An object of class \code{"cross"}; see \code{\link[qtl:read.cross]{qtl::read.cross()}}. -} -\usage{ -veev2012 -} -\description{ -Data from a serological surveys -} -\examples{ -veev2012 -} -\keyword{datasets} diff --git a/tests/testthat/clean_expected_files.R b/tests/testthat/clean_expected_files.R deleted file mode 100644 index 72ad5a0a..00000000 --- a/tests/testthat/clean_expected_files.R +++ /dev/null @@ -1,14 +0,0 @@ -# This script removes the svg and csv files used in the automatic tests -# This is usually required when a new version of rstan is released -# Random number generation changes for each new version of -# Rstan, https://mc-stan.org/docs/2_18/reference-manual/reproducibility-chapter.html -# thus all files with expected results (both tables and graphs) become -# obsolete and need to be regenerated - -library(testthat) -paths <- c(testthat::test_path("_snaps", "*"), testthat::test_path("extdata", "dataframes", "expected", "*")) - -for (path in paths) { - cat("Deleting", path, "\n") - unlink(path, recursive = TRUE) -} diff --git a/tests/testthat/extdata/haiti_ssa_sample.RDS b/tests/testthat/extdata/haiti_ssa_sample.RDS deleted file mode 100644 index 04f9fa75..00000000 Binary files a/tests/testthat/extdata/haiti_ssa_sample.RDS and /dev/null differ diff --git a/tests/testthat/test-simulate_serosurvey.R b/tests/testthat/test-simulate_serosurvey.R index b0f640eb..a0860bf2 100644 --- a/tests/testthat/test-simulate_serosurvey.R +++ b/tests/testthat/test-simulate_serosurvey.R @@ -247,14 +247,14 @@ test_that("add_age_bins function works as expected", { test_that("survey_by_individual_age function works as expected", { # Test case 1: Check if overall sample size is calculated correctly for a single row dataframe age_df <- data.frame(age_min = 20, age_max = 30, group = "[20,30]") - survey_features <- data.frame(group = "[20,30]", sample_size = 100) + survey_features <- data.frame(group = "[20,30]", n_sample = 100) expected_df <- data.frame(age_min = 20, age_max = 30, group = "[20,30]", overall_sample_size = 100) actual_df <- survey_by_individual_age(survey_features, age_df) expect_equal(actual_df, expected_df) # Test case 2: Check if overall sample size is calculated correctly for multiple rows dataframe age_df <- data.frame(age_min = c(20, 30), age_max = c(31, 50), group = c("[20,30]", "[31,50]")) - survey_features <- data.frame(group = c("[20,30]", "[31,50]"), sample_size = c(100, 150)) + survey_features <- data.frame(group = c("[20,30]", "[31,50]"), n_sample = c(100, 150)) expected_df <- data.frame(age_min = c(20, 30), age_max = c(31, 50), group = c("[20,30]", "[31,50]"), overall_sample_size = c(100, 150)) actual_df <- survey_by_individual_age(survey_features, age_df) expect_equal(actual_df, expected_df) @@ -262,20 +262,20 @@ test_that("survey_by_individual_age function works as expected", { test_that("multinomial_sampling_group function works as expected", { # Test case 1: Check if sample sizes are generated correctly for a sample size of 100 and 5 age groups - sample_size <- 100 + n_sample <- 100 n_ages <- 5 expected_length <- n_ages - actual_sample_sizes <- multinomial_sampling_group(sample_size, n_ages) + actual_sample_sizes <- multinomial_sampling_group(n_sample, n_ages) expect_length(actual_sample_sizes, expected_length) - expect_equal(sum(actual_sample_sizes), sample_size) + expect_equal(sum(actual_sample_sizes), n_sample) # Test case 2: Check if sample sizes are generated correctly for a sample size of 200 and 10 age groups - sample_size <- 200 + n_sample <- 200 n_ages <- 10 expected_length <- n_ages - actual_sample_sizes <- multinomial_sampling_group(sample_size, n_ages) + actual_sample_sizes <- multinomial_sampling_group(n_sample, n_ages) expect_length(actual_sample_sizes, expected_length) - expect_equal(sum(actual_sample_sizes), sample_size) + expect_equal(sum(actual_sample_sizes), n_sample) }) test_that("generate_random_sample_sizes function works as expected", { @@ -289,11 +289,11 @@ test_that("generate_random_sample_sizes function works as expected", { dplyr::group_by(group) %>% dplyr::summarise( overall_sample_size = overall_sample_size[1], - sample_size = sum(sample_size) + n_sample = sum(n_sample) ) expect_equal( group_df$overall_sample_size[1], - group_df$sample_size[1] + group_df$n_sample[1] ) # Test case 2: Check if random sample sizes are generated correctly for two intervals @@ -307,9 +307,9 @@ test_that("generate_random_sample_sizes function works as expected", { dplyr::group_by(group) %>% dplyr::summarise( overall_sample_size = overall_sample_size[1], - sample_size = sum(sample_size) + n_sample = sum(n_sample) ) - expect_equal(group_df$sample_size, group_df$overall_sample_size) + expect_equal(group_df$n_sample, group_df$overall_sample_size) }) test_that("sample_size_by_individual_age_random returns correct dataframe structure", { @@ -318,7 +318,7 @@ test_that("sample_size_by_individual_age_random returns correct dataframe struct survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500) + n_sample = c(1000, 2000, 1500) ) actual_df <- sample_size_by_individual_age_random(survey_features) expect_equal(nrow(actual_df), max(survey_features$age_max)) @@ -327,9 +327,9 @@ test_that("sample_size_by_individual_age_random returns correct dataframe struct dplyr::group_by(group) %>% dplyr::summarise( overall_sample_size = overall_sample_size[1], - sample_size = sum(sample_size) + n_sample = sum(n_sample) ) - expect_equal(group_df$sample_size, group_df$overall_sample_size) + expect_equal(group_df$n_sample, group_df$overall_sample_size) # Test with sample survey_features data: non-contiguous age bins # TODO: doesn't work as age_bins construction too simple currently. @@ -337,7 +337,7 @@ test_that("sample_size_by_individual_age_random returns correct dataframe struct survey_features <- data.frame( age_min = c(1, 7, 18), age_max = c(2, 16, 20), - sample_size = c(1000, 2000, 1500) + n_sample = c(1000, 2000, 1500) ) actual_df <- sample_size_by_individual_age_random(survey_features) expect_equal(nrow(actual_df), 15) @@ -346,7 +346,7 @@ test_that("sample_size_by_individual_age_random returns correct dataframe struct test_that("simulate_serosurvey_time_model function works as expected", { # Test case 1: Check if the output dataframe has the correct structure - sample_sizes <- c(1000, 2000, 1500) + n_samples <- c(1000, 2000, 1500) foi_df <- data.frame( year=seq(1990, 2009, 1) ) %>% @@ -354,11 +354,11 @@ test_that("simulate_serosurvey_time_model function works as expected", { survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = sample_sizes) + n_sample = n_samples) actual_df <- simulate_serosurvey_time_model(foi_df, survey_features) expect_true("age_min" %in% colnames(actual_df)) expect_true("age_max" %in% colnames(actual_df)) - expect_true("sample_size" %in% colnames(actual_df)) + expect_true("n_sample" %in% colnames(actual_df)) expect_true("n_seropositive" %in% colnames(actual_df)) # Test case 2: Check if the output dataframe has the correct number of rows @@ -393,7 +393,7 @@ test_that("simulate_serosurvey_time_model input validation", { survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500) + n_sample = c(1000, 2000, 1500) ) # Test with valid inputs @@ -405,7 +405,7 @@ test_that("simulate_serosurvey_time_model input validation", { # Test with non-dataframe survey_features dataframe expect_error(simulate_serosurvey_time_model(foi_df, list()), - "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") + "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'n_sample'.") # Test with misspelt columns in foi dataframe expect_error(simulate_serosurvey_time_model(data.frame(years = c(1990), foi = c(0.1)), survey_features), @@ -417,7 +417,7 @@ test_that("simulate_serosurvey_time_model input validation", { # Test with missing columns in survey_features dataframe expect_error(simulate_serosurvey_time_model(foi_df, data.frame(age_min = c(1))), - "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") + "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'n_sample'.") # Test with non-numeric seroreversion_rate expect_error(simulate_serosurvey_time_model(foi_df, survey_features, "seroreversion"), @@ -431,7 +431,7 @@ test_that("simulate_serosurvey_time_model input validation", { test_that("simulate_serosurvey_age_model function works as expected", { # Test case 1: Check if the output dataframe has the correct structure - sample_sizes <- c(1000, 2000, 1500) + n_samples <- c(1000, 2000, 1500) foi_df <- data.frame( age=seq(1, 20, 1) ) %>% @@ -439,11 +439,11 @@ test_that("simulate_serosurvey_age_model function works as expected", { survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = sample_sizes) + n_sample = n_samples) actual_df <- simulate_serosurvey_age_model(foi_df, survey_features) expect_true("age_min" %in% colnames(actual_df)) expect_true("age_max" %in% colnames(actual_df)) - expect_true("sample_size" %in% colnames(actual_df)) + expect_true("n_sample" %in% colnames(actual_df)) expect_true("n_seropositive" %in% colnames(actual_df)) # Test case 2: Check if the output dataframe has the correct number of rows @@ -478,7 +478,7 @@ test_that("simulate_serosurvey_age_model input validation", { survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500) + n_sample = c(1000, 2000, 1500) ) # Test with valid inputs @@ -490,7 +490,7 @@ test_that("simulate_serosurvey_age_model input validation", { # Test with non-dataframe survey_features dataframe expect_error(simulate_serosurvey_age_model(foi_df, list()), - "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") + "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'n_sample'.") # Test with misspelt columns in foi dataframe expect_error(simulate_serosurvey_age_model(data.frame(ages = c(1), foi = c(0.1)), survey_features), @@ -502,7 +502,7 @@ test_that("simulate_serosurvey_age_model input validation", { # Test with missing columns in survey_features dataframe expect_error(simulate_serosurvey_age_model(foi_df, data.frame(age_min = c(1))), - "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") + "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'n_sample'.") # Test with non-numeric seroreversion_rate expect_error(simulate_serosurvey_age_model(foi_df, survey_features, "seroreversion"), @@ -515,7 +515,7 @@ test_that("simulate_serosurvey_age_model input validation", { test_that("simulate_serosurvey_age_and_time_model function works as expected", { # Test case 1: Check if the output dataframe has the correct structure - sample_sizes <- c(1000, 2000, 1500) + n_samples <- c(1000, 2000, 1500) foi_df <- tidyr::expand_grid( year = seq(1990, 2009, 1), age = seq(1, 20, 1) @@ -524,11 +524,11 @@ test_that("simulate_serosurvey_age_and_time_model function works as expected", { survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = sample_sizes) + n_sample = n_samples) actual_df <- simulate_serosurvey_age_and_time_model(foi_df, survey_features) expect_true("age_min" %in% colnames(actual_df)) expect_true("age_max" %in% colnames(actual_df)) - expect_true("sample_size" %in% colnames(actual_df)) + expect_true("n_sample" %in% colnames(actual_df)) expect_true("n_seropositive" %in% colnames(actual_df)) # Test case 2: Check if the output dataframe has the correct number of rows @@ -565,7 +565,7 @@ test_that("simulate_serosurvey_age_and_time_model input validation", { survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500) + n_sample = c(1000, 2000, 1500) ) # Test with valid inputs @@ -577,7 +577,7 @@ test_that("simulate_serosurvey_age_and_time_model input validation", { # Test with non-dataframe survey_features dataframe expect_error(simulate_serosurvey_age_and_time_model(foi_df, list()), - "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") + "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'n_sample'.") # Test with misspelt columns in foi dataframe expect_error(simulate_serosurvey_age_and_time_model(data.frame(ages = c(1), foi = c(0.1)), survey_features), @@ -595,7 +595,7 @@ test_that("simulate_serosurvey_age_and_time_model input validation", { # Test with missing columns in survey_features dataframe expect_error(simulate_serosurvey_age_and_time_model(foi_df, data.frame(age_min = c(1))), - "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") + "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'n_sample'.") # Test with non-numeric seroreversion_rate expect_error(simulate_serosurvey_age_and_time_model(foi_df, survey_features, "seroreversion"), @@ -616,10 +616,10 @@ test_that("simulate_serosurvey returns serosurvey data based on specified model" survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500) + n_sample = c(1000, 2000, 1500) ) serosurvey <- simulate_serosurvey("age", foi_df, survey_features) - expect_true(all(names(serosurvey) %in% c("age_min", "age_max", "sample_size", "n_seropositive"))) + expect_true(all(names(serosurvey) %in% c("age_min", "age_max", "n_sample", "n_seropositive"))) # Test with 'time' model foi_df <- data.frame( @@ -627,7 +627,7 @@ test_that("simulate_serosurvey returns serosurvey data based on specified model" foi = runif(20, 0.05, 0.15) ) serosurvey <- simulate_serosurvey("time", foi_df, survey_features) - expect_true(all(names(serosurvey) %in% c("age_min", "age_max", "sample_size", "n_seropositive"))) + expect_true(all(names(serosurvey) %in% c("age_min", "age_max", "n_sample", "n_seropositive"))) # Test with 'age-time' model foi_df <- tidyr::expand_grid( @@ -637,7 +637,7 @@ test_that("simulate_serosurvey returns serosurvey data based on specified model" mutate(foi=rnorm(20 * 20, 0.1, 0.001)) serosurvey <- simulate_serosurvey("age-time", foi_df, survey_features) - expect_true(all(names(serosurvey) %in% c("age_min", "age_max", "sample_size", "n_seropositive"))) + expect_true(all(names(serosurvey) %in% c("age_min", "age_max", "n_sample", "n_seropositive"))) }) test_that("simulate_serosurvey handles invalid model inputs", { @@ -649,7 +649,7 @@ test_that("simulate_serosurvey handles invalid model inputs", { survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500) + n_sample = c(1000, 2000, 1500) ) expect_error(simulate_serosurvey("invalid_model", foi_df, survey_features), "model must be one of 'age', 'time', or 'age-time'.") diff --git a/tests/testthat/test_fit_seromodel.R b/tests/testthat/test_fit_seromodel.R deleted file mode 100644 index 541f48bc..00000000 --- a/tests/testthat/test_fit_seromodel.R +++ /dev/null @@ -1,164 +0,0 @@ -test_that("fit_seromodel constant model estimates the right force of infection", { - - foi_df <- data.frame( - year = 1970:1999, - foi = rep(0.03, 30) - ) - survey_features <- data.frame( - age_min = seq(1, 21, 10), - age_max = seq(10, 30, 10), - sample_size = c(1000, 1500, 2000) - ) - - simdata <- simulate_serosurvey( - model = "time", - foi = foi_df, - survey_features = survey_features - ) %>% - mutate( - survey = "constant_foi", - tsur = max(foi_df$year) + 1 - ) %>% - rename( - total = sample_size, - counts = n_seropositive - ) %>% prepare_serodata() - - model_object <- fit_seromodel( - serodata = simdata, - foi_model = "constant", - iter = 1000 - ) - - foi_central_estimates <- get_foi_central_estimates( - seromodel_object = model_object, - serodata = simdata - ) %>% - mutate( - # calculates tolerance as half the confidence interval size - tol = (upper - lower)/2 - ) %>% - left_join(foi_df, by = "year") - - expect_true( - all( - dplyr::near( - foi_central_estimates$medianv, - foi_central_estimates$foi, - tol = max(foi_central_estimates$tol, 0.05) - ) - ) - ) -}) - -test_that("fit_seromodel tv_normal model estimates the right force of infection", { - - foi_df <- data.frame( - year = 1940:1999, - foi = rep(c(0.06, 0.03, 0.01), c(20, 20, 20)) - ) - survey_features <- data.frame( - age_min = seq(1, 51, 10), - age_max = seq(10, 60, 10), - sample_size = 100 - ) - - simdata <- simulate_serosurvey( - model = "time", - foi = foi_df, - survey_features = survey_features - ) %>% - mutate( - survey = "sw_dec_foi", - tsur = max(foi_df$year) + 1 - ) %>% - rename( - total = sample_size, - counts = n_seropositive - ) %>% prepare_serodata() - - model_object <- fit_seromodel( - serodata = simdata, - foi_model = "tv_normal", - chunks = rep(c(1, 2, 3), c(15, 20, 20)), - iter = 800 - ) - - foi_central_estimates <- get_foi_central_estimates( - seromodel_object = model_object, - serodata = simdata - ) %>% - mutate( - # calculates tolerance as half the confidence interval size - tol = (upper - lower) / 2 - ) %>% - left_join(foi_df, by = "year") - - expect_true( - all( - dplyr::near( - foi_central_estimates$medianv, - foi_central_estimates$foi, - tol = max(foi_central_estimates$tol, 0.05) - ) - ) - ) -}) - -test_that("fit_seromodel tv_normal_log model estimates the right force of infection", { - - foi_df <- data.frame( - year = 1950:1999, - foi = rep( - c(0.001, 0.4, 0.001), - c(30, 5, 15) - ) - ) - - survey_features <- data.frame( - age_min = seq(1, 41, 10), - age_max = seq(10, 50, 10), - sample_size = 100 - ) - - simdata <- simulate_serosurvey( - model = "time", - foi = foi_df, - survey_features = survey_features - ) %>% - mutate( - survey = "big_outbreak", - tsur = max(foi_df$year) + 1 - ) %>% - rename( - total = sample_size, - counts = n_seropositive - ) %>% prepare_serodata() - - model_object <- fit_seromodel( - serodata = simdata, - foi_model = "tv_normal_log", - chunks = rep(c(1, 2, 3), c(25, 5, 15)), - iter = 700 - ) - - foi_central_estimates <- get_foi_central_estimates( - seromodel_object = model_object, - serodata = simdata - ) %>% - mutate( - # calculates tolerance as half the confidence interval size - tol = (upper - lower) / 2 - ) %>% - left_join(foi_df, by = "year") - - expect_true( - all( - dplyr::near( - foi_central_estimates$medianv, - foi_central_estimates$foi, - tol = max(foi_central_estimates$tol, 0.05) - ) - ) - ) -}) diff --git a/tests/testthat/test_issue_47.R b/tests/testthat/test_issue_47.R deleted file mode 100644 index d4bb27eb..00000000 --- a/tests/testthat/test_issue_47.R +++ /dev/null @@ -1,24 +0,0 @@ -test_that("issue 47", { - skip_on_os(c("windows", "mac")) - source("testing_utils.R") - - library(dplyr) - - # Load data - ## This dataset is already prepared - serodata_path <- testthat::test_path("extdata", "haiti_ssa_sample.RDS") - serodata <- readRDS(serodata_path) - - # Error reproduction - model_test <- fit_seromodel( - serodata = serodata, - foi_model = "tv_normal", - print_summary = FALSE - ) - foi <- rstan::extract(model_test, "foi", inc_warmup = FALSE)[[1]] - prev_expanded <- get_prev_expanded(foi, serodata = serodata) - - # Test - age_max <- max(serodata$age_mean_f) - expect_length(prev_expanded$age, n = age_max) -}) diff --git a/tests/testthat/testing_utils.R b/tests/testthat/testing_utils.R deleted file mode 100644 index 3119eddf..00000000 --- a/tests/testthat/testing_utils.R +++ /dev/null @@ -1,69 +0,0 @@ -# TODO Move to separate package ### -# TODO Document all functions and provide examples -library(testthat) -library(vdiffr) -equal_with_tolerance <- function(tolerance = 2e-1) { - function(a, b) { - c <- base::mapply(function(x, y) { - if (is.na(x) && is.na(y)) { - return(0) - } else if (is.na(x) || is.na(y)) { - return(tolerance + 1) - } else { - return(abs(x - y)) - } - }, a, b) - return(base::all(c < tolerance)) - } -} -equal_exact <- function() { - function(a, b) { - x <- base::mapply(function(x, y) x == y || (is.na(x) && is.na(y)), a, b) - return(base::all(x == TRUE)) - } -} - -# TODO use testthat snapshots -expect_similar_dataframes <- function(name, actual_df, column_comparation_functions) { - actual_df_filename <- file.path(tempdir(), paste(name, "csv", sep = ".")) - write.csv(actual_df, actual_df_filename) - compare_fun <- function(expected_df_filename, actual_df_filename) { - return(compare_dataframes(expected_df_filename, actual_df_filename, column_comparation_functions)) - } - expect_snapshot_file(actual_df_filename, compare = compare_fun) -} - - -compare_dataframes <- function(expected_df_filename, actual_df_filename, column_comparation_functions) { - expected_df <- read.csv(expected_df_filename) - actual_df <- read.csv(actual_df_filename) - - all_columns_ok <- TRUE - for (col in base::names(column_comparation_functions)) { - if (col %in% colnames(expected_df) && col %in% colnames(actual_df)) { - compare_function <- column_comparation_functions[[col]] - col_ok <- compare_function(expected_df[[col]], actual_df[[col]]) - if (!col_ok) { - cat("Column", col, "differs ", expected_df[[col]], "!=", actual_df[[col]], "\n") - } - all_columns_ok <- all_columns_ok && col_ok - } else { - if (!(col %in% colnames(expected_df))) { - cat("Column", col, "not in first dataframe") - } - if (!(col %in% colnames(expected_df))) { - cat("Column", col, "not in second dataframe") - } - } - } - return(all_columns_ok) -} - -expect_same_plot <- function(plot_name, actual_plot) { - if (per_platform_snapshots) { - title <- file.path(r_version_id(), plot_name) - } else { - title <- plot_name - } - return(vdiffr::expect_doppelganger(title, actual_plot)) -} diff --git a/vignettes/articles/foi_models.Rmd b/vignettes/articles/foi_models.Rmd new file mode 100644 index 00000000..e504330f --- /dev/null +++ b/vignettes/articles/foi_models.Rmd @@ -0,0 +1,326 @@ +--- +title: "An Introduction To Force Of Infection Models" +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r cleaning, include = FALSE, echo = TRUE} +library(serofoi) +library(dplyr) +``` + +The current version of ***serofoi*** supports three different serocatalytic models for estimating the *Force-of-Infection (FoI)*: constant, age-varying, and time-varying FoI. To estimate relevant parameters we employ a suit of Bayesian models using age-disaggregated population-based cross-sectional serological surveys as input data. + +## What is the Force-of-Infection + +The force of infection, also known as the hazard rate or the infection pressure, is a key concept in mathematical modelling of infectious diseases. It represents the rate at which susceptible individuals become infected, given their exposure to a pathogen. In simple terms, the force of infection quantifies the risk of a susceptible individual becoming infected over a period of time. It is usually expressed as a rate per unit of time (e.g., per day or per year). + +To illustrate this, consider a simple SIR model: +$$ +\begin{aligned} + \frac{dS}{dt} &= -\beta S I,\\ + \frac{dI}{dt} &= \beta S I - \gamma I,\\ + \frac{dR}{dt} &= \gamma I, +\end{aligned} +$$ + +The FoI in this case is defined as $\lambda(t) = \beta I(t)$ (Note that the FoI is time-dependent by definition). To solve this system of equations, we need to know the transmission rate, $\beta>0$, and the rate of recovery from infection $\gamma>0$, as well as some initial conditions. + +**Serocatalytic models** offer a simpler perspective: to estimate how the risk of historical exposure to a pathogen varies depending on the time of birth of the individuals in the sample. For this, we put together the $I$ and $R$ compartments into a single **seropositives** compartment $P(t)$ and consider the the dynamics of each birth cohort separately: +$$ +\begin{aligned} + &\frac{dN^{\tau}(t)}{dt} = -\lambda(t) N^{\tau}(t),\\ + &\frac{dP^{\tau}(t)}{dt} = \lambda(t) N^{\tau}(t), \\ + &N^{\tau}(t)+P^{\tau}(t)=1; \quad t<\tau +\end{aligned} +$$ +where $0 \leq N^\tau(t)\leq 1$ is the proportion of individuals born in year $\tau$ who are seronegative at time $t$. Assuming lack of maternal antibodies, the initial condition for each cohort can be specified as $N^{\tau}(\tau) = 1$. Depending on whether the FoI is constant, time dependent, or age dependent, this system of equations yields the different solutions on which *serofoi* is based on. + +## Constant vs Time-varying FoI + +The *FoI* is often incorrectly assumed to be constant over time. Identifying whether the *FoI* follows a constant, an age-varying or a time-varying trend can be important in the identification and characterization of the spread of disease. **serofoi** offers tools to implement and compare a wide variety of Bayesian models to estimate the FoI according to the aforementioned serocatalytic models. + +In the idealised situation where FoI is constant $\lambda(t) = \lambda$: + +$$ +\frac{dP^{\tau}(t)}{dt} = \lambda (1 - P^{\tau}(t)). +$$ +Applying the integrating factor method and using the initial conditions $X^\tau(\tau) = 0$ yields to the solution: + +$$ +P^{\tau}(t) = 1 - \exp(-\bar \lambda t), +$$ + +meaning that, in the long rung, the seropositivity converges to 1$P^{\tau}(t)\rightarrow 1$. + +In more realistic applications, the FoI is assumed to be piecewise-constant on discrete intervals of 1-year length. The solution for each year is then: +$$ +P^{\tau}(t) = 1 + (P_0 - 1) \exp(-\lambda_t t) +$$ +where $P_0$ corresponds to the seropositivity at the end of the previous year and $\lambda_t$ is the constant value assumed for the FoI at time $t$. From the resulting recursive equation, it is possible to obtain analytical solutions for the seropositivity $P^\tau(t)$ along each 1-year length chunk. + +## Considering seroreversion + +Since it is possible for seropositive individuals to lose immunity over time, we consider the rate at which infected individuals become seronegative $\mu$. The system of equations is now: + +$$ +\begin{aligned} + \frac{dP^{\tau}(t)}{dt} &= \lambda(t) (1 - P^{\tau}(t)) - \mu P^{\tau}(t),\\ + P^{\tau}(\tau) &= 0, +\end{aligned} +$$ + +Similarly, this yields to a iterative solution for the seropositivity $X^\tau (t)$ when piece-wise constant FoI is considered: + + +$$ + P^{\tau}(t) = \frac{\lambda_t}{\lambda_t + \mu} + \left(P_0 - \frac{\lambda_t}{\lambda_t + \mu}\right) \exp(-(\lambda_t + \mu)), +$$ + +where, again, $P_0$ corresponds to the proportion of seropositive individuals of cohort $\tau$ by the end of the previous year. Correct indexation and simplification of this equation allows the description of age-dependent FOIs. + +## Estimating the FoI - Bayesian modelling + +Now that we can describe the proportion of seropositive individuals $P(t)$ by means of the serocatalytic models for constant, time-varying, and age-varying FOIs, what we want is to obtain estimates of the FoI $\lambda$ and the seroreversion rate $\mu$ by sampling from a Bayesian model taking a cross-sectional serological survey as data. For this, we can use **serofoi**'s `fit_seromodel()`, which relies on the statistical programming language *Stan* to perform this task. The current available models are summarised in table 1: + + +::: l-body-outset +| Serocatalytic model | scale | n_seropositive | FoI_1 prior | FoI_i, i>1 | sigma | mu | +| ------------------- | ------- | ------------------------------------- | ------------------------------------- | ------------------------ | ----------------------- | ------------------------------------- | +| constant | regular | binomial(n_sample, prob_infected) | normal(mean, sd)
uniform(min, max) | \- | cauchy(mean, sd) | normal(mean, sd)
uniform(min, max) | +| age | regular | binomial(n_sample, prob_infected) | normal(mean, sd)
uniform(min, max) | normal(FoI_{i-1}, sigma) | cauchy(mean, sd) | normal(mean, sd)
uniform(min, max) | +| time | regular | binomial(n_sample, prob_infected) | normal(mean, sd)
uniform(min, max) | normal(FoI_{i-1}, sigma) | cauchy(mean, sd) | normal(mean, sd)
uniform(min, max) | +| time | log | binomial(n_sample, prob_infected) | normal(mean, sd)
uniform(min, max) | normal(FoI_{i-1}, sigma) | normal(location, scale) | normal(mean, sd)
uniform(min, max) | +::: +Table 1. Available options. + +## Model 1. Constant Force-of-Infection (endemic model) +The *endemic constant model* is a simple mathematical model used in epidemiology to describe the seroprevalence of an infectious disease within a population, as a product of a long-term transmission. In this case, the rate of infection acquisition $\lambda$ is constant over time, and the seroprevalence $P(A)$ behaves as a cumulative process increasing monotonically with age. + +We always model the number of positive cases according to a binomial distribution, where $n(A)$ and $X(A)$ are the sample size and number of seropositive of age group $A$ respectively: +$$ +X(A) \sim \textit{Binom}(n(A), P(A)) +$$ + +To test the `constant` model we simulate a serosurvey following a stepwise decreasing *FoI* (red line in Fig. 1) using the data simulation functions of ***serofoi***: + +```{r constant simdata, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE} +# how the disease circulates +foi_df <- data.frame( + year = seq(2000, 2049, 1), + foi = rep(0.02, 50) +) + +# specify 5-year age bins for observations +survey_features <- data.frame( + age_min = seq(1, 50, 5), + age_max = seq(5, 50, 5), + n_sample = rep(25, 10) +) + +serosurvey_constant <- simulate_serosurvey( + "time", + foi_df, + survey_features +) %>% +mutate(survey_year = 2050) +``` + +The simulated dataset `serosurvey_constant` contains information about 250 samples of individuals between 1 and 50 years old (5 samples per age) with age groups of 5 years length. The following code shows how to implement the constant model to this simulated serosurvey: + +```{r constant model, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} +seromodel_constant <- fit_seromodel( + serosurvey = serosurvey_constant, + model_type = "constant", + iter = 800 +) +plot_seromodel( + seromodel_constant, + serosurvey = serosurvey_constant, + foi_df = foi_df, + size_text = 6 +) +``` +Figure 1. Constant serofoi model plot. Simulated (red) vs modelled (blue) *FoI*. + +```{r clean env constant, include = FALSE, echo = TRUE, message=FALSE} +rm(list = ls(pattern = "_constant")) +``` + +In this case, 800 iterations are enough to ensure convergence. `plot_seromodel()` returns a visualisation of the results including a summary where the expected log pointwise predictive density (`elpd`) and its standard error (`se`) are shown. We say that a model converges if all the R-hat estimates are below 1.01. + +## Time-varying FoI models + +For the time-varying *FoI* models, the probability for a case to be positive at age a at time $t$ also follows a binomial distribution, as described above. However, the seroprevalence is obtained from a cumulative of the yearly-varying values of the *FoI* over time: + +$$ + P^{\tau}(t) = \frac{\lambda_t}{\lambda_t + \mu} + \left(P_0 - \frac{\lambda_t}{\lambda_t + \mu}\right) \exp(-(\lambda_t + \mu)), +$$ + +## Model 2. Time-varying FoI - Slow Time-Varying FoI + +The *time-varying model* (`model_type = "time"`) uses a forward random walk algorithm where the prior of the first chronological FoI value in the time-span of the serological survey can be either a normal distribution (`foi_prior = sf_normal()`) or a uniform distribution (`foi_prior = sf_unif()`). Subsequent values of the FoI are sampled from the normal distribution as $\lambda_i \sim \mathcal{N}(\lambda_{i-1}, \sigma)$, where $\sigma \sim \textit{Cauchy}(0, 1)$. The prior distribution for the seroreversion rate can also be either normal or uniform, and is specified by means of the parameter `seroreversio_prior`. + +To test the `tv_normal` model we simulate a serosurvey following a stepwise decreasing *FoI* (red line in Fig. 2) using the data simulation functions of ***serofoi***: + +```{r tv_normal simdata, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE} + +foi_df <- data.frame( + year = seq(2000, 2049, 1), + foi = c( + rep(0.2, 25), + rep(0.1, 10), + rep(0.00001, 15) + ) +) + +survey_features <- data.frame( + age_min = seq(1, 50, 5), + age_max = seq(5, 50, 5), + n_sample = rep(25, 10) +) + +serosurvey_sw_dec <- simulate_serosurvey( + "time", + foi_df, + survey_features +) %>% +mutate(survey_year = 2050) + +``` + +The simulated dataset `foi_sim_sw_dec` contains information about 250 samples of individuals between 1 and 50 years old (5 samples per age) with age groups of 5 years length. The following code shows how to implement the slow time-varying normal model to this simulated serosurvey: + +```{r tv_normal model, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} + +seromodel_time_normal <- fit_seromodel( + serosurvey = serosurvey_sw_dec, + model_type = "time", + foi_index = rep(c(1, 2, 3), c(25, 10, 15)), + iter = 1500 +) +plot_seromodel( + seromodel_time_normal, + serosurvey = serosurvey_sw_dec, + foi_df = foi_df, + size_text = 6 +) +``` +Figure 2. Slow time-varying serofoi model plot. Simulated (red) vs modelled (blue) *FoI*. + +```{r clean env sw_dec, include = FALSE, echo = TRUE, message=FALSE} +rm(list = ls(pattern = "_sw_dec|_normal")) +``` + +The number of iterations required may depend on the number of years, reflected by the difference between the year of the serosurvey and the maximum age-class sampled. + +## Model 3. Time-varying FoI - Fast Epidemic Model +The *time-varying fast epidemic model* is parametrized in such a way that the initial FoI value in the forward random walk is sampled in the logarithmic scale. In this case, the priors for subsequent FOIs are: +$$ +\log\left(\lambda(t)\right) \sim \mathcal{N}(\log(\lambda(t-1)), \sigma) \\ +\sigma \sim \mathcal{N} (0, 1) +$$ +This is done in order to capture fast changes in the *FoI* trend. + +To test the `log_time` model we simulate a serosurvey conducted in 2050 emulating a hypothetical situation where a three-year epidemic occurred between 2030 and 2035: + +```{r tv_normal_log simdata, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE} +foi_df <- data.frame( + year = seq(2000, 2049, 1), + foi = c( + rep(0, 30), + rep(0.6, 5), + rep(0, 15) + ) +) + +survey_features <- data.frame( + age_min = seq(1, 50, 5), + age_max = seq(5, 50, 5), + n_sample = rep(25, 10) +) + +serosurvey_large_epi <- simulate_serosurvey( + survey_features = survey_features, + foi_df, + model = "time" +) %>% +mutate(survey_year = 2050) + +``` + +The simulated serosurvey tests 250 individuals between 1 and 50 years old by the year 2050. The implementation of the fast epidemic model can be obtained running the following lines of code: + +```{r tv_normal_log model, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} +seromodel_log_time_normal <- fit_seromodel( + serosurvey = serosurvey_large_epi, + model_type = "time", + is_log_foi = TRUE, + foi_index = rep(c(1, 2, 3), c(30, 5, 15)), + iter = 2000 +) + +plot_log_time_normal <- plot_seromodel( + seromodel_log_time_normal, + serosurvey = serosurvey_large_epi, + foi_df = foi_df, + size_text = 6 +) +plot(plot_log_time_normal) +``` +Figure 3. *Time-varying fast epidemic serofoi model* plot. Simulated (red) vs modelled (blue) *FoI*. + +In Fig 3 we can see that the *fast epidemic serofoi model* is able to identify the large epidemic simulated on the `simdata_large_epi` dataset. + +## Models Comparison + +Above we showed that the fast epidemic model (specified by `model_type = "time"` and `is_log = TRUE` in `fit_seromodel()`) is able to identify the large epidemic outbreak described by the dataset simulated according to a step-wise decreasing *FoI* (red line in Fig 3). + +Now, we would like to know whether this model actually fits this dataset better than the other available models in ***serofoi***. For this, we also implement both the endemic model (`model_type = "constant"`) and the slow time-varying normal model (`model_type="time"`, `is_log = FALSE`): + +```{r model_comparison, include = FALSE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE } +seromodel_constant <- fit_seromodel( + serosurvey = serosurvey_large_epi, + model_type = "constant", + iter = 800 +) +plot_constant <- plot_seromodel( + seromodel_constant, + serosurvey = serosurvey_large_epi, + foi_df = foi_df, + size_text = 6 +) + +seromodel_time_normal <- fit_seromodel( + serosurvey = serosurvey_large_epi, + model_type = "time", + foi_index = rep(c(1, 2, 3), c(30, 5, 15)), + iter = 2000 +) +plot_time_normal <- plot_seromodel( + seromodel_time_normal, + serosurvey = serosurvey_large_epi, + foi_df = foi_df, + size_text = 6 +) +``` +Using the function `cowplot::plot_grid` we can visualise the results of the three models simultaneously: + +```{r model_comparison_plot, include = TRUE, echo = TRUE, errors = FALSE, warning = FALSE, message = FALSE, fig.width=5, fig.asp=1, fig.align="center"} +cowplot::plot_grid( + plot_constant, plot_time_normal, plot_log_time_normal, + nrow = 1, ncol = 3, labels = "AUTO" +) +``` +Figure 4. Model comparison between the three serofoi models for a large-epidemic simulated dataset. + +A common criterion to decide what model fits the data the best is to choose the one with the larger `elpd`. According to this criterion, in this case the best model is the fast epidemic model, which is the only one that manages to identify the large epidemic (see the second row of panel C in Figure 4). + +NOTE: Running the ***serofoi*** models for the first time on your local computer may take a few minutes for the rstan code to compile locally. However, once the initial compilation is complete, there is no further need for local compilation. + +## References diff --git a/vignettes/simulating_serosurveys.Rmd b/vignettes/articles/simulating_serosurveys.Rmd similarity index 74% rename from vignettes/simulating_serosurveys.Rmd rename to vignettes/articles/simulating_serosurveys.Rmd index 87fbd000..cf511ec5 100644 --- a/vignettes/simulating_serosurveys.Rmd +++ b/vignettes/articles/simulating_serosurveys.Rmd @@ -1,10 +1,5 @@ --- title: "Simulating Serosurveys" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{simulating_serosurveys} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} @@ -19,7 +14,6 @@ library(serofoi) library(ggplot2) library(dplyr) library(purrr) -library(forcats) ``` ```{r ggplot theme, include=FALSE} theme_set(theme_bw()) @@ -38,19 +32,19 @@ foi_constant <- data.frame( foi = rep(0.05, max_age) ) -foi_constant %>% - ggplot(aes(x=age, y=foi)) + +foi_constant %>% + ggplot(aes(x = age, y = foi)) + geom_line() ``` We suppose that we carry out a serological survey which randomly samples the population. For simplicity, to start, we imagine that all individual one-year group ages are sampled and the sample size in each age is the same: $n=15$. ```{r} -sample_size <- 15 +n_sample <- 15 survey_features <- data.frame( age_min = seq(1, max_age, 1), age_max = seq(1, max_age, 1), - sample_size = rep(sample_size, max_age)) + n_sample = rep(n_sample, max_age)) ``` We then put these pieces together and simulate a serosurvey. Either a time-varying FOI model or an age-varying FOI model would produce the same results here since they are equivalent when the FOI is constant. @@ -66,25 +60,8 @@ glimpse(serosurvey_constant) Now, we can plot the proportions of those seropositive, including the posterior 95% percentiles (when assuming a flat prior), and plot it along the true seropositivities (line blue): -```{r, echo=FALSE, fig.align="center"} -add_posterior_quantiles <- function(df) { - df %>% - mutate( - alpha = 1 + n_seropositive, - beta = 1 + sample_size - n_seropositive - ) %>% - mutate( - lower = qbeta(0.025, alpha, beta), - middle = qbeta(0.5, alpha, beta), - upper = qbeta(0.975, alpha, beta) - ) -} - -ggplot() + - geom_pointrange( - data = serosurvey_constant %>% add_posterior_quantiles(), - aes(x = age_min, y = middle, ymin = lower, ymax = upper) - ) + +```{r, fig.align="center"} +plot_serosurvey(serosurvey_constant) + geom_line( data = probability_seropositive_by_age( model = "age", @@ -95,9 +72,7 @@ ggplot() + color = "blue", linewidth = 1 ) + - scale_y_continuous(labels=scales::percent) + - ylab("Seropositivity") + - xlab("Age") + scale_y_continuous(labels = scales::percent) ``` ## Age-varying FOI @@ -106,11 +81,11 @@ We now consider a pathogen which has an FOI that varies throughout the age of in ```{r, fig.align="center"} foi_age_varying <- data.frame( age = seq(1, max_age, 1) -) %>% - mutate(foi=0.1 * exp(-0.1 * age)) +) %>% + mutate(foi = 0.1 * exp(-0.1 * age)) -foi_age_varying %>% - ggplot(aes(x=age, y=foi)) + +foi_age_varying %>% + ggplot(aes(x = age, y = foi)) + geom_line() ``` @@ -127,48 +102,45 @@ serosurvey_age_dep <- simulate_serosurvey( Below, we see that the FOI for the age-dependent FOI pathogen increases rapidly during the earliest years then plateaus in later adulthood as the FOI effectively becomes negligible. -```{r, fig.align="center", echo=FALSE} +```{r, fig.align="center"} # combine with constant FOI survey -serosurvey_combined <- serosurvey_constant %>% - add_posterior_quantiles() %>% - rename(age = age_min) %>% - mutate(type = "constant FOI") %>% +serosurvey_combined <- rbind( + serosurvey_constant %>% + mutate(model_type = "constant FOI") %>% left_join( probability_seropositive_by_age( model = "age", foi = foi_constant, seroreversion_rate = 0 - ), - by = "age") %>% - bind_rows( - serosurvey_age_dep %>% - add_posterior_quantiles() %>% - rename(age = age_min) %>% - mutate(type = "age-dependent FOI") %>% + ) %>% + rename(age_min = age), + by = "age_min" + ), + serosurvey_age_dep %>% + mutate(model_type = "age-dependent FOI") %>% left_join( probability_seropositive_by_age( model = "age", foi = foi_age_varying, seroreversion_rate = 0 - ), - by = "age") - ) %>% - mutate(type = as.factor(type)) - -# plot both -ggplot(data = serosurvey_combined) + - geom_pointrange( - aes(x = age, y = middle, ymin = lower, ymax = upper) - ) + + ) %>% + rename(age_min = age), + by = "age_min" + ) + ) %>% + mutate(model_type = as.factor(model_type)) + +plot_serosurvey(serosurvey = serosurvey_combined) + geom_line( - aes(x = age, y = seropositivity), + data = serosurvey_combined, + aes(x = age_min, y = seropositivity), color = "blue", linewidth = 1 ) + - scale_y_continuous(labels=scales::percent) + + scale_y_continuous(labels = scales::percent) + ylab("Seropositivity") + xlab("Age") + - facet_wrap(~type) + facet_wrap(~model_type) ``` ## Time-dependent FOI @@ -189,8 +161,8 @@ foi_spiky <- data.frame( ) # plot -foi_spiky %>% - ggplot(aes(x=year, y=foi)) + +foi_spiky %>% + ggplot(aes(x = year, y = foi)) + geom_line() ``` @@ -205,13 +177,9 @@ serosurvey_spiky <- simulate_serosurvey( Again, we can plot the true seropositivities, which highlights the jumps in seropositivity corresponding to cohorts born either side of epidemics: -```{r, echo=FALSE, fig.align="center"} +```{r, fig.align="center"} # plot shows jumps in seropositivity -ggplot() + - geom_pointrange( - data = serosurvey_spiky %>% add_posterior_quantiles(), - aes(x = age_min, y = middle, ymin = lower, ymax = upper) - ) + +plot_serosurvey(serosurvey_spiky) + geom_line( data = probability_seropositive_by_age( model = "time", @@ -222,7 +190,7 @@ ggplot() + color = "blue", linewidth = 1 ) + - scale_y_continuous(labels=scales::percent) + + scale_y_continuous(labels = scales::percent) + ylab("Seropositivity") + xlab("Age") ``` @@ -239,8 +207,8 @@ foi_df_time <- data.frame( ) # plot -foi_df_time %>% - ggplot(aes(x=year, y=foi)) + +foi_df_time %>% + ggplot(aes(x = year, y = foi)) + geom_line() ``` @@ -256,46 +224,46 @@ foi_df_age <- data.frame( ) # plot -foi_df_age %>% - ggplot(aes(x=age, y=foi)) + +foi_df_age %>% + ggplot(aes(x = age, y = foi)) + geom_line() ``` To create the overall FOI (which is a function of both time and age), we create the product of the time- and age-dependent parts of it. ```{r} foi_age_time <- expand.grid( - year=foi_df_time$year, - age=foi_df_age$age -) %>% - left_join(foi_df_age, by="age") %>% - rename(foi_age=foi) %>% - left_join(foi_df_time, by="year") %>% - rename(foi_time=foi) %>% - mutate(foi = foi_age * foi_time) %>% + year = foi_df_time$year, + age = foi_df_age$age +) %>% + left_join(foi_df_age, by = "age") %>% + rename(foi_age = foi) %>% + left_join(foi_df_time, by = "year") %>% + rename(foi_time = foi) %>% + mutate(foi = foi_age * foi_time) %>% select(-c("foi_age", "foi_time")) ``` ```{r, fig.align="center", echo=FALSE} # plot a few birth years select_birth_years <- c(1955, 1965, 1975, 1985, 1995, 2005, 2015) -foi_age_time %>% - mutate(birth_year = year - age) %>% - filter(birth_year >= 1945) %>% +foi_age_time %>% + mutate(birth_year = year - age) %>% + filter(birth_year >= 1945) %>% arrange(birth_year, age) %>% - filter(birth_year %in% select_birth_years) %>% - mutate(birth_year = as.factor(birth_year)) %>% - ggplot(aes(x=year, y=foi, colour=birth_year)) + + filter(birth_year %in% select_birth_years) %>% + mutate(birth_year = as.factor(birth_year)) %>% + ggplot(aes(x = year, y = foi, colour = birth_year)) + geom_line() ``` We now simulate a serosurvey assuming these historical FOIs generated the population-wide seropositivities in 2025; we make the sample sizes larger to be able to clearly visualise the patterns in seropositivity. We also illustrate how serosurveys with wider age-bins can be generated by choosing 5-year bins. ```{r} max_age <- 80 -sample_size <- 50 +n_sample <- 50 survey_features <- data.frame( age_min = seq(1, max_age, 5), - age_max = seq(5, max_age, 5)) %>% - mutate(sample_size = rep(sample_size, length(age_min))) + age_max = seq(5, max_age, 5)) %>% + mutate(n_sample = rep(n_sample, length(age_min))) serosurvey <- simulate_serosurvey( model = "age-time", @@ -304,13 +272,9 @@ serosurvey <- simulate_serosurvey( ) ``` -```{r, echo=FALSE, fig.align="center"} +```{r, fig.align="center"} # plot -ggplot() + - geom_pointrange( - data = serosurvey %>% add_posterior_quantiles(), - aes(x = age_min, y = middle, ymin = lower, ymax = upper) - ) + +plot_serosurvey(serosurvey) + geom_line( data = probability_seropositive_by_age( model = "age-time", @@ -321,7 +285,7 @@ ggplot() + color = "blue", linewidth = 1 ) + - scale_y_continuous(labels=scales::percent) + + scale_y_continuous(labels = scales::percent) + ylab("Seropositivity") + xlab("Age") ``` @@ -337,48 +301,46 @@ serosurvey_serorevert <- simulate_serosurvey( ) ``` -```{r, fig.align="center", echo=FALSE} +```{r, fig.align="center"} # combine with constant FOI survey -serosurvey_combined <- serosurvey %>% - add_posterior_quantiles() %>% - rename(age = age_min) %>% - mutate(type = "non-seroreverting") %>% +serosurvey_combined <- rbind( + serosurvey %>% + mutate(model_type = "non-seroreverting") %>% left_join( probability_seropositive_by_age( model = "age-time", foi = foi_age_time, seroreversion_rate = 0 - ), - by = "age") %>% - bind_rows( - serosurvey_serorevert %>% - add_posterior_quantiles() %>% - rename(age = age_min) %>% - mutate(type = "seroreverting") %>% + ) %>% + rename(age_min = age), + by = "age_min" + ), + serosurvey_serorevert %>% + mutate(model_type = "seroreverting") %>% left_join( - probability_seropositive_by_age( + probability_seropositive_by_age( model = "age-time", foi = foi_age_time, seroreversion_rate = 0.01 - ), - by = "age") - ) %>% - mutate(type = as.factor(type)) + ) %>% + rename(age_min = age), + by = "age_min" + ) + ) %>% + mutate(model_type = as.factor(model_type)) # plot both -ggplot(data = serosurvey_combined) + - geom_pointrange( - aes(x = age, y = middle, ymin = lower, ymax = upper) - ) + +plot_serosurvey(serosurvey = serosurvey_combined) + geom_line( - aes(x = age, y = seropositivity), + data = serosurvey_combined, + aes(x = age_min, y = seropositivity), color = "blue", linewidth = 1 ) + - scale_y_continuous(labels=scales::percent) + + scale_y_continuous(labels = scales::percent) + ylab("Seropositivity") + xlab("Age") + - facet_wrap(~type) + facet_wrap(~model_type) ``` # Simulating from a general serological model @@ -435,18 +397,18 @@ v <- foi_df_time$foi construct_A <- function(t, tau, u, v) { u_bar <- u[t - tau] v_bar <- v[t] - + A <- diag(-1, ncol = 12, nrow = 12) A[row(A) == (col(A) + 1)] <- 1 A[1, 1] <- -u_bar * v_bar A[2, 1] <- u_bar * v_bar A[12, 12] <- 0 - + A } # determines the sum of seropositive compartments of those still alive -calculate_seropositivity_function <- function(Y) { +calculate_seropositivity_fn <- function(Y) { sum(Y[2:11]) / (1 - Y[12]) } @@ -457,16 +419,16 @@ initial_conditions[1] <- 1 # solve seropositive_hiv <- probability_seropositive_general_model_by_age( construct_A, - calculate_seropositivity_function, + calculate_seropositivity_fn, initial_conditions, - max_age=80, + max_age = 80, u, v ) -seropositive_hiv %>% - ggplot(aes(x=age, y=seropositivity)) + +seropositive_hiv %>% + ggplot(aes(x = age, y = seropositivity)) + geom_line() + - scale_y_continuous(labels=scales::percent) + + scale_y_continuous(labels = scales::percent) + ylab("Seropositivity") + xlab("Age") ``` @@ -475,21 +437,24 @@ We can also simulate a serosurvey assuming the above model. ```{r} serosurvey <- simulate_serosurvey_general_model( construct_A, - calculate_seropositivity_function, + calculate_seropositivity_fn, initial_conditions, survey_features, u, v -) +) ``` -```{r, echo=FALSE, fig.align="center"} +```{r, fig.align="center"} # plot -serosurvey %>% - add_posterior_quantiles() %>% - ggplot(aes(x=age_min, y=middle)) + - geom_pointrange(aes(ymin=lower, ymax=upper)) + - geom_smooth(se=FALSE) + - scale_y_continuous(labels=scales::percent) + - ylab("Seropositivity") +plot_serosurvey(serosurvey) + + geom_line( + data = seropositive_hiv, + aes(x = age, y = seropositivity), + color = "blue", + linewidth = 1 + ) + + scale_y_continuous(labels = scales::percent) + + ylab("Seropositivity") + + xlab("Age") ``` diff --git a/vignettes/use_cases.Rmd b/vignettes/articles/use_cases.Rmd similarity index 80% rename from vignettes/use_cases.Rmd rename to vignettes/articles/use_cases.Rmd index 57db4b39..0afbce3b 100644 --- a/vignettes/use_cases.Rmd +++ b/vignettes/articles/use_cases.Rmd @@ -1,15 +1,7 @@ --- -title: "Real-life use cases for serofoi" -output: rmarkdown::html_vignette -bibliography: references.bib -link-citations: true -vignette: > - %\VignetteIndexEntry{Real-life use cases for serofoi} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} +title: "Real-Life Use Cases For Serofoi" --- - ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, @@ -48,50 +40,53 @@ serofoi was used to compare three potential scenarios of Chikungunya transmissio ```{r chik_fast, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=5, fig.asp=1, fig.align="center", fig.keep="all"} # Load and prepare data data("chik2015") -chik2015p <- prepare_serodata(chik2015) # Implementation of the models -m1_chik <- fit_seromodel( - serodata = chik2015p, - foi_model = "constant", - iter = 1000, - thin = 2 +seromodel_constant <- fit_seromodel( + serosurvey = chik2015, + model_type = "constant", + iter = 1000 ) -m2_chik <- fit_seromodel( - serodata = chik2015p, - foi_model = "tv_normal", - chunk_size = 5, - iter = 2000, - thin = 2 + +seromodel_time <- fit_seromodel( + serosurvey = chik2015, + model_type = "time", + foi_prior = sf_normal(0, 0.01), + foi_index = get_foi_index(chik2015, group_size = 5), + iter = 2500 ) -m3_chik <- fit_seromodel( - serodata = chik2015p, - foi_model = "tv_normal_log", - chunk_size = 5, - iter = 2000, - thin = 2 +seromodel_log_time <- fit_seromodel( + serosurvey = chik2015, + model_type = "time", + foi_prior = sf_normal(0, 0.01), + is_log_foi = TRUE, + foi_index = get_foi_index(chik2015, group_size = 5), + iter = 2000 ) # Visualisation of the results -p1_chik <- plot_seromodel(m1_chik, - serodata = chik2015p, - max_lambda = 0.07, +plot_constant <- plot_seromodel( + seromodel = seromodel_constant, + serosurvey = chik2015, + foi_max = 0.07, size_text = 6 ) -p2_chik <- plot_seromodel(m2_chik, - serodata = chik2015p, - max_lambda = 0.07, +plot_time <- plot_seromodel( + seromodel = seromodel_time, + serosurvey = chik2015, + foi_max = 0.07, size_text = 6 ) -p3_chik <- plot_seromodel(m3_chik, - serodata = chik2015p, - max_lambda = 0.07, +plot_log_time <- plot_seromodel( + seromodel = seromodel_log_time, + serosurvey = chik2015, + foi_max = 0.07, size_text = 6 ) -cowplot::plot_grid(p1_chik, p2_chik, p3_chik, ncol = 3) +cowplot::plot_grid(plot_constant, plot_time, plot_log_time, ncol = 3) ``` Figure 1. ***serofoi*** models for FoI estimates of Chikungunya virus transmission in an urban remote area of Brazil. @@ -113,58 +108,57 @@ From [@carrera2020], we use a dataset measuring IgG antibodies against *VEEV* i ### The result: -***serofoi*** was used to compare three potential scenarios of *VEEV* transmission: *constant endemic*, *epidemic slow*, and *epidemic fast*. The results showed a significant increase in the estimated *Force-of-Infection (FoI)* in the region, indicating a rise in *VEEV* transmission. The study found that there was much higher statistical support for a time-varying rather than a constant scenario based on higher elpd and lower se values of the two time-varying models compared to the constant one (Figure 2). The results also suggest slightly (yet relevant) better support for model 3 (`tv-nomal-log`), compared to model 2 (`tv-normal`), suggesting a recent increase in transmission in the study area. +***serofoi*** was used to compare three potential scenarios of *VEEV* transmission: *constant endemic*, *epidemic slow*, and *epidemic fast*. The results showed a significant increase in the estimated *Force-of-Infection (FoI)* in the region, indicating a rise in *VEEV* transmission. The study found that there was much higher statistical support for a time-varying rather than a constant scenario based on higher elpd and lower se values of the two time-varying models compared to the constant one (Figure 2). The results also suggest slightly (yet relevant) better support for model 3 (`log-time`), compared to model 2 (`time`), suggesting a recent increase in transmission in the study area. ```{r veev_hidden, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=5, fig.asp=1, fig.align="center", fig.keep="all"} # Load and prepare data data("veev2012") -veev2012p <- prepare_serodata(veev2012) # Implementation of the models -m1_veev <- fit_seromodel( - serodata = veev2012p, - foi_model = "constant", - iter = 500, - thin = 2 +seromodel_constant <- fit_seromodel( + serosurvey = veev2012, + model_type = "constant", + iter = 1000 ) -m2_veev <- fit_seromodel( - serodata = veev2012p, - foi_model = "tv_normal", - chunk_size = 5, - iter = 2000, - thin = 2 +seromodel_time <- fit_seromodel( + serosurvey = veev2012, + model_type = "time", + foi_prior = sf_normal(0, 0.1), + foi_index = get_foi_index(veev2012, group_size = 5), + iter = 2000 ) -m3_veev <- fit_seromodel( - serodata = veev2012p, - foi_model = "tv_normal_log", - chunk_size = 5, - iter = 2000, - thin = 2 +seromodel_log_time <- fit_seromodel( + serosurvey = veev2012, + model_type = "time", + foi_prior = sf_normal(0, 0.1), + is_log_foi = TRUE, + foi_index = get_foi_index(veev2012, group_size = 5), + iter = 2000 ) # Visualisation of the results -p1_veev <- plot_seromodel( - m1_veev, - serodata = veev2012p, - max_lambda = 0.35, +plot_constant <- plot_seromodel( + seromodel = seromodel_constant, + serosurvey = veev2012, + foi_max = 0.35, size_text = 6 ) -p2_veev <- plot_seromodel( - m2_veev, - serodata = veev2012p, - max_lambda = 0.35, +plot_time <- plot_seromodel( + seromodel = seromodel_time, + serosurvey = veev2012, + foi_max = 0.35, size_text = 6 ) -p3_veev <- plot_seromodel( - m3_veev, - serodata = veev2012p, - max_lambda = 0.35, +plot_log_time <- plot_seromodel( + seromodel = seromodel_log_time, + serosurvey = veev2012, + foi_max = 0.35, size_text = 6 ) -cowplot::plot_grid(p1_veev, p2_veev, p3_veev, ncol = 3) +cowplot::plot_grid(plot_constant, plot_time, plot_log_time, ncol = 3) ``` Figure 2. ***serofoi*** models for *FoI* estimates of *Venezuelan Equine Encephalitis Virus (VEEV)* transmission in a rural remote area of Panama. @@ -190,39 +184,42 @@ Because Chagas is an endemic disease, we should use only the ***serofoi*** endem ```{r chagas_endemic, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=5, fig.asp=1, fig.align="center", fig.keep="all"} # Load and prepare data data("chagas2012") -chagas2012p <- prepare_serodata(chagas2012) # Implementation of the models -m1_cha <- fit_seromodel( - serodata = chagas2012p, - foi_model = "constant", +seromodel_constant <- fit_seromodel( + serosurvey = chagas2012, + model_type = "constant", iter = 800 ) -m2_cha <- fit_seromodel( - serodata = chagas2012p, - foi_model = "tv_normal", - chunk_size = 5, - iter = 1000 +seromodel_time <- fit_seromodel( + serosurvey = chagas2012, + model_type = "time", + foi_index = get_foi_index(chagas2012, group_size = 10), + iter = 1500 ) # Visualisation of the results -p1_cha <- plot_seromodel( - m1_cha, - serodata = chagas2012p, - size_text = 6, - bin_data = FALSE -) -p2_cha <- plot_seromodel( - m2_cha, - serodata = chagas2012p, - size_text = 6, - bin_data = FALSE -) -cowplot::plot_grid(p1_cha, p2_cha, ncol = 2) +plot_constant <- plot_seromodel( + seromodel = seromodel_constant, + serosurvey = chagas2012, + bin_serosurvey = TRUE, + bin_step = 10, + size_text = 6 +) +plot_time <- plot_seromodel( + seromodel = seromodel_time, + serosurvey = chagas2012, + bin_serosurvey = TRUE, + bin_step = 10, + size_text = 6 +) +cowplot::plot_grid(plot_constant, plot_time, ncol = 2) ``` Figure 3. ***serofoi*** endemic models for *FoI* estimates of *Trypanosoma cruzi* in a rural area of Colombia. ```{r clean env cha, include = FALSE, echo = TRUE, message=FALSE} rm(list = ls(pattern = "cha")) ``` + ## References + diff --git a/vignettes/foi_models.Rmd b/vignettes/foi_models.Rmd deleted file mode 100644 index 90f481b9..00000000 --- a/vignettes/foi_models.Rmd +++ /dev/null @@ -1,311 +0,0 @@ ---- -title: "An introduction to Force-of-Infection (FoI) Models" -output: rmarkdown::html_vignette -bibliography: references.bib -link-citations: true -vignette: > - %\VignetteIndexEntry{An introduction to Force-of-Infection (FoI) Models} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -```{r cleaning, include = FALSE, echo = TRUE} -library(serofoi) -library(dplyr) -``` - -The current version of ***serofoi*** supports three different models for estimating the *Force-of-Infection (FoI)*, including constant and time-varying trajectories. For fitting the model to the seroprevalence data we use a suit of bayesian models that include prior and upper prior distributions - -## What is the Force-of-Infection - -The force of infection, also known as the hazard rate or the infection pressure, is a key concept in mathematical modelling of infectious diseases. It represents the rate at which susceptible individuals become infected, given their exposure to a pathogen. In simple terms, the force of infection quantifies the risk of a susceptible individual becoming infected over a period of time. It is usually expressed as a rate per unit of time (e.g., per day or per year). - -## Constant vs Time-varying FoI - -The *FoI* is one of the most important parameters in epidemiology, but it is often incorrectly assumed to be constant over time. Identifying whether the *FoI* follows a constant or a time-varying trend can be important in the identification and characterization of the spread of disease. In Table 1 there is a summary of the models currently supported by serofoi. - -::: l-body-outset -| Model Option | Description and usage | -|-----------------|:-----------------------------------------------------------:| -| `constant` | Constant FoI | -| `tv_normal` | Time-varying normal FoI: slow change in FoI | -| `tv_normal_log` | Time-varying normal-log FoI: fast epidemic change in FoI | -::: -Table 1. Model options and descriptions. - -## Model 1. Constant Force-of-Infection (endemic model) -The *endemic constant model* is a simple mathematical model used in epidemiology to describe the seroprevalence of an infectious disease within a population, as a product of a long-term transmission. - -For a constant FoI endemic model, the rate of infection acquisition $\lambda$ is constant over time for each trajectory, and the seroprevalence $P$ behaves as a cumulative process increasing monotonically with age. For the seroprevalence at age $a$ and time $t$, we have: -$$ -P(a,t) = 1-\exp\left(-\lambda a\right) -$$ -The number of positive cases follows a binomial distribution, where $n$ is the number of trials (size of the age group) and $P$ is the probability of successes (seroprevalence) for a certain age group: -$$ -p(a,t) \sim binom(n(a,t), P(a,t)) -$$ -In ***serofoi***, for the constant model, the *FoI* ($\lambda$) is modelled within a Bayesian framework using a uniform prior distribution $\sim U(0,2)$. - -To test the `constant` model we simulate a serosurvey following a stepwise decreasing *FoI* (red line in Fig. 1) using the data simulation functions of ***serofoi***: - -```{r constant simdata, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE} -# how the disease circulates -foi_df <- data.frame( - year = seq(2000, 2049, 1), - foi = rep(0.02, 50) -) - -# specify 5-year age bins for observations -survey_features <- data.frame( - age_min = seq(1, 50, 5), - age_max = seq(5, 50, 5), - sample_size = rep(25, 10) -) - -serodata_constant <- simulate_serosurvey( - "time", - foi_df, - survey_features -) %>% -rename( - total = sample_size, - counts = n_seropositive -) %>% -mutate( - tsur = 2050, - survey = "simdata_constant" -) %>% -prepare_serodata() -``` - -The simulated dataset `serodata_constant` contains information about 250 samples of individuals between 1 and 50 years old (5 samples per age) with age groups of 5 years length. The following code shows how to implement the constant model to this simulated serosurvey: - -```{r constant model, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} -seromodel_constant <- fit_seromodel( - serodata = serodata_constant, - foi_model = "constant", - iter = 800 -) -plot_seromodel( - seromodel_constant, - serodata = serodata_constant, - foi = foi_df, - size_text = 6 -) -``` -Figure 1. Constant serofoi model plot. Simulated (red) vs modelled (blue) *FoI*. - -```{r clean env constant, include = FALSE, echo = TRUE, message=FALSE} -rm(list = ls(pattern = "_constant")) -``` - -In this case, 800 iterations are enough to ensure convergence. The `plot_seromodel` method provides a visualisation of the results, including a summary where the expected log pointwise predictive density (`elpd`) and its standard error (`se`) are shown. We say that a model converges if all the R-hat estimates are below 1.01. - -## Time-varying FoI models - -For the time-varying *FoI* models, the probability for a case to be positive at age a at time $t$ also follows a binomial distribution, as described above. However, the seroprevalence is obtained from a cumulative of the yearly-varying values of the *FoI* over time: -$$ -P(a,t) = 1 - \exp\left(-\sum_{i=t-a+1}^{t}\lambda_i\right) -$$ -The corresponding serosurvey completed at time $t_{sur}$ is informative for the interval $[t_{sur}-a_{max}, t_{sur}]$. - -## Model 2. Time-varying FoI - Slow Time-Varying FoI - -The *time-varying slow normal model* relies on the following prior distributions for the *FoI* to describe the spread of a given infectious disease within a population over time: -$$ -\lambda(t)\sim normal(\lambda(t-1), \sigma) \\ -\lambda(t=1) \sim normal(0, 1) -$$ -To test the `tv_normal` model we simulate a serosurvey following a stepwise decreasing *FoI* (red line in Fig. 2) using the data simulation functions of ***serofoi***: - -```{r tv_normal simdata, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE} - -foi_df <- data.frame( - year = seq(2000, 2049, 1), - foi = c( - rep(0.2, 25), - rep(0.1, 10), - rep(0.00001, 15) - ) -) - -survey_features <- data.frame( - age_min = seq(1, 50, 5), - age_max = seq(5, 50, 5), - sample_size = rep(25, 10) -) - -serodata_sw_dec <- simulate_serosurvey( - "time", - foi_df, - survey_features -) %>% -rename( - total = sample_size, - counts = n_seropositive -) %>% -mutate( - tsur = 2050, - survey = "simdata_step_wise" -) %>% -prepare_serodata() - -``` - -The simulated dataset `foi_sim_sw_dec` contains information about 250 samples of individuals between 1 and 50 years old (5 samples per age) with age groups of 5 years length. The following code shows how to implement the slow time-varying normal model to this simulated serosurvey: - -```{r tv_normal model, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} - -seromodel_tv_normal <- fit_seromodel( - serodata = serodata_sw_dec, - foi_model = "tv_normal", - chunks = rep(c(1, 2, 3), c(23, 10, 15)), - iter = 1500 -) -plot_seromodel( - seromodel_tv_normal, - serodata = serodata_sw_dec, - foi = foi_df, - size_text = 6 -) -``` -Figure 2. Slow time-varying serofoi model plot. Simulated (red) vs modelled (blue) *FoI*. - -```{r clean env sw_dec, include = FALSE, echo = TRUE, message=FALSE} -rm(list = ls(pattern = "_sw_dec|_normal")) -``` - -The number of iterations required may depend on the number of years, reflected by the difference between the year of the serosurvey and the maximum age-class sampled. - -## Model 3. Time-varying FoI - Fast Epidemic Model -The *time-varying fast epidemic model*, relies on normal prior distributions for the *FoI* in the logarithmic scale, i.e: -$$ -\lambda(t)\sim normal(\log(\lambda(t-1)), \sigma) \\ -\lambda(t=1) \sim normal(-6, 4) -$$ -This is done in order to capture fast changes in the *FoI* trend. Importantly, the standard deviation parameter of this normal distribution of the *FoI* $\lambda(t)$ is set using an upper prior that follows a Cauchy distribution. - -To test the `tv_normal_log` model we simulate a serosurvey conducted in 2050 emulating a hypothetical situation where a three-year epidemic occurred between 2030 and 2035: - -```{r tv_normal_log simdata, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE} -foi_df <- data.frame( - year = seq(2000, 2049, 1), - foi = c( - rep(0, 30), - rep(0.6, 5), - rep(0, 15) - ) -) - -survey_features <- data.frame( - age_min = seq(1, 50, 5), - age_max = seq(5, 50, 5), - sample_size = rep(25, 10) -) - -serodata_large_epi <- simulate_serosurvey( - survey_features = survey_features, - foi = foi_df, - model = "time" -) %>% -rename( - total = sample_size, - counts = n_seropositive -) %>% -mutate( - tsur = 2050, - survey = "simdata_big_outbreak" -) %>% -prepare_serodata() - -``` - -The simulated serosurvey tests 250 individuals between 1 and 50 years old by the year 2050. The implementation of the fast epidemic model can be obtained running the following lines of code: - -```{r tv_normal_log model, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} -seromodel_tv_normal_log <- fit_seromodel( - serodata = serodata_large_epi, - foi_model = "tv_normal_log", - chunks = rep(c(1, 2, 3), c(28, 5, 15)), - iter = 2000 -) - -plot_tv_normal_log <- plot_seromodel( - seromodel_tv_normal_log, - serodata = serodata_large_epi, - foi = foi_df, - size_text = 6 -) -plot(plot_tv_normal_log) -``` -Figure 3. *Time-varying fast epidemic serofoi model* plot. Simulated (red) vs modelled (blue) *FoI*. - -In Fig 3 we can see that the *fast epidemic serofoi model* is able to identify the large epidemic simulated on the `simdata_large_epi` dataset. - -## Models Comparison - -The statistical details of the three models are described in Table 2. - -::: l-body-outset -| Model Option | Probability of positive case at age $a$ | Prior distribution | Upper priors | -|:----------------|:----------------------------------------|:----------------------------------------------------------|:------------------------| -| `constant` | $\sim binom(n(a,t), P(a,t))$ | $\lambda\sim uniform(0,2)$ | | -| `tv_normal` | $\sim binom(n(a,t), P(a,t))$ | $\lambda\sim normal(\lambda(t-1),\sigma)\\ - \lambda(t=1)\sim normal(0,1)$ | $\sigma\sim Cauchy(0,1)$| -| `tv_normal_log` | $\sim binom(n(a,t), P(a,t))$ | $\lambda\sim normal(log(\lambda(t-1)),\sigma)\\ - \lambda(t=1)\sim normal(-6,4)$ | $\sigma\sim Cauchy(0,1)$| -::: -Table 2. Statistical characteristics of ***serofoi***'s currently supported models for the FoI ($\lambda$). Here $n$ is the size of an age group $a$ at time-step $t$ and $P$ is its corresponding seroprevalence. - -Above we showed that the fast epidemic model (`tv_normal_log`) is able to identify the large epidemic outbreak described by the `simdata_large_epi` dataset, which was simulated according to a step-wise decreasing *FoI* (red line in Fig 3). - -Now, we would like to know whether this model actually fits this dataset better than the other available models in ***serofoi***. For this, we also implement both the endemic model (`constant`) and the slow time-varying normal model (`tv_normal`): - -```{r model_comparison, include = FALSE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE } -seromodel_constant <- fit_seromodel( - serodata = serodata_large_epi, - foi_model = "constant", - iter = 800 -) -plot_constant <- plot_seromodel( - seromodel_constant, - serodata = serodata_large_epi, - foi = foi_df, - size_text = 6 -) - -seromodel_tv_normal <- fit_seromodel( - serodata = serodata_large_epi, - foi_model = "tv_normal", - chunks = rep(c(1, 2, 3), c(28, 5, 15)), - iter = 2000 -) -plot_tv_normal <- plot_seromodel( - seromodel_tv_normal, - serodata = serodata_large_epi, - foi = foi_df, - size_text = 6 -) -``` -Using the function `cowplot::plot_grid` we can visualise the results of the three models simultaneously: - -```{r model_comparison_plot, include = TRUE, echo = TRUE, errors = FALSE, warning = FALSE, message = FALSE, fig.width=5, fig.asp=1, fig.align="center"} -cowplot::plot_grid( - plot_constant, plot_tv_normal, plot_tv_normal_log, - nrow = 1, ncol = 3, labels = "AUTO" -) -``` -Figure 4. Model comparison between the three serofoi models for a large-epidemic simulated dataset. - -A common criterion to decide what model fits the data the best is to choose the one with the larger `elpd`. According to this criterion, in this case the best model is the fast epidemic model, which is the only one that manages to identify the large epidemic (see the second row of panel C in Figure 4). - -NOTE: Running the ***serofoi*** models for the first time on your local computer may take a few minutes for the rstan code to compile locally. However, once the initial compilation is complete, there is no further need for local compilation. - -## References diff --git a/vignettes/serofoi.Rmd b/vignettes/serofoi.Rmd index 11a882c1..34d34de1 100644 --- a/vignettes/serofoi.Rmd +++ b/vignettes/serofoi.Rmd @@ -61,15 +61,17 @@ The integrated dataset `serodata_test` provides a minimal example of the input o library(serofoi) # Loading and preparing data for modelling data(chagas2012) -serodata_test <- prepare_serodata(chagas2012) + # Model implementation model_constant <- fit_seromodel( - serodata = serodata_test, - foi_model = "constant" + serosurvey = chagas2012, + model_type = "constant" ) # Visualisation -plot_seromodel(model_constant, - serodata = serodata_test, +plot_seromodel( + model_constant, + serosurvey = chagas2012, + bin_serosurvey = TRUE, size_text = 6 ) ```