diff --git a/NEWS.md b/NEWS.md index 2451988b..215c42b7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,52 @@ +# serofoi development version + +## Documentation + +* Datasets `simdata_*` were removed from the package and replaced by corresponding code to simulate data in vignettes (see [#184](https://github.com/epiverse-trace/serofoi/pull/184)). + +## Breaking changes + +* Update R-hat convergence threshold to $\hat{R} < 1.01$ ([Vehtari, Aki, et al. 2021](https://projecteuclid.org/journals/bayesian-analysis/volume-16/issue-2/Rank-Normalization-Folding-and-Localization--An-Improved-R%cb%86-for/10.1214/20-BA1221.full)) + +* Add `av_normal` model without seroreversion. + +* Allow for uniform prior parameters specification for `constant` model $\sim U(a, b)$ + +* Change initial prior parameters input specification in `fit_seromodel`. Now they are specified by means +parameter `foi_parameter` as follows: + +``` +# constant model +foi_model <- "constant" +foi_parameter <- list( + foi_a = 0.01, + foi_b = 0.1 +) + +# normal models +foi_model <- "tv_normal" # "tv_normal_log" or "av_normal" +foi_parameters <- list( + foi_location = 0.1, + foi_scale = 0.05 +) + +# running the model +seromodel <- fit_seromodel( + serodata = serodata, + foi_model = foi_model, + foi_parameters = foi_parameters +) +``` + +Note that the meaning of the parameters may vary depending on the model. + + +## Minor changes + +* Add input validation for `plot_rhats`. + +* The x-axis label in `plot_foi` and `plot_rhats` is `"age"` or `"year"` depending on the model type. + # serofoi 0.1.0 ## New features diff --git a/R/model_comparison.R b/R/model_comparison.R index d596c9bc..cc566013 100644 --- a/R/model_comparison.R +++ b/R/model_comparison.R @@ -22,13 +22,24 @@ #' @export get_table_rhats <- function(seromodel_object, cohort_ages) { + checkmate::assert_class(seromodel_object, "stanfit") + rhats <- bayesplot::rhat(seromodel_object, "foi") if (any(is.nan(rhats))) { - rhats[which(is.nan(rhats))] <- 0 + warn_msg <- paste0( + length(which(is.nan(rhats))), + " rhat values are `nan`, ", + "indicating the model may not have run correctly for those times.\n", + "Setting those rhat values to `NA`." + ) + warning(warn_msg) + rhats[which(is.nan(rhats))] <- NA } - model_rhats <- data.frame(year = cohort_ages$birth_year, rhat = rhats) - model_rhats$rhat[model_rhats$rhat == 0] <- NA + model_rhats <- data.frame( + year = cohort_ages$birth_year, + rhat = rhats + ) return(model_rhats) } diff --git a/R/modelling.R b/R/modelling.R index 4e2a0952..3c89621f 100644 --- a/R/modelling.R +++ b/R/modelling.R @@ -140,7 +140,7 @@ validate_prepared_serodata <- function(serodata) { #' \dontrun{ #' data(chagas2012) #' serodata <- prepare_serodata(chagas2012) -#' run_seromodel( +#' fit_seromodel( #' serodata, #' foi_model = "constant" #' ) @@ -149,10 +149,9 @@ validate_prepared_serodata <- function(serodata) { run_seromodel <- function( serodata, foi_model = c("constant", "tv_normal_log", "tv_normal"), - foi_location = 0, - foi_scale = 1, - chunk_size = 1, + foi_parameters = NULL, chunks = NULL, + chunk_size = 1, iter = 1000, adapt_delta = 0.90, max_treedepth = 10, @@ -168,10 +167,9 @@ run_seromodel <- function( seromodel_object <- fit_seromodel( serodata = serodata, foi_model = foi_model, - foi_location = foi_location, - foi_scale = foi_scale, - chunk_size = chunk_size, + foi_parameters = foi_parameters, chunks = chunks, + chunk_size = chunk_size, iter = iter, adapt_delta = adapt_delta, max_treedepth = max_treedepth, @@ -220,10 +218,13 @@ run_seromodel <- function( #' \item{`"tv_normal"`}{Runs a normal model} #' \item{`"tv_normal_log"`}{Runs a normal logarithmic model} #' } -#' @param foi_location Location parameter of the force-of-infection distribution -#' of the selected model. Depending on `foi_model`, the meaning may vary. -#' @param foi_scale Scale parameter of the force-of-infection distribution -#' of the selected model. Depending on `foi_model`, the meaning may vary. +#' @param foi_parameters List specifying the initial prior parameters of the +#' model `foi_model` to be specified as (e.g.): +#' \describe{ +#' \item{`"constant"`}{`list(foi_a = 0, foi_b = 2)`} +#' \item{`"tv_normal"`}{`list(foi_location = 0, foi_scale = 1)`} +#' \item{`"tv_normal_log"`}{`list(foi_location = -6, foi_scale = 4)`} +#' } #' @param chunks Numeric list specifying the chunk structure of the time #' interval from the birth year of the oldest age cohort #' `min(serodata$age_mean_f)` to the time when the serosurvey was conducted @@ -261,9 +262,8 @@ run_seromodel <- function( #' @export fit_seromodel <- function( serodata, - foi_model = c("constant", "tv_normal_log", "tv_normal"), - foi_location = 0, - foi_scale = 1, + foi_model = c("constant", "tv_normal_log", "tv_normal", "av_normal"), + foi_parameters = NULL, chunks = NULL, chunk_size = 1, iter = 1000, @@ -272,16 +272,48 @@ fit_seromodel <- function( seed = 12345, ...) { serodata <- validate_prepared_serodata(serodata) + err_msg <- paste0( + "foi_model must be either ", + "constant, ", + "tv_normal, tv_normal_log, or ", + "av_normal" + ) stopifnot( - "foi_model must be either `constant`, `tv_normal_log`, or `tv_normal`" = - foi_model %in% c("constant", "tv_normal_log", "tv_normal"), + err_msg = + foi_model %in% c( + "constant", + "tv_normal", "tv_normal_log", + "av_normal" + ), "iter must be numeric" = is.numeric(iter), "seed must be numeric" = is.numeric(seed) ) + + # Set default foi parameters + if (is.null(foi_parameters)) { + if (foi_model == "constant") { + foi_parameters <- list( + foi_a = 0, + foi_b = 2 + ) + } else if (foi_model %in% c("tv_normal", "av_normal")) { + foi_parameters <- list( + foi_location = 0, + foi_scale = 1 + ) + } else if (foi_model == "tv_normal_log") { + foi_parameters <- list( + foi_location = -6, + foi_scale = 4 + ) + } + } + + # Load Stan model model <- stanmodels[[foi_model]] exposure_matrix <- get_exposure_matrix(serodata) - n_obs <- nrow(serodata) + # Set default chunks structure if (is.null(chunks)) { chunks <- get_chunk_structure( serodata = serodata, @@ -294,17 +326,48 @@ fit_seromodel <- function( length(chunks) == max(serodata$age_mean_f) ) + # Build Stan data stan_data <- list( - n_obs = n_obs, + n_obs = nrow(serodata), n_pos = serodata$counts, n_total = serodata$total, age_max = max(serodata$age_mean_f), - observation_exposure_matrix = exposure_matrix, - chunks = chunks, - foi_location = foi_location, - foi_scale = foi_scale + chunks = chunks ) + if (foi_model %in% c("constant", "tv_normal", "tv_normal_log")) { + exposure_matrix <- get_exposure_matrix(serodata) + stan_data <- append( + stan_data, + list( + observation_exposure_matrix = exposure_matrix + ) + ) + } else if (foi_model == "av_normal") { + stan_data <- append( + stan_data, + list(ages = serodata$age_mean_f) + ) + } + + if (foi_model == "constant") { + stan_data <- append( + stan_data, + list( + foi_a = foi_parameters$foi_a, + foi_b = foi_parameters$foi_b + ) + ) + } else { + stan_data <- append( + stan_data, + list( + foi_location = foi_parameters$foi_location, + foi_scale = foi_parameters$foi_scale + ) + ) + } + if (foi_model == "tv_normal_log") { f_init <- function() { list(log_fois = rep(-3, max(chunks))) @@ -447,6 +510,10 @@ get_exposure_matrix <- function(serodata) { #' model by means of [run_seromodel]. #' @param cohort_ages A data frame containing the age of each cohort #' corresponding to each birth year. +#' @param lower_quantile Lower quantile used to compute the credible interval of +#' the fitted force-of-infection. +#' @param upper_quantile Lower quantile used to compute the credible interval of +#' the fitted force-of-infection. #' @return `foi_central_estimates`. Central estimates for the fitted forced FoI #' @examples #' data(chagas2012) @@ -461,27 +528,37 @@ get_exposure_matrix <- function(serodata) { #' cohort_ages = cohort_ages #' ) #' @export -get_foi_central_estimates <- function(seromodel_object, - cohort_ages) { - if (seromodel_object@model_name == "tv_normal_log") { - lower_quantile <- 0.1 - upper_quantile <- 0.9 - medianv_quantile <- 0.5 - } else { - lower_quantile <- 0.05 - upper_quantile <- 0.95 - medianv_quantile <- 0.5 - } +get_foi_central_estimates <- function( + seromodel_object, + cohort_ages, + lower_quantile = 0.05, + upper_quantile = 0.95 + ) { # extracts force-of-infection from stan fit foi <- rstan::extract(seromodel_object, "foi", inc_warmup = FALSE)[[1]] + # defines time scale depending on the type of the model + if( + seromodel_object@model_name %in% + c("constant", "tv_normal", "tv_normal_log") + ) { + foi_central_estimates <- data.frame( + year = cohort_ages$birth_year + ) + } else if (seromodel_object@model_name == "av_normal") { + foi_central_estimates <- data.frame( + age = rev(cohort_ages$age) + ) + } + # generates central estimations - foi_central_estimates <- data.frame( - year = cohort_ages$birth_year, - lower = apply(foi, 2, quantile, lower_quantile), - upper = apply(foi, 2, quantile, upper_quantile), - medianv = apply(foi, 2, quantile, medianv_quantile) - ) + foi_central_estimates <- foi_central_estimates %>% + mutate( + lower = apply(foi, 2, quantile, lower_quantile), + upper = apply(foi, 2, quantile, upper_quantile), + medianv = apply(foi, 2, quantile, 0.5) + ) + return(foi_central_estimates) } @@ -559,8 +636,17 @@ extract_seromodel_summary <- function(seromodel_object, seromodel_object = seromodel_object, cohort_ages = cohort_ages ) - if (!any(rhats$rhat > 1.1)) { + + if (all(rhats$rhat <= 1.01)) { model_summary$converged <- "Yes" + } else { + model_summary$converged <- "No" + warn_msg <- paste0( + length(which(rhats$rhat > 1.01)), + " rhat values are above 1.01. ", + "Running the chains for more iterations is recommended." + ) + warning(warn_msg) } return(model_summary) diff --git a/R/simdata_constant.R b/R/simdata_constant.R deleted file mode 100644 index f86b6cdf..00000000 --- a/R/simdata_constant.R +++ /dev/null @@ -1,18 +0,0 @@ -#' Constant force-of-infection simulated serosurvey -#' -#' Simulated dataset describing a endemic situation where the force-of-infection -#' is constant over a period of 50 years (2000-2050) with a value of 0.2. -#' The hypothetical serosurvey is conducted in year 2050 for 250 individuals -#' that are up to 50 years old. -#' -#' @docType data -#' -#' @usage simdata_constant -#' -#' @format An object of class `"cross"`; see [qtl::read.cross()]. -#' -#' @keywords datasets -#' -#' @examples -#' simdata_constant -"simdata_constant" diff --git a/R/simdata_large_epi.R b/R/simdata_large_epi.R deleted file mode 100644 index b057f737..00000000 --- a/R/simdata_large_epi.R +++ /dev/null @@ -1,18 +0,0 @@ -#' Large epidemic simulated serosurvey -#' -#' Simulated dataset describing a large epidemic between the years 2032 and 2035 -#' with a constant force-of-infection with value 1.5. -#' The hypothetical serosurvey is conducted in year 2050 for 250 individuals -#' that are up to 50 years old. -#' -#' @docType data -#' -#' @usage simdata_large_epi -#' -#' @format An object of class `"cross"`; see [qtl::read.cross()]. -#' -#' @keywords datasets -#' -#' @examples -#' simdata_large_epi -"simdata_large_epi" diff --git a/R/simdata_sw_dec.R b/R/simdata_sw_dec.R deleted file mode 100644 index 809496f4..00000000 --- a/R/simdata_sw_dec.R +++ /dev/null @@ -1,18 +0,0 @@ -#' Step-wise decreasing force-of-infection simulated serosurvey -#' -#' Simulated dataset describing a situation where the force-of-infection -#' follows a step-wise decreasing tendency over a period of 50 years (2000-2050) -#' The hypothetical serosurvey is conducted in year 2050 for 250 individuals -#' that are up to 50 years old. -#' -#' @docType data -#' -#' @usage simdata_sw_dec -#' -#' @format An object of class `"cross"`; see [qtl::read.cross()]. -#' -#' @keywords datasets -#' -#' @examples -#' simdata_sw_dec -"simdata_sw_dec" diff --git a/R/stanmodels.R b/R/stanmodels.R index 4d385063..e3529b84 100644 --- a/R/stanmodels.R +++ b/R/stanmodels.R @@ -1,9 +1,10 @@ # Generated by rstantools. Do not edit by hand. # names of stan models -stanmodels <- c("constant", "tv_normal", "tv_normal_log") +stanmodels <- c("av_normal", "constant", "tv_normal", "tv_normal_log") # load each stan module +Rcpp::loadModule("stan_fit4av_normal_mod", what = TRUE) Rcpp::loadModule("stan_fit4constant_mod", what = TRUE) Rcpp::loadModule("stan_fit4tv_normal_mod", what = TRUE) Rcpp::loadModule("stan_fit4tv_normal_log_mod", what = TRUE) diff --git a/R/visualisation.R b/R/visualisation.R index fb6e2303..2304e8f9 100644 --- a/R/visualisation.R +++ b/R/visualisation.R @@ -166,8 +166,10 @@ plot_seroprev_fitted <- function(seromodel_object, #' @inheritParams get_foi_central_estimates #' @param size_text Text size use in the theme of the graph returned by the #' function. -#' @param max_lambda TBD -#' @param foi_sim TBD +#' @param max_lambda Upper `ylim`for force-of-infection plot +#' @param foi_sim Force-of-infection trend to be plotted alongside the estimated +#' force-of-infection. Typically this corresponds to the force-of-infection used +#' to simulate the serosurvey used to model. #' @return A ggplot2 object containing the force-of-infection vs time including #' the corresponding confidence interval. #' @examples @@ -191,14 +193,41 @@ plot_foi <- function(seromodel_object, size_text = 25, foi_sim = NULL) { checkmate::assert_class(seromodel_object, "stanfit", null.ok = TRUE) + #-------- This bit is to get the actual length of the foi data foi_data <- get_foi_central_estimates( seromodel_object = seromodel_object, cohort_ages = cohort_ages ) - foi_plot <- - ggplot2::ggplot(foi_data, ggplot2::aes(x = .data$year)) + + if ( + seromodel_object@model_name %in% + c("constant", "tv_normal", "tv_normal_log") + ) { + xlab <- "year" + foi_plot <- + ggplot2::ggplot(foi_data, ggplot2::aes(x = .data$year)) + + if (!is.null(foi_sim)) { + foi_sim_data <- data.frame( + year = foi_data$year + ) + } + } else if ( + seromodel_object@model_name == "av_normal" + ) { + xlab <- "age" + foi_plot <- + ggplot2::ggplot(foi_data, ggplot2::aes(x = .data$age)) + + if (!is.null(foi_sim)) { + foi_sim_data <- data.frame( + age = foi_data$age + ) + } + } + + foi_plot <- foi_plot + ggplot2::geom_ribbon( ggplot2::aes( ymin = .data$lower, @@ -215,7 +244,7 @@ plot_foi <- function(seromodel_object, ggplot2::theme_bw(size_text) + ggplot2::coord_cartesian(ylim = c(0, max_lambda)) + ggplot2::ylab("Force-of-Infection") + - ggplot2::xlab("year") + ggplot2::xlab(xlab) if (!is.null(foi_sim)) { if (nrow(foi_data) != length(foi_sim)) { @@ -281,49 +310,41 @@ plot_foi <- function(seromodel_object, plot_rhats <- function(seromodel_object, cohort_ages, size_text = 25) { - if (is.character(seromodel_object)) { - message("model did not run") - print_warning <- "errors" + checkmate::assert_class(seromodel_object, "stanfit", null.ok = TRUE) - rhats_plot <- ggplot2::ggplot(data.frame()) + - ggplot2::geom_point() + - ggplot2::xlim(0, 10) + - ggplot2::ylim(0, 10) + - ggplot2::annotate("text", - x = 4, - y = 5, - label = print_warning - ) + - ggplot2::theme_bw(25) + - ggplot2::theme( - axis.text.x = ggplot2::element_blank(), - axis.text.y = ggplot2::element_blank() - ) + - ggplot2::ylab(" ") + - ggplot2::xlab(" ") - ggplot2::theme(plot.title = ggplot2::element_text(size = 10)) - } else { - if (!is.null(seromodel_object@sim$samples)) { - rhats <- get_table_rhats( - seromodel_object = seromodel_object, - cohort_ages = cohort_ages - ) + rhats <- get_table_rhats( + seromodel_object = seromodel_object, + cohort_ages = cohort_ages + ) - rhats_plot <- - ggplot2::ggplot(rhats, ggplot2::aes(.data$year, .data$rhat)) + - ggplot2::geom_line(colour = "purple") + - ggplot2::geom_point() + - ggplot2::coord_cartesian(ylim = c(0.7, 2)) + - ggplot2::geom_hline( - yintercept = 1.1, - colour = "blue", - size = size_text / 12 - ) + - ggplot2::theme_bw(size_text) + - ggplot2::ylab("Convergence (R^)") - } + if ( + seromodel_object@model_name %in% + c("constant", "tv_normal", "tv_normal_log") + ) { + rhats_plot <- + ggplot2::ggplot(rhats, ggplot2::aes(.data$year, .data$rhat)) + } else if (seromodel_object@model_name == "av_normal") { + rhats_plot <- + ggplot2::ggplot(rhats, ggplot2::aes(.data$age, .data$rhat)) } + rhats_plot <- rhats_plot + + ggplot2::geom_line(colour = "purple") + + ggplot2::geom_point() + + ggplot2::coord_cartesian( + ylim = c( + min(1.0, min(rhats$rhat)), + max(1.02, max(rhats$rhat)) + ) + ) + + ggplot2::geom_hline( + yintercept = 1.01, + colour = "blue", + size = size_text / 12 + ) + + ggplot2::theme_bw(size_text) + + ggplot2::ylab("Convergence (R^)") + return(rhats_plot) } diff --git a/data/simdata_constant.RData b/data/simdata_constant.RData deleted file mode 100644 index 62df2d5d..00000000 Binary files a/data/simdata_constant.RData and /dev/null differ diff --git a/data/simdata_large_epi.RData b/data/simdata_large_epi.RData deleted file mode 100644 index 3617b6ca..00000000 Binary files a/data/simdata_large_epi.RData and /dev/null differ diff --git a/data/simdata_sw_dec.RData b/data/simdata_sw_dec.RData deleted file mode 100644 index e6d8be6e..00000000 Binary files a/data/simdata_sw_dec.RData and /dev/null differ diff --git a/inst/WORDLIST b/inst/WORDLIST index ac0c137d..aa4b41e8 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,4 +1,5 @@ Aedes +Aki al Alphavirus Alphaviruses @@ -20,6 +21,7 @@ chikungunya Codecov Conteh cruzi +Cucunubá Darien de Dib @@ -49,6 +51,7 @@ Mariscal María mc Neira +Nicolás org packagetemplate Panamá @@ -79,6 +82,7 @@ serosurveys Serosurveys seroprev seroprevalence +seroreversion sim stan Stanfit @@ -91,6 +95,7 @@ Trypanosoma Torres Universidad VEEV +Vehtari warmup Yaneth Zulma diff --git a/inst/stan/av_normal.stan b/inst/stan/av_normal.stan new file mode 100644 index 00000000..6414fb2c --- /dev/null +++ b/inst/stan/av_normal.stan @@ -0,0 +1,66 @@ +functions { + #include functions/prob_infected_av.stan +} + +data { + int n_obs; + int age_max; + int n_pos[n_obs]; + int n_total[n_obs]; + int ages[n_obs]; + + // prior choices + int chunks[age_max]; + real foi_location; + real foi_scale; +} + +transformed data { + int n_chunks = max(chunks); +} + +parameters { + row_vector[n_chunks] fois; + real sigma; +} + +transformed parameters { + vector[n_chunks] fois_vector; + vector[n_obs] prob_infected; + + fois_vector = to_vector(fois); + + prob_infected = prob_infected_calculate( + fois_vector, + chunks, + ages, + n_obs, + 0 + ); +} + +model { + n_pos ~ binomial(n_total, prob_infected); + sigma ~ cauchy(0, 1); + + fois[1] ~ normal(foi_location, foi_scale); + for(i in 2:n_chunks) + fois[i] ~ normal(fois[i - 1], sigma); +} + +generated quantities{ + vector[n_obs] n_pos_sim; + vector[n_obs] P_sim; + vector[n_obs] logLikelihood; + vector[age_max] foi; + + for(i in 1:age_max) { + foi[i] = fois_vector[chunks[i]]; + } + + for(i in 1:n_obs){ + n_pos_sim[i] = binomial_rng(n_total[i], prob_infected[i]); + P_sim[i] = n_pos_sim[i] / n_total[i]; + logLikelihood[i] = binomial_lpmf(n_pos[i] | n_total[i], prob_infected[i]); + } +} diff --git a/inst/stan/constant.stan b/inst/stan/constant.stan index 85e8cb9d..5436fa1c 100644 --- a/inst/stan/constant.stan +++ b/inst/stan/constant.stan @@ -4,6 +4,10 @@ data { int n_pos[n_obs]; int n_total[n_obs]; matrix[n_obs, age_max] observation_exposure_matrix; + + // prior choices + real foi_a; + real foi_b; } parameters { @@ -28,7 +32,7 @@ transformed parameters { model { for (i in 1:n_obs) n_pos[i] ~ binomial(n_total[i], prob_infected[i]); - lambda0 ~ uniform (0, 2); + lambda0 ~ uniform (foi_a, foi_b); } generated quantities{ diff --git a/inst/stan/functions/prob_infected.stan b/inst/stan/functions/prob_infected.stan deleted file mode 100644 index 4709c64b..00000000 --- a/inst/stan/functions/prob_infected.stan +++ /dev/null @@ -1,41 +0,0 @@ - vector prob_infected_noseroreversion( - vector fois_vector, - matrix observation_exposure_matrix, - int n_obs, - int age_max, - int[] chunks - ) { - real scalar_dot_product; - vector[n_obs] prob_infected; - vector[age_max] foi_every_age = fois_vector[chunks]; - - for(i in 1:n_obs){ - scalar_dot_product = dot_product( - observation_exposure_matrix[i,], - foi_every_age - ); - prob_infected[i] = 1 - exp(-scalar_dot_product); - } - - return prob_infected; - } - - vector prob_infected_calculate( - vector fois_vector, - matrix observation_exposure_matrix, - int n_obs, - int age_max, - int[] chunks - ) { - vector[n_obs] prob_infected; - - prob_infected = prob_infected_noseroreversion( - fois_vector, - observation_exposure_matrix, - n_obs, - age_max, - chunks - ); - - return prob_infected; - } diff --git a/inst/stan/functions/prob_infected_av.stan b/inst/stan/functions/prob_infected_av.stan new file mode 100644 index 00000000..a9226728 --- /dev/null +++ b/inst/stan/functions/prob_infected_av.stan @@ -0,0 +1,35 @@ +real prob_infected_age_varying( + vector fois_vector, + int[] chunks, + int age, + real mu +) { + real prob = 0.0; + for(j in 1:age){ + real foi = fois_vector[chunks[j]]; + prob = (1/ (foi + mu)) * exp(-(foi + mu)) * (foi * (exp(foi + mu) - 1) + prob * (foi+mu)); + } + return prob; +} + +vector prob_infected_calculate( + vector fois_vector, + int[] chunks, + int[] ages, + int n_obs, + real mu +) { + vector[n_obs] prob_infected; + + for(i in 1:n_obs){ + int age = ages[i]; + prob_infected[i] = prob_infected_age_varying( + fois_vector, + chunks, + age, + mu + ); + } + + return prob_infected; +} diff --git a/inst/stan/functions/prob_infected_tv.stan b/inst/stan/functions/prob_infected_tv.stan new file mode 100644 index 00000000..02822545 --- /dev/null +++ b/inst/stan/functions/prob_infected_tv.stan @@ -0,0 +1,41 @@ +vector prob_infected_noseroreversion( + vector fois_vector, + int[] chunks, + matrix observation_exposure_matrix, + int n_obs, + int age_max + ) { + real scalar_dot_product; + vector[n_obs] prob_infected; + vector[age_max] foi_every_age = fois_vector[chunks]; + + for(i in 1:n_obs){ + scalar_dot_product = dot_product( + observation_exposure_matrix[i,], + foi_every_age + ); + prob_infected[i] = 1 - exp(-scalar_dot_product); + } + + return prob_infected; +} + +vector prob_infected_calculate( + vector fois_vector, + int[] chunks, + matrix observation_exposure_matrix, + int n_obs, + int age_max + ) { + vector[n_obs] prob_infected; + + prob_infected = prob_infected_noseroreversion( + fois_vector, + chunks, + observation_exposure_matrix, + n_obs, + age_max + ); + + return prob_infected; + } diff --git a/inst/stan/tv_normal.stan b/inst/stan/tv_normal.stan index b7694104..43fcc66e 100644 --- a/inst/stan/tv_normal.stan +++ b/inst/stan/tv_normal.stan @@ -1,5 +1,5 @@ functions { - #include functions/prob_infected.stan + #include functions/prob_infected_tv.stan } data { @@ -32,10 +32,10 @@ transformed parameters { prob_infected = prob_infected_calculate( fois_vector, + chunks, observation_exposure_matrix, n_obs, - age_max, - chunks + age_max ); } diff --git a/inst/stan/tv_normal_log.stan b/inst/stan/tv_normal_log.stan index c250a9f8..4b4d033b 100644 --- a/inst/stan/tv_normal_log.stan +++ b/inst/stan/tv_normal_log.stan @@ -1,5 +1,5 @@ functions { - #include functions/prob_infected.stan + #include functions/prob_infected_tv.stan } data { @@ -35,10 +35,10 @@ transformed parameters { prob_infected = prob_infected_calculate( fois_vector, + chunks, observation_exposure_matrix, n_obs, - age_max, - chunks + age_max ); } diff --git a/man/fit_seromodel.Rd b/man/fit_seromodel.Rd index 18132065..2aad529e 100644 --- a/man/fit_seromodel.Rd +++ b/man/fit_seromodel.Rd @@ -7,9 +7,8 @@ data} \usage{ fit_seromodel( serodata, - foi_model = c("constant", "tv_normal_log", "tv_normal"), - foi_location = 0, - foi_scale = 1, + foi_model = c("constant", "tv_normal_log", "tv_normal", "av_normal"), + foi_parameters = NULL, chunks = NULL, chunk_size = 1, iter = 1000, @@ -42,11 +41,13 @@ options: \item{\code{"tv_normal_log"}}{Runs a normal logarithmic model} }} -\item{foi_location}{Location parameter of the force-of-infection distribution -of the selected model. Depending on \code{foi_model}, the meaning may vary.} - -\item{foi_scale}{Scale parameter of the force-of-infection distribution -of the selected model. Depending on \code{foi_model}, the meaning may vary.} +\item{foi_parameters}{List specifying the initial prior parameters of the +model \code{foi_model} to be specified as (e.g.): +\describe{ +\item{\code{"constant"}}{\code{list(foi_a = 0, foi_b = 2)}} +\item{\code{"tv_normal"}}{\code{list(foi_location = 0, foi_scale = 1)}} +\item{\code{"tv_normal_log"}}{\code{list(foi_location = -6, foi_scale = 4)}} +}} \item{chunks}{Numeric list specifying the chunk structure of the time interval from the birth year of the oldest age cohort diff --git a/man/get_foi_central_estimates.Rd b/man/get_foi_central_estimates.Rd index a39d079a..0ae49e20 100644 --- a/man/get_foi_central_estimates.Rd +++ b/man/get_foi_central_estimates.Rd @@ -4,7 +4,12 @@ \alias{get_foi_central_estimates} \title{Extract central estimates for the fitted forced FoI} \usage{ -get_foi_central_estimates(seromodel_object, cohort_ages) +get_foi_central_estimates( + seromodel_object, + cohort_ages, + lower_quantile = 0.05, + upper_quantile = 0.95 +) } \arguments{ \item{seromodel_object}{Stanfit object containing the results of fitting a @@ -12,6 +17,12 @@ model by means of \link{run_seromodel}.} \item{cohort_ages}{A data frame containing the age of each cohort corresponding to each birth year.} + +\item{lower_quantile}{Lower quantile used to compute the credible interval of +the fitted force-of-infection.} + +\item{upper_quantile}{Lower quantile used to compute the credible interval of +the fitted force-of-infection.} } \value{ \code{foi_central_estimates}. Central estimates for the fitted forced FoI diff --git a/man/plot_foi.Rd b/man/plot_foi.Rd index e9a0960b..3cde9bff 100644 --- a/man/plot_foi.Rd +++ b/man/plot_foi.Rd @@ -20,12 +20,14 @@ model by means of \link{run_seromodel}.} \item{cohort_ages}{A data frame containing the age of each cohort corresponding to each birth year.} -\item{max_lambda}{TBD} +\item{max_lambda}{Upper \code{ylim}for force-of-infection plot} \item{size_text}{Text size use in the theme of the graph returned by the function.} -\item{foi_sim}{TBD} +\item{foi_sim}{Force-of-infection trend to be plotted alongside the estimated +force-of-infection. Typically this corresponds to the force-of-infection used +to simulate the serosurvey used to model.} } \value{ A ggplot2 object containing the force-of-infection vs time including diff --git a/man/run_seromodel.Rd b/man/run_seromodel.Rd index a9cc89af..f564ca8e 100644 --- a/man/run_seromodel.Rd +++ b/man/run_seromodel.Rd @@ -8,10 +8,9 @@ estimate the seroprevalence based on the result of the fit} run_seromodel( serodata, foi_model = c("constant", "tv_normal_log", "tv_normal"), - foi_location = 0, - foi_scale = 1, - chunk_size = 1, + foi_parameters = NULL, chunks = NULL, + chunk_size = 1, iter = 1000, adapt_delta = 0.9, max_treedepth = 10, @@ -43,11 +42,19 @@ options: \item{\code{"tv_normal_log"}}{Runs a normal logarithmic model} }} -\item{foi_location}{Location parameter of the force-of-infection distribution -of the selected model. Depending on \code{foi_model}, the meaning may vary.} +\item{foi_parameters}{List specifying the initial prior parameters of the +model \code{foi_model} to be specified as (e.g.): +\describe{ +\item{\code{"constant"}}{\code{list(foi_a = 0, foi_b = 2)}} +\item{\code{"tv_normal"}}{\code{list(foi_location = 0, foi_scale = 1)}} +\item{\code{"tv_normal_log"}}{\code{list(foi_location = -6, foi_scale = 4)}} +}} -\item{foi_scale}{Scale parameter of the force-of-infection distribution -of the selected model. Depending on \code{foi_model}, the meaning may vary.} +\item{chunks}{Numeric list specifying the chunk structure of the time +interval from the birth year of the oldest age cohort +\code{min(serodata$age_mean_f)} to the time when the serosurvey was conducted +\code{t_sur}. If \code{NULL}, the time interval is divided in chunks of size +\code{chunk_size}.} \item{chunk_size}{Size of the chunks to be used in case that the chunk structure \code{chunks} is not specified in \link{fit_seromodel}. @@ -56,12 +63,6 @@ for every year in the time interval spanned by the serosurvey. If the length of the time interval is not exactly divisible by \code{chunk_size}, the remainder years are included in the last chunk.} -\item{chunks}{Numeric list specifying the chunk structure of the time -interval from the birth year of the oldest age cohort -\code{min(serodata$age_mean_f)} to the time when the serosurvey was conducted -\code{t_sur}. If \code{NULL}, the time interval is divided in chunks of size -\code{chunk_size}.} - \item{iter}{Number of interactions for each chain including the warmup. \code{iter} in \link[rstan:stanmodel-method-sampling]{sampling}.} @@ -99,7 +100,7 @@ using the data from a seroprevalence survey \code{serodata} as the input data. S \dontrun{ data(chagas2012) serodata <- prepare_serodata(chagas2012) -run_seromodel( +fit_seromodel( serodata, foi_model = "constant" ) diff --git a/man/simdata_constant.Rd b/man/simdata_constant.Rd deleted file mode 100644 index e06f96e7..00000000 --- a/man/simdata_constant.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/simdata_constant.R -\docType{data} -\name{simdata_constant} -\alias{simdata_constant} -\title{Constant force-of-infection simulated serosurvey} -\format{ -An object of class \code{"cross"}; see \code{\link[qtl:read.cross]{qtl::read.cross()}}. -} -\usage{ -simdata_constant -} -\description{ -Simulated dataset describing a endemic situation where the force-of-infection -is constant over a period of 50 years (2000-2050) with a value of 0.2. -The hypothetical serosurvey is conducted in year 2050 for 250 individuals -that are up to 50 years old. -} -\examples{ -simdata_constant -} -\keyword{datasets} diff --git a/man/simdata_large_epi.Rd b/man/simdata_large_epi.Rd deleted file mode 100644 index 7f0ba780..00000000 --- a/man/simdata_large_epi.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/simdata_large_epi.R -\docType{data} -\name{simdata_large_epi} -\alias{simdata_large_epi} -\title{Large epidemic simulated serosurvey} -\format{ -An object of class \code{"cross"}; see \code{\link[qtl:read.cross]{qtl::read.cross()}}. -} -\usage{ -simdata_large_epi -} -\description{ -Simulated dataset describing a large epidemic between the years 2032 and 2035 -with a constant force-of-infection with value 1.5. -The hypothetical serosurvey is conducted in year 2050 for 250 individuals -that are up to 50 years old. -} -\examples{ -simdata_large_epi -} -\keyword{datasets} diff --git a/man/simdata_sw_dec.Rd b/man/simdata_sw_dec.Rd deleted file mode 100644 index 406cffc7..00000000 --- a/man/simdata_sw_dec.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/simdata_sw_dec.R -\docType{data} -\name{simdata_sw_dec} -\alias{simdata_sw_dec} -\title{Step-wise decreasing force-of-infection simulated serosurvey} -\format{ -An object of class \code{"cross"}; see \code{\link[qtl:read.cross]{qtl::read.cross()}}. -} -\usage{ -simdata_sw_dec -} -\description{ -Simulated dataset describing a situation where the force-of-infection -follows a step-wise decreasing tendency over a period of 50 years (2000-2050) -The hypothetical serosurvey is conducted in year 2050 for 250 individuals -that are up to 50 years old. -} -\examples{ -simdata_sw_dec -} -\keyword{datasets} diff --git a/tests/testthat/models_serialization.R b/tests/testthat/models_serialization.R deleted file mode 100644 index 3926c1ff..00000000 --- a/tests/testthat/models_serialization.R +++ /dev/null @@ -1,40 +0,0 @@ -library(serofoi) -library(testthat) - -set.seed(1234) # For reproducibility - -#----- Read and prepare data -data("simdata_large_epi") -simdata <- prepare_serodata(simdata_large_epi) -no_transm <- 0.0000000001 -big_outbreak <- 1.5 -foi_sim <- c(rep(no_transm, 32), rep(big_outbreak, 3), rep(no_transm, 15)) # 1 epidemics - - -#----- Run models -models_to_run <- c( - "constant", - "tv_normal", - "tv_normal_log" -) - -models_list <- lapply(models_to_run, - run_seromodel, - serodata = simdata, - iter = 1000 -) - -saveRDS( - models_list[[1]], - testthat::test_path("extdata", "model_constant.RDS") -) - -saveRDS( - models_list[[2]], - testthat::test_path("extdata", "model_tv_normal.RDS") -) - -saveRDS( - models_list[[3]], - testthat::test_path("extdata", "model_tv_normal_log.RDS") -) diff --git a/vignettes/foi_models.Rmd b/vignettes/foi_models.Rmd index b70b3793..31a3071e 100644 --- a/vignettes/foi_models.Rmd +++ b/vignettes/foi_models.Rmd @@ -50,37 +50,46 @@ The number of positive cases follows a binomial distribution, where $n$ is the n $$ p(a,t) \sim binom(n(a,t), P(a,t)) $$ -In ***serofoi***, for the constant model, the *FoI* ($\lambda$) is modelled within a Bayesian framework using a uniform prior distribution $\sim U(0,2)$. Future versions of the package may allow to choose different default distributions. This model can be implemented for the previously prepared dataset `data_test` by means of the `fit_seromodel` function specifying `fit_seromodel="constant"`. +In ***serofoi***, for the constant model, the *FoI* ($\lambda$) is modelled within a Bayesian framework using a uniform prior distribution $\sim U(0,2)$. -The object `simdata_constant` contains a minimal simulated dataset that emulates an hypothetical endemic situation where the *FoI* is constant with value 0.2 and includes data for 250 samples of individuals between 2 and 47 years old with a number of trials $n=5$. The following code shows how to implement the constant model to this simulated serosurvey: +To test the `constant` model we simulate a serosurvey following a stepwise decreasing *FoI* (red line in Fig. 1) using the data simulation functions of ***serofoi***: -```{r model_1, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="none"} -data("simdata_constant") -serodata_constant <- prepare_serodata(simdata_constant) -model_1 <- fit_seromodel( +```{r constant simdata, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE} +foi_sim_constant <- rep(0.02, 50) + +serodata_constant <- generate_sim_data( + sim_data = data.frame( + age=seq(1,50), + tsur=2050), + foi=foi_sim_constant, + sample_size_by_age = 5 +) |> + group_sim_data(step = 5) + +``` + +The simulated dataset `serodata_constant` contains information about 250 samples of individuals between 1 and 50 years old (5 samples per age) with age groups of 5 years length. The following code shows how to implement the constant model to this simulated serosurvey: + +```{r constant model, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} +seromodel_constant <- fit_seromodel( serodata = serodata_constant, foi_model = "constant", iter = 800 ) plot_seromodel( - model_1, - serodata = serodata_constant, - size_text = 6 -) -``` -```{r model_1_plot, include = TRUE, echo = FALSE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} -foi_sim_constant <- rep(0.02, 50) -model_1_plot <- plot_seromodel( - model_1, + seromodel_constant, serodata = serodata_constant, foi_sim = foi_sim_constant, size_text = 6 ) -plot(model_1_plot) ``` Figure 1. Constant serofoi model plot. Simulated (red) vs modelled (blue) *FoI*. -In this case, 800 iterations are enough to ensure convergence. The `plot_seromodel` method provides a visualisation of the results, including a summary where the expected log pointwise predictive density (`elpd`) and its standard error (`se`) are shown. We say that a model converges if all the R-hat estimates are below 1.1. +```{r clean env constant, include = FALSE, echo = TRUE, message=FALSE} +rm(list = ls(pattern = "_constant")) +``` + +In this case, 800 iterations are enough to ensure convergence. The `plot_seromodel` method provides a visualisation of the results, including a summary where the expected log pointwise predictive density (`elpd`) and its standard error (`se`) are shown. We say that a model converges if all the R-hat estimates are below 1.01. ## Time-varying FoI models @@ -97,34 +106,46 @@ $$ \lambda(t)\sim normal(\lambda(t-1), \sigma) \\ \lambda(t=1) \sim normal(0, 1) $$ -The object `simdata_sw_dec` contains a minimal simulated dataset that emulates a situation where the *FoI* follows a stepwise decreasing tendency (*FoI* panel in Fig. 2). The simulated dataset contains information about 250 samples of individuals between 2 and 47 years old with a number of trials $n=5$. The following code shows how to implement the slow time-varying normal model to this simulated serosurvey: +To test the `tv_normal` model we simulate a serosurvey following a stepwise decreasing *FoI* (red line in Fig. 2) using the data simulation functions of ***serofoi***: + +```{r tv_normal simdata, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE} +no_transm <- 0.0000000001 +foi_sim_sw_dec <- c(rep(0.2, 25), rep(0.1, 10), rep(no_transm, 15)) -```{r model_2, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="none"} -data("simdata_sw_dec") -serodata_sw_dec <- prepare_serodata(simdata_sw_dec) -model_2 <- fit_seromodel( +serodata_sw_dec <- generate_sim_data( + sim_data = data.frame( + age=seq(1,50), + tsur=2050), + foi=foi_sim_sw_dec, + sample_size_by_age = 5 +) |> + group_sim_data(step = 5) + +``` + +The simulated dataset `foi_sim_sw_dec` contains information about 250 samples of individuals between 1 and 50 years old (5 samples per age) with age groups of 5 years length. The following code shows how to implement the slow time-varying normal model to this simulated serosurvey: + +```{r tv_normal model, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} + +seromodel_tv_normal <- fit_seromodel( serodata = serodata_sw_dec, foi_model = "tv_normal", + chunks = rep(c(1, 2, 3), c(23, 10, 15)), iter = 1500 ) -plot_seromodel(model_2, - serodata = serodata_sw_dec, - size_text = 6 -) -``` -```{r model_2_plot, include = TRUE, echo = FALSE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} -no_transm <- 0.0000000001 -foi_sim_sw_dec <- c(rep(0.2, 25), rep(0.1, 10), rep(no_transm, 17)) - -model_2_plot <- plot_seromodel(model_2, +plot_seromodel( + seromodel_tv_normal, serodata = serodata_sw_dec, foi_sim = foi_sim_sw_dec, size_text = 6 ) -plot(model_2_plot) ``` Figure 2. Slow time-varying serofoi model plot. Simulated (red) vs modelled (blue) *FoI*. +```{r clean env sw_dec, include = FALSE, echo = TRUE, message=FALSE} +rm(list = ls(pattern = "_sw_dec|_normal")) +``` + The number of iterations required may depend on the number of years, reflected by the difference between the year of the serosurvey and the maximum age-class sampled. ## Model 3. Time-varying FoI - Fast Epidemic Model @@ -135,37 +156,45 @@ $$ $$ This is done in order to capture fast changes in the *FoI* trend. Importantly, the standard deviation parameter of this normal distribution of the *FoI* $\lambda(t)$ is set using an upper prior that follows a Cauchy distribution. -In order to test this model we use the minimal simulated dataset contained in the `simdata_large_epi` object. This dataset emulates a hypothetical situation where a three-year epidemic occurs between 2032 and 2035. The simulated serosurvey tests 250 individuals from 0 to 50 years of age in the year 2050. The implementation of the fast epidemic model can be obtained running the following lines of code: +To test the `tv_normal_log` model we simulate a serosurvey conducted in 2050 emulating a hypothetical situation where a three-year epidemic occurred between 2030 and 2035: -```{r model_3, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="none"} -data("simdata_large_epi") -serodata_large_epi <- prepare_serodata(simdata_large_epi) -model_3 <- fit_seromodel( - serodata = serodata_large_epi, - foi_model = "tv_normal_log", - iter = 1500 -) -model_3_plot <- plot_seromodel(model_3, - serodata = serodata_large_epi, - size_text = 6 -) -plot(model_3_plot) -``` -```{r model_3_plot, include = TRUE, echo = FALSE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} +```{r tv_normal_log simdata, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE} no_transm <- 0.0000000001 -big_outbreak <- 1.5 +big_outbreak <- 0.6 foi_sim_large_epi <- c( - rep(no_transm, 32), - rep(big_outbreak, 3), + rep(no_transm, 30), + rep(big_outbreak, 5), rep(no_transm, 15) ) -model_3_plot <- plot_seromodel(model_3, +serodata_large_epi <- generate_sim_data( + sim_data = data.frame( + age=seq(1,50), + tsur=2050), + foi=foi_sim_large_epi, + sample_size_by_age = 25 +) |> + group_sim_data(step = 5) + +``` + +The simulated serosurvey tests 250 individuals between 1 and 50 years old by the year 2050. The implementation of the fast epidemic model can be obtained running the following lines of code: + +```{r tv_normal_log model, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} +seromodel_tv_normal_log <- fit_seromodel( + serodata = serodata_large_epi, + foi_model = "tv_normal_log", + chunks = rep(c(1, 2, 3), c(28, 5, 15)), + iter = 2000 +) + +plot_tv_normal_log <- plot_seromodel( + seromodel_tv_normal_log, serodata = serodata_large_epi, foi_sim = foi_sim_large_epi, size_text = 6 ) -plot(model_3_plot) +plot(plot_tv_normal_log) ``` Figure 3. *Time-varying fast epidemic serofoi model* plot. Simulated (red) vs modelled (blue) *FoI*. @@ -191,54 +220,36 @@ Above we showed that the fast epidemic model (`tv_normal_log`) is able to identi Now, we would like to know whether this model actually fits this dataset better than the other available models in ***serofoi***. For this, we also implement both the endemic model (`constant`) and the slow time-varying normal model (`tv_normal`): ```{r model_comparison, include = FALSE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE } -model_1 <- fit_seromodel( +seromodel_constant <- fit_seromodel( serodata = serodata_large_epi, foi_model = "constant", iter = 800 ) -model_1_plot <- plot_seromodel(model_1, +plot_constant <- plot_seromodel( + seromodel_constant, serodata = serodata_large_epi, + foi_sim = foi_sim_large_epi, size_text = 6 ) -model_2 <- fit_seromodel( +seromodel_tv_normal <- fit_seromodel( serodata = serodata_large_epi, foi_model = "tv_normal", - iter = 1500 + chunks = rep(c(1, 2, 3), c(28, 5, 15)), + iter = 2000 ) -model_2_plot <- plot_seromodel(model_2, +plot_tv_normal <- plot_seromodel( + seromodel_tv_normal, serodata = serodata_large_epi, + foi_sim = foi_sim_large_epi, size_text = 6 ) ``` Using the function `cowplot::plot_grid` we can visualise the results of the three models simultaneously: ```{r model_comparison_plot, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=5, fig.asp=1, fig.align="center", fig.keep="none"} -cowplot::plot_grid(model_1_plot, model_2_plot, model_3_plot, - nrow = 1, ncol = 3, labels = "AUTO" -) -``` -```{r model_comparison_plot_, include = TRUE, echo = FALSE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=5, fig.asp=1, fig.align="center", fig.keep="all"} -size_text <- 6 -model_1_plot <- plot_seromodel( - model_1, - serodata = serodata_large_epi, - foi_sim = foi_sim_large_epi, - size_text = size_text -) -model_2_plot <- plot_seromodel( - model_2, - serodata = serodata_large_epi, - foi_sim = foi_sim_large_epi, - size_text = size_text -) -model_3_plot <- plot_seromodel( - model_3, - serodata = serodata_large_epi, - foi_sim = foi_sim_large_epi, - size_text = size_text -) -cowplot::plot_grid(model_1_plot, model_2_plot, model_3_plot, +cowplot::plot_grid( + plot_constant, plot_tv_normal, plot_tv_normal_log, nrow = 1, ncol = 3, labels = "AUTO" ) ``` diff --git a/vignettes/use_cases.Rmd b/vignettes/use_cases.Rmd index c145791c..2e3b536e 100644 --- a/vignettes/use_cases.Rmd +++ b/vignettes/use_cases.Rmd @@ -61,34 +61,43 @@ m1_chik <- fit_seromodel( m2_chik <- fit_seromodel( serodata = chik2015p, foi_model = "tv_normal", - iter = 1500, + chunk_size = 5, + iter = 2000, thin = 2 ) m3_chik <- fit_seromodel( serodata = chik2015p, foi_model = "tv_normal_log", - iter = 1500, + chunk_size = 5, + iter = 2000, thin = 2 ) # Visualisation of the results p1_chik <- plot_seromodel(m1_chik, serodata = chik2015p, + max_lambda = 0.07, size_text = 6 ) p2_chik <- plot_seromodel(m2_chik, serodata = chik2015p, + max_lambda = 0.07, size_text = 6 ) p3_chik <- plot_seromodel(m3_chik, serodata = chik2015p, + max_lambda = 0.07, size_text = 6 ) cowplot::plot_grid(p1_chik, p2_chik, p3_chik, ncol = 3) ``` -Figure 1. Serofoi models for FoI estimates of Chikungunya virus transmission in an urban remote area of Brazil. +Figure 1. ***serofoi*** models for FoI estimates of Chikungunya virus transmission in an urban remote area of Brazil. + +```{r clean env chik, include = FALSE, echo = TRUE, message=FALSE} +rm(list = ls(pattern = "chik")) +``` ## Case study 2. Hidden Alphaviruses epidemics in Panama @@ -122,28 +131,36 @@ m1_veev <- fit_seromodel( m2_veev <- fit_seromodel( serodata = veev2012p, foi_model = "tv_normal", - iter = 500, + chunk_size = 5, + iter = 2000, thin = 2 ) m3_veev <- fit_seromodel( serodata = veev2012p, foi_model = "tv_normal_log", - iter = 500, + chunk_size = 5, + iter = 2000, thin = 2 ) # Visualisation of the results -p1_veev <- plot_seromodel(m1_veev, +p1_veev <- plot_seromodel( + m1_veev, serodata = veev2012p, + max_lambda = 0.35, size_text = 6 ) -p2_veev <- plot_seromodel(m2_veev, +p2_veev <- plot_seromodel( + m2_veev, serodata = veev2012p, + max_lambda = 0.35, size_text = 6 ) -p3_veev <- plot_seromodel(m3_veev, +p3_veev <- plot_seromodel( + m3_veev, serodata = veev2012p, + max_lambda = 0.35, size_text = 6 ) @@ -151,6 +168,10 @@ cowplot::plot_grid(p1_veev, p2_veev, p3_veev, ncol = 3) ``` Figure 2. ***serofoi*** models for *FoI* estimates of *Venezuelan Equine Encephalitis Virus (VEEV)* transmission in a rural remote area of Panama. +```{r clean env veev, include = FALSE, echo = TRUE, message=FALSE} +rm(list = ls(pattern = "veev")) +``` + ## Case study 3. Chagas disease (endemic disease) ::: {.alert .alert-secondary} @@ -164,7 +185,7 @@ Chagas disease is a parasitic infection caused by the protozoan *Trypanosoma cru Based on the data and analysis shown in [@cucunubá2017], we use one of the datasets that measure the seroprevalence of IgG antibodies against *Trypanosoma cruzi* infection in rural areas of Colombia. The dataset is part of the ***serofoi*** package as `chagas2012`. This dataset corresponds to a serosurvey conducted in 2012 for a rural indigenous community known to have long-term endemic transmission, where some control interventions have taken place over the years. ### The result -Because Chagas is an endemic disease, we should use only the ***serofoi*** endemic models (1. `constant`, 2. `tv-normal`) on the `chagas2012` dataset and compare which model is better supported. The results are shown in Figure 3. We found that for this serosurvey, both ***serofoi*** models converged (based on R-hat values not crossing 1.1), but the comparison of the two models shows a relevant slow decreasing trend, which was consistent with model 2 - `tv-normal`. This model was statistically better supported based on the highest `elpd` and lowest `se` values compared to the constant model. These results suggest a slow, still relevant decrease in the *FoI* values over the last decades which may have been a consequence of some of the interventions or local environmental changes that have occurred in the study area over the years, up to the point (2012) when the serosurvey was conducted. +Because Chagas is an endemic disease, we should use only the ***serofoi*** endemic models (1. `constant`, 2. `tv-normal`) on the `chagas2012` dataset and compare which model is better supported. The results are shown in Figure 3. We found that for this serosurvey, both ***serofoi*** models converged (based on R-hat values not crossing 1.01), but the comparison of the two models shows a relevant slow decreasing trend, which was consistent with model 2 - `tv-normal`. This model was statistically better supported based on the highest `elpd` and lowest `se` values compared to the constant model. These results suggest a slow, still relevant decrease in the *FoI* values over the last decades which may have been a consequence of some of the interventions or local environmental changes that have occurred in the study area over the years, up to the point (2012) when the serosurvey was conducted. ```{r chagas_endemic, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=5, fig.asp=1, fig.align="center", fig.keep="all"} # Load and prepare data @@ -180,7 +201,8 @@ m1_cha <- fit_seromodel( m2_cha <- fit_seromodel( serodata = chagas2012p, foi_model = "tv_normal", - iter = 800 + chunk_size = 5, + iter = 1000 ) # Visualisation of the results @@ -197,4 +219,8 @@ p2_cha <- plot_seromodel( cowplot::plot_grid(p1_cha, p2_cha, ncol = 2) ``` Figure 3. ***serofoi*** endemic models for *FoI* estimates of *Trypanosoma cruzi* in a rural area of Colombia. + +```{r clean env cha, include = FALSE, echo = TRUE, message=FALSE} +rm(list = ls(pattern = "cha")) +``` ## References