From d97c9213534ca0ceb12ff60ed3508cc61e3d86e5 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Fri, 24 May 2024 11:56:55 -0500 Subject: [PATCH 01/78] clean R and tests folders --- R/chagas2012.R | 17 -- R/chik2015.R | 15 -- R/seroprevalence_data.R | 147 ------------------ R/veev2012.R | 15 -- tests/testthat/clean_expected_files.R | 14 -- tests/testthat/extdata/haiti_ssa_sample.RDS | Bin 1150 -> 0 bytes tests/testthat/test_fit_seromodel.R | 164 -------------------- tests/testthat/test_issue_47.R | 24 --- tests/testthat/testing_utils.R | 69 -------- 9 files changed, 465 deletions(-) delete mode 100644 R/chagas2012.R delete mode 100644 R/chik2015.R delete mode 100644 R/seroprevalence_data.R delete mode 100644 R/veev2012.R delete mode 100644 tests/testthat/clean_expected_files.R delete mode 100644 tests/testthat/extdata/haiti_ssa_sample.RDS delete mode 100644 tests/testthat/test_fit_seromodel.R delete mode 100644 tests/testthat/test_issue_47.R delete mode 100644 tests/testthat/testing_utils.R diff --git a/R/chagas2012.R b/R/chagas2012.R deleted file mode 100644 index 6c7ea6f8..00000000 --- a/R/chagas2012.R +++ /dev/null @@ -1,17 +0,0 @@ -# TODO Check if we really need to have the package `qtl` installed. Otherwise -# remove all entries of the form `see \code{\link[qtl]...` -#' Seroprevalence data on serofoi -#' -#' Data from a serological surveys -#' -#' @docType data -#' -#' @usage chagas2012 -#' -#' @format An object of class `"cross"`; see [qtl::read.cross()]. -#' -#' @keywords datasets -#' -#' @examples -#' chagas2012 -"chagas2012" diff --git a/R/chik2015.R b/R/chik2015.R deleted file mode 100644 index 30142601..00000000 --- a/R/chik2015.R +++ /dev/null @@ -1,15 +0,0 @@ -#' Seroprevalence data on serofoi -#' -#' Data from a serological surveys -#' -#' @docType data -#' -#' @usage chik2015 -#' -#' @format An object of class `"cross"`; see [qtl::read.cross()]. -#' -#' @keywords datasets -#' -#' @examples -#' chik2015 -"chik2015" diff --git a/R/seroprevalence_data.R b/R/seroprevalence_data.R deleted file mode 100644 index 94d56450..00000000 --- a/R/seroprevalence_data.R +++ /dev/null @@ -1,147 +0,0 @@ -# TODO Complete @param documentation - - -#' Prepare data from a serological survey for modelling -#' -#' This function adds the necessary additional variables to the given dataset -#' `serodata` corresponding to a serological survey. -#' @param serodata A data frame containing the data from a serological survey. -#' This data frame must contain the following columns: -#' \describe{ -#' \item{`survey`}{survey Label of the current survey} -#' \item{`total`}{Number of samples for each age group} -#' \item{`counts`}{Number of positive samples for each age group} -#' \item{`age_min`}{age_min} -#' \item{`age_max`}{age_max} -#' \item{`tsur`}{Year in which the survey took place} -#' \item{`country`}{The country where the survey took place} -#' \item{`test`}{The type of test taken} -#' \item{`antibody`}{antibody} -#' } -#' Alternatively to `age_min` and `age_max`, the dataset could already include -#' the age group marker `age_mean_f`, representing the middle point between -#' `age_min` and `age_max`. If `afe_mean_f` is missing, it will be generated -#' by the function. -#' @param alpha probability of a type I error. For further details refer to -#' [binconf][Hmisc::binconf]. -#' @return serodata with additional columns necessary for the analysis. These -#' columns are: -#' \describe{ -#' \item{`age_mean_f`}{Floor value of the average between age_min and age_max -#' for the age groups delimited by `age_min` and `age_max`} -#' \item{`sample_size`}{The size of the sample} -#' \item{`birth_year`}{Years in which the subject was born according to the -#' age group marker `age_mean_f`} -#' \item{`prev_obs`}{Observed prevalence} -#' \item{`prev_obs_lower`}{Lower limit of the confidence interval for the -#' observed prevalence} -#' \item{`prev_obs_upper`}{Upper limit of the confidence interval for the -#' observed prevalence} -#' } -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' @export -prepare_serodata <- function(serodata = serodata, - alpha = 0.05) { - checkmate::assert_numeric(alpha, lower = 0, upper = 1) - serodata <- validate_serodata(serodata) - - if (!any(colnames(serodata) == "age_mean_f")) { - serodata <- serodata %>% - dplyr::mutate( - age_mean_f = floor((.data$age_min + .data$age_max) / 2), - sample_size = sum(.data$total) - ) - } - - if (!any(colnames(serodata) == "birth_year")) { - serodata <- dplyr::mutate( - serodata, - birth_year = .data$tsur - .data$age_mean_f - ) - } - - serodata <- serodata %>% - cbind( - Hmisc::binconf( - serodata$counts, - serodata$total, - alpha = alpha, - method = "exact", - return.df = TRUE - ) - ) %>% - dplyr::rename( - prev_obs = "PointEst", - prev_obs_lower = "Lower", - prev_obs_upper = "Upper" - ) %>% - dplyr::arrange(.data$age_mean_f) - - return(serodata) -} - - -#' Prepare pre-processed serological survey dataset to plot the -#' binomial confidence intervals of the seroprevalence by age group -#' -#' This function prepapares a given pre-processed serological dataset (see -#' [prepare_serodata()]) to plot the binomial confidence intervals of its -#' corresponding seroprevalence grouped by age group. -#' @inheritParams run_seromodel -#' @return data set with the binomial confidence intervals -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' prepare_bin_data(serodata) -#' @keywords internal -#' @noRd -prepare_bin_data <- function(serodata, - bin_step = 5, - alpha = 0.05) { - if (!any(colnames(serodata) == "age_mean_f")) { - serodata <- serodata %>% - dplyr::mutate( - age_mean_f = floor((.data$age_min + .data$age_max) / 2), - sample_size = sum(.data$total) - ) - } - serodata$age_group <- get_age_group( - age = serodata$age_mean_f, - step = bin_step - ) - - serodata_bin <- serodata %>% - dplyr::group_by(.data$age_group) %>% - dplyr::summarise( - total = sum(.data$total), - counts = sum(.data$counts) - ) %>% - dplyr::mutate( - survey = unique(serodata$survey), - tsur = unique(serodata$tsur), - age_min = as.integer(gsub("[(]|\\,.*", "\\1", .data$age_group)) + 1, - age_max = as.integer(gsub(".*\\,|[]]", "\\1", .data$age_group)), - age_mean_f = floor((.data$age_min + .data$age_max) / 2) - ) - - serodata_bin <- cbind( - serodata_bin, - Hmisc::binconf( - serodata_bin$counts, - serodata_bin$total, - alpha = alpha, - method = "exact", - return.df = TRUE - ) - ) %>% - dplyr::rename( - prev_obs = "PointEst", - prev_obs_lower = "Lower", - prev_obs_upper = "Upper" - ) %>% - dplyr::arrange(.data$age_mean_f) - - return(serodata_bin) -} diff --git a/R/veev2012.R b/R/veev2012.R deleted file mode 100644 index 49e4d6fd..00000000 --- a/R/veev2012.R +++ /dev/null @@ -1,15 +0,0 @@ -#' Seroprevalence data on serofoi -#' -#' Data from a serological surveys -#' -#' @docType data -#' -#' @usage veev2012 -#' -#' @format An object of class `"cross"`; see [qtl::read.cross()]. -#' -#' @keywords datasets -#' -#' @examples -#' veev2012 -"veev2012" diff --git a/tests/testthat/clean_expected_files.R b/tests/testthat/clean_expected_files.R deleted file mode 100644 index 72ad5a0a..00000000 --- a/tests/testthat/clean_expected_files.R +++ /dev/null @@ -1,14 +0,0 @@ -# This script removes the svg and csv files used in the automatic tests -# This is usually required when a new version of rstan is released -# Random number generation changes for each new version of -# Rstan, https://mc-stan.org/docs/2_18/reference-manual/reproducibility-chapter.html -# thus all files with expected results (both tables and graphs) become -# obsolete and need to be regenerated - -library(testthat) -paths <- c(testthat::test_path("_snaps", "*"), testthat::test_path("extdata", "dataframes", "expected", "*")) - -for (path in paths) { - cat("Deleting", path, "\n") - unlink(path, recursive = TRUE) -} diff --git a/tests/testthat/extdata/haiti_ssa_sample.RDS b/tests/testthat/extdata/haiti_ssa_sample.RDS deleted file mode 100644 index 04f9fa750e7d714fa65c62758a106ead078306aa..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1150 zcmV-^1cCb>iwFP!000001MQY=Oj~6X$1hMOFa(T@#RL~HOeSFsg+faWK81;}7ojIr6`GIdNdjGzf z#xo)&QedQ|%Y2yYm1JlAU-?C0?L)ceMcogg18HDB)LKEKi z##uuB1bV!`SwYAm8~X_lpfB$a=F50rN-%vn+mcA#fY%$*o9%>$Xdl^5pO5|5FrNn5 zqzKMahwE11^Gnd@GQmtl#v|i`b5i5$)Atc0rJL5vM;74oiqV&%FGpXAen0wZ%w2=^ zT70j1?5pYfVZUJ_C@0OWcnO(~%tFeMc{Crg2-hn`uS8#oejoa3^fj2H7VCAGQ;mH( z{|4-9Hsu`cA@wJeOz<8Ru4MJR{%JLvW>KOluPb<3P4Fzj%G=8Rdy@V}@rU5s8+Rck z$Q?bTiUxvN{y$U_uldC5PYth${g%tI8of`WM*w@fxnDf#WQh!3}%Q!nKL*%N%%$!?!KtCskas2XqBFf9asa$ zj8-zE%>>)zZK>q42^{K;16;=^>?V04I1>uL)ox7&N1IAJm(T^a<=aOucHM_X-Nxag zyJ>sL_k0uBNjUgvaQzta<@_b4NHr6ofyK0?-s019;8fo{5h=b3u3ckqrTwuCPTiwB zMQQ1R&#<1mp90SFDwq03GuUQ6a@)?xV6jkks6BoRepWKuKe%KCTS0Pa!gpVQeReR# z%6oPDwE6Fqp$dqY=h)ZlrneQ+fH26zr+tCLiju+$KJ;?$KE zSdu??txuZ(uEsWR;p76iRvr!H|0oB?x#NquyT`!QKQuL-_c|;+`>}KJx39rHUA67v zdzIkkJTEjxMu79)o*ypAs=!?luecye_R-sa_xIvSEBqSP@``fR3wMSFD?Z;Te8Gr9 zg`6RDqcgCboP{A9T`U&2kS|Pn32)@t4%#|s>@x5cbkDGDoUT)E2(oMsS`ja-Lt7j7 znR|UL&cdUPU=6&!+1NsRQ5&mptU=d`m8}-G)6~J~EczoHy{K7l=G%3>97~@NVKQ@v zb;f3zErxnshq0S8|D)PvG6~g9(>U0{TIe)rF{*{-S#hgb7}TcABhAKcao_;yKShX! z?ywrM#4j`bGRrTeewpo;IesbgOSxa>(oqVJ$OzDq0G%12vjVg}UNZ-Z3CnDq?+06khk`Tzg` diff --git a/tests/testthat/test_fit_seromodel.R b/tests/testthat/test_fit_seromodel.R deleted file mode 100644 index 541f48bc..00000000 --- a/tests/testthat/test_fit_seromodel.R +++ /dev/null @@ -1,164 +0,0 @@ -test_that("fit_seromodel constant model estimates the right force of infection", { - - foi_df <- data.frame( - year = 1970:1999, - foi = rep(0.03, 30) - ) - survey_features <- data.frame( - age_min = seq(1, 21, 10), - age_max = seq(10, 30, 10), - sample_size = c(1000, 1500, 2000) - ) - - simdata <- simulate_serosurvey( - model = "time", - foi = foi_df, - survey_features = survey_features - ) %>% - mutate( - survey = "constant_foi", - tsur = max(foi_df$year) + 1 - ) %>% - rename( - total = sample_size, - counts = n_seropositive - ) %>% prepare_serodata() - - model_object <- fit_seromodel( - serodata = simdata, - foi_model = "constant", - iter = 1000 - ) - - foi_central_estimates <- get_foi_central_estimates( - seromodel_object = model_object, - serodata = simdata - ) %>% - mutate( - # calculates tolerance as half the confidence interval size - tol = (upper - lower)/2 - ) %>% - left_join(foi_df, by = "year") - - expect_true( - all( - dplyr::near( - foi_central_estimates$medianv, - foi_central_estimates$foi, - tol = max(foi_central_estimates$tol, 0.05) - ) - ) - ) -}) - -test_that("fit_seromodel tv_normal model estimates the right force of infection", { - - foi_df <- data.frame( - year = 1940:1999, - foi = rep(c(0.06, 0.03, 0.01), c(20, 20, 20)) - ) - survey_features <- data.frame( - age_min = seq(1, 51, 10), - age_max = seq(10, 60, 10), - sample_size = 100 - ) - - simdata <- simulate_serosurvey( - model = "time", - foi = foi_df, - survey_features = survey_features - ) %>% - mutate( - survey = "sw_dec_foi", - tsur = max(foi_df$year) + 1 - ) %>% - rename( - total = sample_size, - counts = n_seropositive - ) %>% prepare_serodata() - - model_object <- fit_seromodel( - serodata = simdata, - foi_model = "tv_normal", - chunks = rep(c(1, 2, 3), c(15, 20, 20)), - iter = 800 - ) - - foi_central_estimates <- get_foi_central_estimates( - seromodel_object = model_object, - serodata = simdata - ) %>% - mutate( - # calculates tolerance as half the confidence interval size - tol = (upper - lower) / 2 - ) %>% - left_join(foi_df, by = "year") - - expect_true( - all( - dplyr::near( - foi_central_estimates$medianv, - foi_central_estimates$foi, - tol = max(foi_central_estimates$tol, 0.05) - ) - ) - ) -}) - -test_that("fit_seromodel tv_normal_log model estimates the right force of infection", { - - foi_df <- data.frame( - year = 1950:1999, - foi = rep( - c(0.001, 0.4, 0.001), - c(30, 5, 15) - ) - ) - - survey_features <- data.frame( - age_min = seq(1, 41, 10), - age_max = seq(10, 50, 10), - sample_size = 100 - ) - - simdata <- simulate_serosurvey( - model = "time", - foi = foi_df, - survey_features = survey_features - ) %>% - mutate( - survey = "big_outbreak", - tsur = max(foi_df$year) + 1 - ) %>% - rename( - total = sample_size, - counts = n_seropositive - ) %>% prepare_serodata() - - model_object <- fit_seromodel( - serodata = simdata, - foi_model = "tv_normal_log", - chunks = rep(c(1, 2, 3), c(25, 5, 15)), - iter = 700 - ) - - foi_central_estimates <- get_foi_central_estimates( - seromodel_object = model_object, - serodata = simdata - ) %>% - mutate( - # calculates tolerance as half the confidence interval size - tol = (upper - lower) / 2 - ) %>% - left_join(foi_df, by = "year") - - expect_true( - all( - dplyr::near( - foi_central_estimates$medianv, - foi_central_estimates$foi, - tol = max(foi_central_estimates$tol, 0.05) - ) - ) - ) -}) diff --git a/tests/testthat/test_issue_47.R b/tests/testthat/test_issue_47.R deleted file mode 100644 index d4bb27eb..00000000 --- a/tests/testthat/test_issue_47.R +++ /dev/null @@ -1,24 +0,0 @@ -test_that("issue 47", { - skip_on_os(c("windows", "mac")) - source("testing_utils.R") - - library(dplyr) - - # Load data - ## This dataset is already prepared - serodata_path <- testthat::test_path("extdata", "haiti_ssa_sample.RDS") - serodata <- readRDS(serodata_path) - - # Error reproduction - model_test <- fit_seromodel( - serodata = serodata, - foi_model = "tv_normal", - print_summary = FALSE - ) - foi <- rstan::extract(model_test, "foi", inc_warmup = FALSE)[[1]] - prev_expanded <- get_prev_expanded(foi, serodata = serodata) - - # Test - age_max <- max(serodata$age_mean_f) - expect_length(prev_expanded$age, n = age_max) -}) diff --git a/tests/testthat/testing_utils.R b/tests/testthat/testing_utils.R deleted file mode 100644 index 3119eddf..00000000 --- a/tests/testthat/testing_utils.R +++ /dev/null @@ -1,69 +0,0 @@ -# TODO Move to separate package ### -# TODO Document all functions and provide examples -library(testthat) -library(vdiffr) -equal_with_tolerance <- function(tolerance = 2e-1) { - function(a, b) { - c <- base::mapply(function(x, y) { - if (is.na(x) && is.na(y)) { - return(0) - } else if (is.na(x) || is.na(y)) { - return(tolerance + 1) - } else { - return(abs(x - y)) - } - }, a, b) - return(base::all(c < tolerance)) - } -} -equal_exact <- function() { - function(a, b) { - x <- base::mapply(function(x, y) x == y || (is.na(x) && is.na(y)), a, b) - return(base::all(x == TRUE)) - } -} - -# TODO use testthat snapshots -expect_similar_dataframes <- function(name, actual_df, column_comparation_functions) { - actual_df_filename <- file.path(tempdir(), paste(name, "csv", sep = ".")) - write.csv(actual_df, actual_df_filename) - compare_fun <- function(expected_df_filename, actual_df_filename) { - return(compare_dataframes(expected_df_filename, actual_df_filename, column_comparation_functions)) - } - expect_snapshot_file(actual_df_filename, compare = compare_fun) -} - - -compare_dataframes <- function(expected_df_filename, actual_df_filename, column_comparation_functions) { - expected_df <- read.csv(expected_df_filename) - actual_df <- read.csv(actual_df_filename) - - all_columns_ok <- TRUE - for (col in base::names(column_comparation_functions)) { - if (col %in% colnames(expected_df) && col %in% colnames(actual_df)) { - compare_function <- column_comparation_functions[[col]] - col_ok <- compare_function(expected_df[[col]], actual_df[[col]]) - if (!col_ok) { - cat("Column", col, "differs ", expected_df[[col]], "!=", actual_df[[col]], "\n") - } - all_columns_ok <- all_columns_ok && col_ok - } else { - if (!(col %in% colnames(expected_df))) { - cat("Column", col, "not in first dataframe") - } - if (!(col %in% colnames(expected_df))) { - cat("Column", col, "not in second dataframe") - } - } - } - return(all_columns_ok) -} - -expect_same_plot <- function(plot_name, actual_plot) { - if (per_platform_snapshots) { - title <- file.path(r_version_id(), plot_name) - } else { - title <- plot_name - } - return(vdiffr::expect_doppelganger(title, actual_plot)) -} From 4dfbf463d7e86716930e87830bf7cfa8e3a35db9 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Fri, 24 May 2024 19:46:20 -0500 Subject: [PATCH 02/78] feat: add constant models with and without seroreversion --- inst/stan/av_normal.stan | 66 ------------------ inst/stan/constant.stan | 67 +++++++++--------- inst/stan/constant_seroreversion.stan | 58 ++++++++++++++++ inst/stan/data/basic_data.stan | 6 ++ inst/stan/data/foi_prior_data.stan | 8 +++ inst/stan/data/seroreversion_prior_data.stan | 8 +++ inst/stan/functions/prob_infected_av.stan | 48 ++++--------- .../functions/prob_infected_constant.stan | 32 +++++++++ inst/stan/functions/prob_infected_tv.stan | 33 ++++----- .../generated_quantities/log_likelihood.stan | 9 +++ inst/stan/tv_normal.stan | 66 ------------------ inst/stan/tv_normal_log.stan | 69 ------------------- 12 files changed, 186 insertions(+), 284 deletions(-) delete mode 100644 inst/stan/av_normal.stan create mode 100644 inst/stan/constant_seroreversion.stan create mode 100644 inst/stan/data/basic_data.stan create mode 100644 inst/stan/data/foi_prior_data.stan create mode 100644 inst/stan/data/seroreversion_prior_data.stan create mode 100644 inst/stan/functions/prob_infected_constant.stan create mode 100644 inst/stan/generated_quantities/log_likelihood.stan delete mode 100644 inst/stan/tv_normal.stan delete mode 100644 inst/stan/tv_normal_log.stan diff --git a/inst/stan/av_normal.stan b/inst/stan/av_normal.stan deleted file mode 100644 index 6414fb2c..00000000 --- a/inst/stan/av_normal.stan +++ /dev/null @@ -1,66 +0,0 @@ -functions { - #include functions/prob_infected_av.stan -} - -data { - int n_obs; - int age_max; - int n_pos[n_obs]; - int n_total[n_obs]; - int ages[n_obs]; - - // prior choices - int chunks[age_max]; - real foi_location; - real foi_scale; -} - -transformed data { - int n_chunks = max(chunks); -} - -parameters { - row_vector[n_chunks] fois; - real sigma; -} - -transformed parameters { - vector[n_chunks] fois_vector; - vector[n_obs] prob_infected; - - fois_vector = to_vector(fois); - - prob_infected = prob_infected_calculate( - fois_vector, - chunks, - ages, - n_obs, - 0 - ); -} - -model { - n_pos ~ binomial(n_total, prob_infected); - sigma ~ cauchy(0, 1); - - fois[1] ~ normal(foi_location, foi_scale); - for(i in 2:n_chunks) - fois[i] ~ normal(fois[i - 1], sigma); -} - -generated quantities{ - vector[n_obs] n_pos_sim; - vector[n_obs] P_sim; - vector[n_obs] logLikelihood; - vector[age_max] foi; - - for(i in 1:age_max) { - foi[i] = fois_vector[chunks[i]]; - } - - for(i in 1:n_obs){ - n_pos_sim[i] = binomial_rng(n_total[i], prob_infected[i]); - P_sim[i] = n_pos_sim[i] / n_total[i]; - logLikelihood[i] = binomial_lpmf(n_pos[i] | n_total[i], prob_infected[i]); - } -} diff --git a/inst/stan/constant.stan b/inst/stan/constant.stan index 5436fa1c..55ae6d96 100644 --- a/inst/stan/constant.stan +++ b/inst/stan/constant.stan @@ -1,47 +1,50 @@ +functions { + #include functions/prob_infected_constant.stan +} data { - int n_obs; - int age_max; - int n_pos[n_obs]; - int n_total[n_obs]; - matrix[n_obs, age_max] observation_exposure_matrix; - - // prior choices - real foi_a; - real foi_b; + #include data/basic_data.stan + #include data/foi_prior_data.stan } parameters { - real lambda0; + real foi; } transformed parameters { - real prob_infected[n_obs]; - real scalar_dot_product[n_obs]; - row_vector[age_max] foi; - - for (j in 1:age_max) { - foi[j] = lambda0; - } - - for (i in 1:n_obs){ - scalar_dot_product[i] = dot_product(observation_exposure_matrix[i,], foi); - prob_infected[i] = 1 - exp(-scalar_dot_product[i]); - } + vector[n_observations] prob_infected; + + prob_infected = prob_infected_constant( + age_groups, + n_observations, + foi, + 0.0 + ); } model { - for (i in 1:n_obs) - n_pos[i] ~ binomial(n_total[i], prob_infected[i]); - lambda0 ~ uniform (foi_a, foi_b); + n_seropositive ~ binomial(sample_size, prob_infected); + + // force of infection prior + if (foi_prior_index == 0) + foi ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi ~ normal(foi_mean, foi_sd); } generated quantities{ - vector[n_obs] n_pos_sim; - vector[n_obs] P_sim; - vector[n_obs] logLikelihood; - for(i in 1:n_obs){ - n_pos_sim[i] = binomial_rng(n_total[i], prob_infected[i]); - P_sim[i] = n_pos_sim[i] / n_total[i]; - logLikelihood[i] = binomial_lpmf(n_pos[i] | n_total[i], prob_infected[i]); + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_vector; + + for(i in 1:age_max) { + foi_vector[i] = foi; } + + prob_infected_expanded = prob_infected_constant( + ages, + age_max, + foi, + 0.0 + ); } diff --git a/inst/stan/constant_seroreversion.stan b/inst/stan/constant_seroreversion.stan new file mode 100644 index 00000000..16e463e0 --- /dev/null +++ b/inst/stan/constant_seroreversion.stan @@ -0,0 +1,58 @@ +functions { + #include functions/prob_infected_constant.stan +} +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan + #include data/seroreversion_prior_data.stan +} + +parameters { + real foi; + real seroreversion_rate; +} + +transformed parameters { + vector[n_observations] prob_infected; + + prob_infected = prob_infected_constant( + age_groups, + n_observations, + foi, + seroreversion_rate + ); +} + +model { + n_seropositive ~ binomial(sample_size, prob_infected); + + // force of infection prior + if (foi_prior_index == 0) + foi ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi ~ normal(foi_mean, foi_sd); + + // seroreversion prior + if (seroreversion_prior_index == 0) + seroreversion_rate ~ uniform(seroreversion_min, seroreversion_max); + if (seroreversion_prior_index == 1) + seroreversion_rate ~ normal(seroreversion_mean, seroreversion_sd); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_vector; + + for(i in 1:age_max) { + foi_vector[i] = foi; + } + + prob_infected_expanded = prob_infected_constant( + ages, + age_max, + foi, + seroreversion_rate + ); +} diff --git a/inst/stan/data/basic_data.stan b/inst/stan/data/basic_data.stan new file mode 100644 index 00000000..f4f8a6e6 --- /dev/null +++ b/inst/stan/data/basic_data.stan @@ -0,0 +1,6 @@ +int n_observations; +int age_max; +int ages[age_max]; +int n_seropositive[n_observations]; +int sample_size[n_observations]; +int age_groups[n_observations]; diff --git a/inst/stan/data/foi_prior_data.stan b/inst/stan/data/foi_prior_data.stan new file mode 100644 index 00000000..1f8e9178 --- /dev/null +++ b/inst/stan/data/foi_prior_data.stan @@ -0,0 +1,8 @@ + // prior index + int foi_prior_index; + // uniform + real foi_min; + real foi_max; + // normal + real foi_mean; + real foi_sd; diff --git a/inst/stan/data/seroreversion_prior_data.stan b/inst/stan/data/seroreversion_prior_data.stan new file mode 100644 index 00000000..1b6962e3 --- /dev/null +++ b/inst/stan/data/seroreversion_prior_data.stan @@ -0,0 +1,8 @@ + // prior index + int seroreversion_prior_index; + // seroreversion prior choices + real seroreversion_min; + real seroreversion_max; + // normal + real seroreversion_mean; + real seroreversion_sd; diff --git a/inst/stan/functions/prob_infected_av.stan b/inst/stan/functions/prob_infected_av.stan index a9226728..a2d09d46 100644 --- a/inst/stan/functions/prob_infected_av.stan +++ b/inst/stan/functions/prob_infected_av.stan @@ -1,35 +1,17 @@ -real prob_infected_age_varying( - vector fois_vector, - int[] chunks, - int age, - real mu -) { - real prob = 0.0; - for(j in 1:age){ - real foi = fois_vector[chunks[j]]; - prob = (1/ (foi + mu)) * exp(-(foi + mu)) * (foi * (exp(foi + mu) - 1) + prob * (foi+mu)); - } - return prob; -} - -vector prob_infected_calculate( - vector fois_vector, - int[] chunks, - int[] ages, - int n_obs, - real mu -) { - vector[n_obs] prob_infected; + real prob_infected_age_varying( + int age, + real age_max, + vector foi_vector, + int[] chunks, + real seroreversion_rate + ) { + real prob = 0.0; + for(j in 1:age){ + real foi = foi_vector[chunks[j]]; + real lambda_over_both = foi / (foi + seroreversion_rate); + real e_lower = exp(-(foi + seroreversion_rate)); - for(i in 1:n_obs){ - int age = ages[i]; - prob_infected[i] = prob_infected_age_varying( - fois_vector, - chunks, - age, - mu - ); + prob = lambda_over_both + e_lower * (prob - lambda_over_both); + } + return prob; } - - return prob_infected; -} diff --git a/inst/stan/functions/prob_infected_constant.stan b/inst/stan/functions/prob_infected_constant.stan new file mode 100644 index 00000000..0fc688ee --- /dev/null +++ b/inst/stan/functions/prob_infected_constant.stan @@ -0,0 +1,32 @@ +real prob_infected_constant_single_age( + int age, + real foi, + real seroreversion_rate +) { + real foi_over_both = foi / (foi + seroreversion_rate); + real e_lower = exp(-(foi + seroreversion_rate)); + + real prob = 0.0; + for(i in 1:age) { + prob = foi_over_both + e_lower * (prob - foi_over_both); + } + return prob; +} + +vector prob_infected_constant( + int[] ages, + int n_ages, + real foi, + real seroreversion_rate +) { + vector[n_ages] prob_infected; + + for (i in 1:n_ages) { + prob_infected[i] = prob_infected_constant_single_age( + ages[i], + foi, + seroreversion_rate + ); + } + return prob_infected; +} diff --git a/inst/stan/functions/prob_infected_tv.stan b/inst/stan/functions/prob_infected_tv.stan index 02822545..e38f6a81 100644 --- a/inst/stan/functions/prob_infected_tv.stan +++ b/inst/stan/functions/prob_infected_tv.stan @@ -1,5 +1,5 @@ vector prob_infected_noseroreversion( - vector fois_vector, + vector foi_vector, int[] chunks, matrix observation_exposure_matrix, int n_obs, @@ -7,7 +7,7 @@ vector prob_infected_noseroreversion( ) { real scalar_dot_product; vector[n_obs] prob_infected; - vector[age_max] foi_every_age = fois_vector[chunks]; + vector[age_max] foi_every_age = foi_vector[chunks]; for(i in 1:n_obs){ scalar_dot_product = dot_product( @@ -20,22 +20,19 @@ vector prob_infected_noseroreversion( return prob_infected; } -vector prob_infected_calculate( - vector fois_vector, - int[] chunks, - matrix observation_exposure_matrix, - int n_obs, - int age_max + real prob_infected_age_varying( + int age, + vector foi_vector, + int[] chunks, + real seroreversion_rate ) { - vector[n_obs] prob_infected; - - prob_infected = prob_infected_noseroreversion( - fois_vector, - chunks, - observation_exposure_matrix, - n_obs, - age_max - ); + real prob = 0.0; + for(j in 1:age){ + real foi = foi_vector[chunks[j]]; + real lambda_over_both = foi / (foi + seroreversion_rate); + real e_lower = exp(-(foi + seroreversion_rate)); - return prob_infected; + prob = lambda_over_both + e_lower * (prob - lambda_over_both); + } + return prob; } diff --git a/inst/stan/generated_quantities/log_likelihood.stan b/inst/stan/generated_quantities/log_likelihood.stan new file mode 100644 index 00000000..789d0db5 --- /dev/null +++ b/inst/stan/generated_quantities/log_likelihood.stan @@ -0,0 +1,9 @@ +vector[n_observations] n_seropositive_sim; +vector[n_observations] prob_infected_sim; +vector[n_observations] log_likelihood; + +for(i in 1:n_observations){ + n_seropositive_sim[i] = binomial_rng(sample_size[i], prob_infected[i]); + prob_infected_sim[i] = n_seropositive_sim[i] / sample_size[i]; + log_likelihood[i] = binomial_lpmf(n_seropositive[i] | sample_size[i], prob_infected[i]); +} diff --git a/inst/stan/tv_normal.stan b/inst/stan/tv_normal.stan deleted file mode 100644 index 43fcc66e..00000000 --- a/inst/stan/tv_normal.stan +++ /dev/null @@ -1,66 +0,0 @@ -functions { - #include functions/prob_infected_tv.stan -} - -data { - int n_obs; - int age_max; - int n_pos[n_obs]; - int n_total[n_obs]; - matrix[n_obs, age_max] observation_exposure_matrix; - - // prior choices - int chunks[age_max]; - real foi_location; - real foi_scale; -} - -transformed data { - int n_chunks = max(chunks); -} - -parameters { - row_vector[n_chunks] fois; - real sigma; -} - -transformed parameters { - vector[n_chunks] fois_vector; - vector[n_obs] prob_infected; - - fois_vector = to_vector(fois); - - prob_infected = prob_infected_calculate( - fois_vector, - chunks, - observation_exposure_matrix, - n_obs, - age_max - ); -} - -model { - n_pos ~ binomial(n_total, prob_infected); - sigma ~ cauchy(0, 1); - - fois[1] ~ normal(foi_location, foi_scale); - for(i in 2:n_chunks) - fois[i] ~ normal(fois[i - 1], sigma); -} - -generated quantities{ - vector[n_obs] n_pos_sim; - vector[n_obs] P_sim; - vector[n_obs] logLikelihood; - vector[age_max] foi; - - for(i in 1:age_max) { - foi[i] = fois_vector[chunks[i]]; - } - - for(i in 1:n_obs){ - n_pos_sim[i] = binomial_rng(n_total[i], prob_infected[i]); - P_sim[i] = n_pos_sim[i] / n_total[i]; - logLikelihood[i] = binomial_lpmf(n_pos[i] | n_total[i], prob_infected[i]); - } -} diff --git a/inst/stan/tv_normal_log.stan b/inst/stan/tv_normal_log.stan deleted file mode 100644 index 4b4d033b..00000000 --- a/inst/stan/tv_normal_log.stan +++ /dev/null @@ -1,69 +0,0 @@ -functions { - #include functions/prob_infected_tv.stan -} - -data { - int n_obs; - int age_max; - int n_pos[n_obs]; - int n_total[n_obs]; - matrix[n_obs, age_max] observation_exposure_matrix; - - // prior choices - int chunks[age_max]; - real foi_location; - real foi_scale; -} - -transformed data { - int n_chunks = max(chunks); -} - -parameters { - row_vector[n_chunks] log_fois; - real sigma; -} - -transformed parameters { - row_vector[n_chunks] fois; - vector[n_chunks] fois_vector; - vector[n_obs] prob_infected; - - for(i in 1:n_chunks) - fois[i] = exp(log_fois[i]); - fois_vector = to_vector(fois); - - prob_infected = prob_infected_calculate( - fois_vector, - chunks, - observation_exposure_matrix, - n_obs, - age_max - ); -} - -model { - n_pos ~ binomial(n_total, prob_infected); - sigma ~ cauchy(0, 1); - - log_fois[1] ~ normal(foi_location, foi_scale); - for(i in 2:n_chunks) - log_fois[i] ~ normal(log_fois[i - 1], sigma); - } - -generated quantities{ - vector[n_obs] n_pos_sim; - vector[n_obs] P_sim; - vector[n_obs] logLikelihood; - vector[age_max] foi; - - for(i in 1:age_max) { - foi[i] = fois_vector[chunks[i]]; - } - - for(i in 1:n_obs){ - n_pos_sim[i] = binomial_rng(n_total[i], prob_infected[i]); - P_sim[i] = n_pos_sim[i] / n_total[i]; - logLikelihood[i] = binomial_lpmf(n_pos[i] | n_total[i], prob_infected[i]); - } -} From 1fe63f66183a34a6c2b4c4e7a35d57c893153c19 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Fri, 24 May 2024 19:47:45 -0500 Subject: [PATCH 03/78] delete old modules --- R/model_comparison.R | 45 --- R/modelling.R | 765 ------------------------------------------- R/visualisation.R | 461 -------------------------- 3 files changed, 1271 deletions(-) delete mode 100644 R/model_comparison.R delete mode 100644 R/modelling.R delete mode 100644 R/visualisation.R diff --git a/R/model_comparison.R b/R/model_comparison.R deleted file mode 100644 index cc566013..00000000 --- a/R/model_comparison.R +++ /dev/null @@ -1,45 +0,0 @@ -#' Build dataframe containing the R-hat estimates for a given -#' serological model -#' -#' This function relies on [rhat][bayesplot::rhat] to extract the -#' R-hat estimates of the serological model object `seromodel_object` and -#' returns a table a dataframe with the estimates for each year of birth. -#' @inheritParams get_foi_central_estimates -#' @return rhats table -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(serodata = chagas2012) -#' model_constant <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant", -#' iter = 1500 -#' ) -#' cohort_ages <- get_cohort_ages(serodata) -#' get_table_rhats( -#' seromodel_object = model_constant, -#' cohort_ages = cohort_ages -#' ) -#' @export -get_table_rhats <- function(seromodel_object, - cohort_ages) { - checkmate::assert_class(seromodel_object, "stanfit") - - rhats <- bayesplot::rhat(seromodel_object, "foi") - - if (any(is.nan(rhats))) { - warn_msg <- paste0( - length(which(is.nan(rhats))), - " rhat values are `nan`, ", - "indicating the model may not have run correctly for those times.\n", - "Setting those rhat values to `NA`." - ) - warning(warn_msg) - rhats[which(is.nan(rhats))] <- NA - } - model_rhats <- data.frame( - year = cohort_ages$birth_year, - rhat = rhats - ) - - return(model_rhats) -} diff --git a/R/modelling.R b/R/modelling.R deleted file mode 100644 index 1b42480a..00000000 --- a/R/modelling.R +++ /dev/null @@ -1,765 +0,0 @@ -stop_if_missing <- function(serodata, must_have_cols) { - if ( - !all( - must_have_cols - %in% colnames(serodata) - ) - ) { - missing <- must_have_cols[which(!(must_have_cols %in% colnames(serodata)))] - stop( - "The following mandatory columns in `serodata` are missing.\n", - toString(missing) - ) - } -} - -stop_if_wrong_type <- function(serodata, col_types) { - error_messages <- list() - for (col in names(col_types)) { - # valid_col_types <- ifelse(is.list(col_types[[col]]), - # col_types[[col]], as.list(col_types[[col]]) - # ) - valid_col_types <- as.list(col_types[[col]]) - - # Only validates column type if the column exists in the dataframe - if (col %in% colnames(serodata) && - !any(vapply(valid_col_types, function(type) { - do.call(sprintf("is.%s", type), list(serodata[[col]])) - }, logical(1)))) { - error_messages <- append( - error_messages, - sprintf( - "`%s` must be of any of these types: `%s`", - col, toString(col_types[[col]]) - ) - ) - } - } - if (length(error_messages) > 0) { - stop( - "The following columns in `serodata` have wrong types:\n", - toString(error_messages) - ) - } -} - -warn_missing <- function( - serodata, - optional_cols - ) { - if ( - !all( - optional_cols - %in% colnames(serodata) - ) - ) { - missing <- optional_cols[which(!(optional_cols %in% colnames(serodata)))] - warning( - "The following optional columns in `serodata` are missing. ", - "Consider including them to get more information from this analysis:\n", - toString(missing) - ) - for (col in missing) { - serodata[[col]] <- "None" # TODO Shouln't we use `NA` instead? - } - } - - return(serodata) -} - - -validate_serodata <- function(serodata) { - col_types <- list( - survey = c("character", "factor"), - total = "numeric", - counts = "numeric", - tsur = "numeric", - age_min = "numeric", - age_max = "numeric" - ) - - stop_if_missing(serodata, - must_have_cols = names(col_types) - ) - - stop_if_wrong_type(serodata, col_types) - - optional_col_types <- list( - country = c("character", "factor"), - test = c("character", "factor"), - antibody = c("character", "factor") - ) - - # Add missing columns - serodata <- warn_missing( - serodata, - optional_cols = names(optional_col_types) - ) - - # If any optional column is present, validates that is has the correct type - stop_if_wrong_type(serodata, optional_col_types) - - return(serodata) -} - -validate_prepared_serodata <- function(serodata) { - col_types <- list( - total = "numeric", - counts = "numeric", - tsur = "numeric", - age_mean_f = "numeric", - birth_year = "numeric", - prev_obs = "numeric", - prev_obs_lower = "numeric", - prev_obs_upper = "numeric" - ) - serodata <- validate_serodata(serodata) - - stop_if_missing(serodata, must_have_cols = names(col_types)) - stop_if_wrong_type(serodata, col_types) - - return(serodata) -} - -#' Run specified stan model for the force-of-infection and -#' estimate the seroprevalence based on the result of the fit -#' -#' Starting on v.0.1.0, this function will be DEPRECATED. Use `fit_seromodel` -#' instead. -#' This function runs the specified model for the force-of-infection `foi_model` -#' using the data from a seroprevalence survey `serodata` as the input data. See -#' [fit_seromodel] for further details. -#' -#' @inheritParams fit_seromodel -#' @param print_summary Boolean. If `TRUE`, a table summarizing modelling -#' results is printed. -#' @return `seromodel_object`. An object containing relevant information about -#' the implementation of the model. For further details refer to -#' [fit_seromodel]. -#' @examples -#' \dontrun{ -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' fit_seromodel( -#' serodata, -#' foi_model = "constant" -#' ) -#' } -#' @export -run_seromodel <- function( - serodata, - foi_model = c("constant", "tv_normal_log", "tv_normal"), - foi_parameters = NULL, - chunks = NULL, - chunk_size = 1, - iter = 1000, - adapt_delta = 0.90, - max_treedepth = 10, - seed = 12345, - print_summary = TRUE, - ...) { - .Deprecated("fit_seromodel") - foi_model <- match.arg(foi_model) - survey <- unique(serodata$survey) - if (length(survey) > 1) { - warning("You have more than 1 surveys or survey codes") - } - seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = foi_model, - foi_parameters = foi_parameters, - chunks = chunks, - chunk_size = chunk_size, - iter = iter, - adapt_delta = adapt_delta, - max_treedepth = max_treedepth, - seed = seed, - ... - ) - message( - "serofoi model ", - foi_model, - " finished running ------" - ) - if (print_summary) { - model_summary <- extract_seromodel_summary( - seromodel_object = seromodel_object, - serodata = serodata - ) - print(t(model_summary)) - } - return(seromodel_object) -} - -#' Fit selected model to the specified seroprevalence survey -#' data -#' -#' This function fits the specified model `foi_model` to the serological survey -#' data `serodata` by means of [sampling][rstan::sampling]. The -#' function determines whether the corresponding stan model object needs to be -#' compiled by rstan. -#' @param serodata A data frame containing the data from a seroprevalence -#' survey. This data frame must contain at least the following columns: -#' \describe{ -#' \item{`total`}{Number of samples for each age group} -#' \item{`counts`}{Number of positive samples for each age group} -#' \item{`tsur`}{Year in which the survey took place} -#' \item{`age_mean_f`}{Floor value of the average between age_min and age_max} -#' \item{`sample_size`}{The size of the sample} -#' \item{`birth_year`}{The year in which the individuals of each age group -#' were born} -#' } -#' The last six columns can be added to `serodata` by means of the function -#' [prepare_serodata()]. -#' @param foi_model Name of the selected model. Current version provides three -#' options: -#' \describe{ -#' \item{`"constant"`}{Runs a constant model} -#' \item{`"tv_normal"`}{Runs a normal model} -#' \item{`"tv_normal_log"`}{Runs a normal logarithmic model} -#' } -#' @param foi_parameters List specifying the initial prior parameters of the -#' model `foi_model` to be specified as (e.g.): -#' \describe{ -#' \item{`"constant"`}{`list(foi_a = 0, foi_b = 2)`} -#' \item{`"tv_normal"`}{`list(foi_location = 0, foi_scale = 1)`} -#' \item{`"tv_normal_log"`}{`list(foi_location = -6, foi_scale = 4)`} -#' } -#' @param chunks Numeric list specifying the chunk structure of the time -#' interval from the birth year of the oldest age cohort -#' `min(serodata$age_mean_f)` to the time when the serosurvey was conducted -#' `t_sur`. If `NULL`, the time interval is divided in chunks of size -#' `chunk_size`. -#' @param chunk_size Size of the chunks to be used in case that the chunk -#' structure `chunks` is not specified in [fit_seromodel]. -#' Default is 1, meaning that one force of infection value is to be estimated -#' for every year in the time interval spanned by the serosurvey. -#' If the length of the time interval is not exactly divisible by `chunk_size`, -#' the remainder years are included in the last chunk. -#' @param iter Number of interactions for each chain including the warmup. -#' `iter` in [sampling][rstan::sampling]. -#' @param adapt_delta Real number between 0 and 1 that represents the target -#' average acceptance probability. Increasing the value of `adapt_delta` will -#' result in a smaller step size and fewer divergences. For further details -#' refer to the `control` parameter in [sampling][rstan::sampling] or -#' [here](https://mc-stan.org/rstanarm/reference/adapt_delta.html). -#' @param max_treedepth Maximum tree depth for the binary tree used in the NUTS -#' stan sampler. For further details refer to the `control` parameter in -#' [sampling][rstan::sampling]. -#' @param seed For further details refer to the `seed` parameter in -#' [sampling][rstan::sampling]. -#' @param ... Additional parameters for [sampling][rstan::sampling]. -#' @return `seromodel_object`. `stanfit` object returned by the function -#' [sampling][rstan::sampling] -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_fit <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant" -#' ) -#' -#' @export -fit_seromodel <- function( - serodata, - foi_model = c("constant", "tv_normal_log", "tv_normal", "av_normal"), - foi_parameters = NULL, - chunks = NULL, - chunk_size = 1, - iter = 1000, - adapt_delta = 0.90, - max_treedepth = 10, - seed = 12345, - ...) { - serodata <- validate_prepared_serodata(serodata) - err_msg <- paste0( - "foi_model must be either ", - "constant, ", - "tv_normal, tv_normal_log, or ", - "av_normal" - ) - stopifnot( - err_msg = - foi_model %in% c( - "constant", - "tv_normal", "tv_normal_log", - "av_normal" - ), - "iter must be numeric" = is.numeric(iter), - "seed must be numeric" = is.numeric(seed) - ) - - # Set default foi parameters - if (is.null(foi_parameters)) { - if (foi_model == "constant") { - foi_parameters <- list( - foi_a = 0, - foi_b = 2 - ) - } else if (foi_model %in% c("tv_normal", "av_normal")) { - foi_parameters <- list( - foi_location = 0, - foi_scale = 1 - ) - } else if (foi_model == "tv_normal_log") { - foi_parameters <- list( - foi_location = -6, - foi_scale = 4 - ) - } - } - - # Load Stan model - model <- stanmodels[[foi_model]] - exposure_matrix <- get_exposure_matrix(serodata) - - # Set default chunks structure - if (is.null(chunks)) { - chunks <- get_chunk_structure( - serodata = serodata, - chunk_size = chunk_size - ) - } - checkmate::assert_class(chunks, "numeric") - stopifnot( - "`chunks` length must be equal to `max(serodata$age_mean_f)`" = - length(chunks) == max(serodata$age_mean_f) - ) - - # Build Stan data - stan_data <- list( - n_obs = nrow(serodata), - n_pos = serodata$counts, - n_total = serodata$total, - age_max = max(serodata$age_mean_f), - chunks = chunks - ) - - if (foi_model %in% c("constant", "tv_normal", "tv_normal_log")) { - exposure_matrix <- get_exposure_matrix(serodata) - stan_data <- append( - stan_data, - list( - observation_exposure_matrix = exposure_matrix - ) - ) - } else if (foi_model == "av_normal") { - stan_data <- append( - stan_data, - list(ages = serodata$age_mean_f) - ) - } - - if (foi_model == "constant") { - stan_data <- append( - stan_data, - list( - foi_a = foi_parameters$foi_a, - foi_b = foi_parameters$foi_b - ) - ) - } else { - stan_data <- append( - stan_data, - list( - foi_location = foi_parameters$foi_location, - foi_scale = foi_parameters$foi_scale - ) - ) - } - - if (foi_model == "tv_normal_log") { - f_init <- function() { - list(log_fois = rep(-3, max(chunks))) - } - } else { - f_init <- function() { - list(fois = rep(0.01, max(chunks))) - } - } - - seromodel_fit <- rstan::sampling( - model, - data = stan_data, - iter = iter, - init = f_init, - control = list( - adapt_delta = adapt_delta, - max_treedepth = max_treedepth - ), - seed = seed, - # https://github.com/stan-dev/rstan/issues/761#issuecomment-647029649 - chain_id = 0, - include = FALSE, - pars = "fois_vector", - verbose = FALSE, - refresh = 0, - ... - ) - - if (seromodel_fit@mode == 0) { - seromodel_object <- seromodel_fit - return(seromodel_object) - } else { - # This may happen for invalid inputs in rstan::sampling() (e.g. thin > iter) - seromodel_object <- "no model" - return(seromodel_object) - } -} - - -#' Generate list containing the chunk structure to be used in the retrospective -#' estimation of the force of infection. -#' -#' This function generates a numeric list specifying the chunk structure of the -#' time interval spanning from the year of birth of the oldest age cohort up to -#' the time when the serosurvey was conducted. -#' @inheritParams fit_seromodel -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(serodata = chagas2012, alpha = 0.05) -#' cohort_ages <- get_cohort_ages(serodata = serodata) -#' @export -get_chunk_structure <- function( - serodata, - chunk_size - ) { - checkmate::assert_int( - chunk_size, - lower = 1, - upper = max(serodata$age_mean_f) - ) - - chunks <- unlist( - purrr::map( - seq( - 1, - max(serodata$age_mean_f) / chunk_size, - 1), - rep, - times = chunk_size - ) - ) - - chunks <- append( - chunks, - rep( - max(chunks), - max(serodata$age_mean_f) - length(chunks) - ) - ) - - return(chunks) -} - -#' Generate data frame containing the age of each cohort -#' corresponding to each birth year excluding the year of the survey. -#' -#' This function generates a data frame containing the age of each cohort -#' corresponding to each `birth_year` excluding the year of the survey, for -#' which the cohort age is still 0. specified serological survey data `serodata` -#' excluding the year of the survey. -#' @inheritParams run_seromodel -#' @return `cohort_ages`. A data frame containing the age of each cohort -#' corresponding to each birth year -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(serodata = chagas2012, alpha = 0.05) -#' cohort_ages <- get_cohort_ages(serodata = serodata) -#' @export -get_cohort_ages <- function(serodata) { - - birth_year <- min(serodata$birth_year):(serodata$tsur[1] - 1) - age <- rev(seq_along(birth_year)) - - cohort_ages <- data.frame( - birth_year = birth_year, - age = age - ) - return(cohort_ages) -} - -# TODO Is necessary to explain better what we mean by the exposure matrix. - -#' Generate exposure matrix corresponding to a serological -#' survey -#' -#' @inheritParams run_seromodel -#' @return `exposure_output`. An atomic matrix containing the expositions for -#' each entry of `serodata` by year. -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(serodata = chagas2012) -#' exposure_matrix <- get_exposure_matrix(serodata = serodata) -#' @keywords internal -#' @noRd -get_exposure_matrix <- function(serodata) { - age_class <- serodata$age_mean_f - cohort_ages <- get_cohort_ages(serodata = serodata) - ly <- nrow(cohort_ages) - exposure <- matrix(0, nrow = length(age_class), ncol = ly) - for (k in seq_along(age_class)) { - exposure[k, (ly - age_class[k] + 1):ly] <- 1 - } - exposure_output <- exposure - return(exposure_output) -} - -#' Extract central estimates for the fitted forced FoI -#' -#' @param seromodel_object Stanfit object containing the results of fitting a -#' model by means of [fit_seromodel]. -#' @inheritParams fit_seromodel -#' @param lower_quantile Lower quantile used to compute the credible interval of -#' the fitted force-of-infection. -#' @param upper_quantile Lower quantile used to compute the credible interval of -#' the fitted force-of-infection. -#' @return `foi_central_estimates`. Central estimates for the fitted forced FoI -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant" -#' ) -#' cohort_ages <- get_cohort_ages(serodata = serodata) -#' foi_central_estimates <- get_foi_central_estimates( -#' seromodel_object = seromodel_object, -#' serodata = serodata -#' ) -#' @export -get_foi_central_estimates <- function( - seromodel_object, - serodata, - lower_quantile = 0.05, - upper_quantile = 0.95 - ) { - # extracts force-of-infection from stan fit - foi <- rstan::extract(seromodel_object, "foi", inc_warmup = FALSE)[[1]] - cohort_ages <- get_cohort_ages(serodata) - # defines time scale depending on the type of the model - if( - seromodel_object@model_name %in% - c("constant", "tv_normal", "tv_normal_log") - ) { - foi_central_estimates <- data.frame( - year = cohort_ages$birth_year - ) - } else if (seromodel_object@model_name == "av_normal") { - foi_central_estimates <- data.frame( - age = rev(cohort_ages$age) - ) - } - - # generates central estimations - foi_central_estimates <- foi_central_estimates %>% - mutate( - lower = apply(foi, 2, quantile, lower_quantile), - upper = apply(foi, 2, quantile, upper_quantile), - medianv = apply(foi, 2, quantile, 0.5) - ) - - return(foi_central_estimates) -} - -#' Function to extract a summary of the specified serological model object -#' -#' This function extracts a summary corresponding to a serological model object -#' containing information about the original serological survey data used to -#' fit the model, such as the year when the survey took place, the type of test -#' taken and the corresponding antibody, as well as information about the -#' convergence of the model, like the expected log pointwise predictive density -#' `elpd` and its corresponding standard deviation. -#' @inheritParams get_foi_central_estimates -#' @inheritParams run_seromodel -#' @return `model_summary`. Object with a summary of `seromodel_object` -#' containing the following: -#' \describe{ -#' \item{`foi_model`}{Name of the selected model.} -#' \item{`data_set`}{Seroprevalence survey label} -#' \item{`country`}{Name of the country were the survey was conducted in.} -#' \item{`year`}{Year in which the survey was conducted.} -#' \item{`test`}{Type of test of the survey.} -#' \item{`antibody`}{Antibody} -#' \item{`n_sample`}{Total number of samples in the survey.} -#' \item{`n_agec`}{Number of age groups considered.} -#' \item{`n_iter`}{Number of iterations by chain including warmup.} -#' \item{`elpd`}{elpd} -#' \item{`se`}{se} -#' \item{`converged`}{convergence} -#' } -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant" -#' ) -#' extract_seromodel_summary(seromodel_object, -#' serodata = serodata -#' ) -#' @export -extract_seromodel_summary <- function(seromodel_object, - serodata) { - serodata <- validate_prepared_serodata(serodata) - #------- Loo estimates - # The argument parameter_name refers to the name given to the Log-likelihood - # in the stan models. See loo::extract_log_lik() documentation for further - # details - loo_fit <- loo::loo( - seromodel_object, - save_psis = FALSE, - pars = c(parameter_name = "logLikelihood") - ) - if (sum(is.na(loo_fit)) < 1) { - lll <- as.numeric((round(loo_fit$estimates[1, ], 2))) - } else { - lll <- c(-1e10, 0) - } - - model_summary <- data.frame( - foi_model = seromodel_object@model_name, - dataset = unique(serodata$survey), - country = unique(serodata$country), - year = unique(serodata$tsur), - test = unique(serodata$test), - antibody = unique(serodata$antibody), - n_sample = sum(serodata$total), - n_agec = length(serodata$age_mean_f), - n_iter = seromodel_object@sim$iter, - elpd = lll[1], - se = lll[2], - converged = NA - ) - cohort_ages <- get_cohort_ages(serodata = serodata) - rhats <- get_table_rhats( - seromodel_object = seromodel_object, - cohort_ages = cohort_ages - ) - - if (all(rhats$rhat <= 1.01)) { - model_summary$converged <- "Yes" - } else { - model_summary$converged <- "No" - warn_msg <- paste0( - length(which(rhats$rhat > 1.01)), - " rhat values are above 1.01. ", - "Running the chains for more iterations is recommended." - ) - warning(warn_msg) - } - - return(model_summary) -} - - -#' Generate data frame containing the confidence interval based on -#' a force-of-infection fitting -#' -#' This function computes the corresponding binomial confidence intervals for -#' the obtained prevalence based on a fitting of the force-of-infection `foi` -#' for plotting an analysis purposes. -#' @param foi Object containing the information of the force-of-infection. It is -#' obtained from `rstan::extract(seromodel_object$seromodel, "foi", inc_warmup -#' = FALSE)[[1]]`. -#' @param alpha Probability threshold for statistical significance used for both -#' the binomial confidence interval, and the lower and upper quantiles of the -#' estimated prevalence. -#' @inheritParams run_seromodel -#' @param bin_data If `TRUE`, `serodata` is binned by means of -#' `prepare_bin_data`. Otherwise, age groups are kept as originally input. -#' @param bin_step Integer specifying the age groups bin size to be used when -#' `bin_data` is set to `TRUE`. -#' @return `prev_final`. The expanded prevalence data. This is used for plotting -#' purposes in the `visualization` module. -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant" -#' ) -#' foi <- rstan::extract(seromodel_object, "foi")[[1]] -#' get_prev_expanded(foi, serodata) -#' @export -get_prev_expanded <- function(foi, - serodata, - alpha = 0.05, - bin_data = FALSE, - bin_step = 5 - ) { - - if (bin_data && any(serodata$age_max - serodata$age_min > 2)) { - warning("Make sure `serodata` is already grouped by age") - bin_data <- FALSE - } - - foi_expanded <- foi - - ly <- NCOL(foi_expanded) - exposure_expanded <- matrix(0, nrow = ly, ncol = ly) - exposure_expanded[apply( - lower.tri(exposure_expanded, diag = TRUE), - 1, rev - )] <- 1 - - prev_pn <- t(1 - exp(-exposure_expanded %*% t(foi_expanded))) - - predicted_prev <- t( - apply( - prev_pn, - 2, - function(x) { - quantile( - x, - c( - 0.5, - alpha, - 1 - alpha - ) - ) - } - ) - ) - colnames(predicted_prev) <- c( - "predicted_prev", - "predicted_prev_lower", - "predicted_prev_upper" - ) - predicted_prev <- as.data.frame(predicted_prev) - predicted_prev$age <- 1:ly - - if (bin_data) { - observed_prev <- prepare_bin_data( - serodata = serodata, - bin_step = bin_step, - alpha = alpha - ) - } else { - observed_prev <- serodata - } - - observed_prev <- observed_prev %>% - dplyr::select( - "age_mean_f", - "prev_obs", - "prev_obs_lower", - "prev_obs_upper", - "total", - "counts" - ) %>% - rename( - age = "age_mean_f" - ) - - prev_expanded <- - base::merge( - predicted_prev, - observed_prev, - by = "age", - all.x = TRUE - ) %>% - dplyr::mutate(survey = unique(serodata$survey)) - - return(prev_expanded) -} diff --git a/R/visualisation.R b/R/visualisation.R deleted file mode 100644 index 358d67c2..00000000 --- a/R/visualisation.R +++ /dev/null @@ -1,461 +0,0 @@ -#' Generate seropositivity plot from a raw serological -#' survey dataset -#' -#' @inheritParams prepare_serodata -#' @inheritParams get_prev_expanded -#' @param size_text Text size use in the theme of the graph returned by the -#' function. -#' @return A ggplot object containing the seropositivity-vs-age graph of the raw -#' data of a given seroprevalence survey with its corresponding binomial -#' confidence interval. -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' plot_seroprev(serodata, size_text = 15) -#' @export -plot_seroprev <- function(serodata, - size_text = 6, - bin_data = TRUE, - bin_step = 5) { - serodata <- validate_prepared_serodata(serodata = serodata) - if (bin_data) { - if (any(serodata$age_max - serodata$age_min > 2)) { - warn_msg <- paste0( - "Make sure `serodata` is already grouped by age. ", - "Skipping binning in seroprevalence plotting." - ) - warning(warn_msg) - bin_data <- FALSE - } else { - checkmate::assert_int(bin_step, lower = 2) - } - } - - if (bin_data) { - serodata <- prepare_bin_data( - serodata, - bin_step = bin_step - ) - } - - min_prev <- min(serodata$prev_obs_lower) - max_prev <- max(serodata$prev_obs_upper) - - seroprev_plot <- ggplot2::ggplot( - data = serodata, - ggplot2::aes(x = .data$age_mean_f) - ) + - ggplot2::geom_errorbar( - ggplot2::aes( - ymin = .data$prev_obs_lower, - ymax = .data$prev_obs_upper - ), - width = 0.1 - ) + - ggplot2::geom_point( - ggplot2::aes( - y = .data$prev_obs, - size = .data$total - ), - fill = "#7a0177", colour = "black", shape = 21 - ) + - ggplot2::coord_cartesian( - xlim = c(min(serodata$age_min), max(serodata$age_max)), - ylim = c(min_prev, max_prev) - ) + - ggplot2::theme_bw(size_text) + - ggplot2::theme(legend.position = "none") + - ggplot2::ylab("seropositivity") + - ggplot2::xlab("age") - - return(seroprev_plot) -} - -#' Generate seropositivity plot corresponding to the specified -#' fitted serological model -#' -#' This function generates a seropositivity plot of the specified serological -#' model object. This includes the original data grouped by age as well as the -#' obtained fitting from the model implementation. Age is located on the x axis -#' and seropositivity on the y axis with its corresponding confidence interval. -#' @inheritParams get_foi_central_estimates -#' @inheritParams run_seromodel -#' @inheritParams get_prev_expanded -#' @param size_text Text size of the graph returned by the function. -#' @return A ggplot object containing the seropositivity-vs-age graph including -#' the data, the fitted model and their corresponding confidence intervals. -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant", -#' iter = 1000 -#' ) -#' plot_seroprev_fitted(seromodel_object, -#' serodata = serodata, -#' size_text = 15 -#' ) -#' @export -plot_seroprev_fitted <- function(seromodel_object, - serodata, - size_text = 6, - bin_data = TRUE, - bin_step = 5, - alpha = 0.05 - ) { - checkmate::assert_class(seromodel_object, "stanfit", null.ok = TRUE) - validate_prepared_serodata(serodata) - - foi <- rstan::extract(seromodel_object, "foi", inc_warmup = FALSE)[[1]] - - prev_expanded <- get_prev_expanded( - foi, - serodata = serodata, - alpha = alpha, - bin_data = bin_data, - bin_step = bin_step - ) - prev_plot <- - ggplot2::ggplot(prev_expanded, ggplot2::aes(x = .data$age)) + - ggplot2::geom_ribbon( - ggplot2::aes( - ymin = .data$predicted_prev_lower, - ymax = .data$predicted_prev_upper - ), - fill = "#c994c7" - ) + - ggplot2::geom_line( - ggplot2::aes(y = .data$predicted_prev), - colour = "#7a0177" - ) + - ggplot2::geom_errorbar( - ggplot2::aes(ymin = .data$prev_obs_lower, ymax = .data$prev_obs_upper), - width = 0.1 - ) + - ggplot2::geom_point( - ggplot2::aes(y = .data$prev_obs, size = .data$total), - fill = "#7a0177", - colour = "black", - shape = 21 - ) + - ggplot2::theme_bw(size_text) + - ggplot2::coord_cartesian( - xlim = c(min(serodata$age_min), max(serodata$age_max)), - ylim = c( - min(prev_expanded$prev_obs_lower, prev_expanded$predicted_prev_lower), - max(prev_expanded$prev_obs_upper, prev_expanded$predicted_prev_upper) - ) - ) + - ggplot2::theme(legend.position = "none") + - ggplot2::ylab("seropositivity") + - ggplot2::xlab("age") - - return(prev_plot) -} - -# TODO Complete @param documentation - -#' Generate force-of-infection plot corresponding to the -#' specified fitted serological model -#' -#' This function generates a force-of-infection plot from the results obtained -#' by fitting a serological model. This includes the corresponding binomial -#' confidence interval. The x axis corresponds to the decades covered by the -#' survey the y axis to the force-of-infection. -#' @inheritParams get_foi_central_estimates -#' @param size_text Text size use in the theme of the graph returned by the -#' function. -#' @param max_lambda Upper `ylim`for force-of-infection plot -#' @param foi Data frame with the Force-of-infection trend to be plotted -#' alongside the estimated force-of-infection. Typically this corresponds to -#' the force-of-infection used to simulate the serosurvey used to model. -#' @return A ggplot2 object containing the force-of-infection vs time including -#' the corresponding confidence interval. -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant", -#' iter = 1000 -#' ) -#' cohort_ages <- get_cohort_ages(serodata) -#' plot_foi( -#' seromodel_object = seromodel_object, -#' cohort_ages = cohort_ages, -#' size_text = 15 -#' ) -#' @export -plot_foi <- function(seromodel_object, - serodata, - max_lambda = NA, - size_text = 25, - foi = NULL) { - checkmate::assert_class(seromodel_object, "stanfit", null.ok = TRUE) - - #-------- This bit is to get the actual length of the foi data - foi_data <- get_foi_central_estimates( - seromodel_object = seromodel_object, - serodata = serodata - ) - - if ( - seromodel_object@model_name %in% - c("constant", "tv_normal", "tv_normal_log") - ) { - xlab <- "year" - foi_plot <- - ggplot2::ggplot(foi_data, ggplot2::aes(x = .data$year)) - - if (!is.null(foi)) { - stopifnot( - "`year` must be present in `foi`" = "year" %in% colnames(foi) - ) - foi_data <- dplyr::left_join( - foi_data, - foi, - by = "year" - ) - foi_plot <- foi_plot + - ggplot2::geom_line( - data = foi_data, ggplot2::aes(y = foi), - colour = "#b30909", - size = size_text / 8 - ) - } - } else if (seromodel_object@model_name == "av_normal") { - xlab <- "age" - foi_plot <- - ggplot2::ggplot(foi_data, ggplot2::aes(x = .data$age)) - - if (!is.null(foi)) { - stopifnot( - "`age` must be present in `foi`" = "age" %in% colnames(foi) - ) - foi_data <- dplyr::left_join( - foi_data, - foi, - by = "age" - ) - foi_plot <- foi_plot + - ggplot2::geom_line( - data = foi_data, ggplot2::aes(y = foi), - colour = "#b30909", - size = size_text / 8 - ) - } - } - - foi_plot <- foi_plot + - ggplot2::geom_ribbon( - ggplot2::aes( - ymin = .data$lower, - ymax = .data$upper - ), - fill = "#41b6c4", - alpha = 0.5 - ) + - ggplot2::geom_line( - ggplot2::aes(y = .data$medianv), - colour = "#253494", - size = size_text / 8 - ) + - ggplot2::theme_bw(size_text) + - ggplot2::coord_cartesian(ylim = c(0, max_lambda)) + - ggplot2::ylab("Force-of-Infection") + - ggplot2::xlab(xlab) - - return(foi_plot) -} - -#' Generate plot of the R-hat estimates for the specified fitted -#' serological model -#' -#' This function generates a plot of the R-hat estimates obtained for a -#' specified fitted serological model `seromodel_object`. The x axis corresponds -#' to the decades covered by the survey and the y axis to the value of the -#' rhats. All rhats must be smaller than 1 to ensure convergence (for further -#' details check [rhat][bayesplot::rhat]). -#' @inheritParams get_foi_central_estimates -#' @param size_text Text size use in the theme of the graph returned by the -#' function. -#' @return The rhats-convergence plot of the selected model. -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant", -#' iter = 1000 -#' ) -#' cohort_ages <- get_cohort_ages(serodata = serodata) -#' plot_rhats(seromodel_object, -#' cohort_ages = cohort_ages, -#' size_text = 15 -#' ) -#' @export -plot_rhats <- function(seromodel_object, - cohort_ages, - size_text = 25) { - checkmate::assert_class(seromodel_object, "stanfit", null.ok = TRUE) - - rhats <- get_table_rhats( - seromodel_object = seromodel_object, - cohort_ages = cohort_ages - ) - - if ( - seromodel_object@model_name %in% - c("constant", "tv_normal", "tv_normal_log") - ) { - rhats_plot <- - ggplot2::ggplot(rhats, ggplot2::aes(.data$year, .data$rhat)) - } else if (seromodel_object@model_name == "av_normal") { - rhats_plot <- - ggplot2::ggplot(rhats, ggplot2::aes(.data$age, .data$rhat)) - } - - rhats_plot <- rhats_plot + - ggplot2::geom_line(colour = "purple") + - ggplot2::geom_point() + - ggplot2::coord_cartesian( - ylim = c( - min(1.0, min(rhats$rhat)), - max(1.02, max(rhats$rhat)) - ) - ) + - ggplot2::geom_hline( - yintercept = 1.01, - colour = "blue", - size = size_text / 12 - ) + - ggplot2::theme_bw(size_text) + - ggplot2::ylab("Convergence (R^)") - - return(rhats_plot) -} - -#' Generate vertical arrangement of plots showing a summary of a -#' model, the estimated seroprevalence, the force-of-infection fit and the R-hat -#' estimates plots. -#' -#' @inheritParams get_foi_central_estimates -#' @inheritParams run_seromodel -#' @inheritParams get_prev_expanded -#' @inheritParams plot_foi -#' @param size_text Text size use in the theme of the graph returned by the -#' function. -#' @return A ggplot object with a vertical arrange containing the -#' seropositivity, force of infection, and convergence plots. -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant", -#' iter = 1000 -#' ) -#' plot_seromodel(seromodel_object, -#' serodata = serodata, -#' size_text = 15 -#' ) -#' @export -plot_seromodel <- function(seromodel_object, - serodata, - alpha = 0.05, - max_lambda = NA, - size_text = 25, - bin_data = TRUE, - bin_step = 5, - foi = NULL) { - checkmate::assert_class(seromodel_object, "stanfit", null.ok = TRUE) - serodata <- validate_serodata(serodata) - - cohort_ages <- get_cohort_ages(serodata = serodata) - - prev_plot <- plot_seroprev_fitted( - seromodel_object = seromodel_object, - serodata = serodata, - alpha = alpha, - size_text = size_text, - bin_data = bin_data, - bin_step = bin_step - ) - - foi_plot <- plot_foi( - seromodel_object = seromodel_object, - serodata = serodata, - max_lambda = max_lambda, - size_text = size_text, - foi = foi - ) - - rhats_plot <- plot_rhats( - seromodel_object = seromodel_object, - cohort_ages = cohort_ages, - size_text = size_text - ) - - model_summary <- extract_seromodel_summary( - seromodel_object = seromodel_object, - serodata = serodata - ) - summary_table <- t( - dplyr::select( - model_summary, - c("foi_model", "dataset", "elpd", "se", "converged") - ) - ) - summary_plot <- - plot_info_table(summary_table, size_text = size_text) - - plot_arrange <- cowplot::plot_grid( - summary_plot, - prev_plot, - foi_plot, - rhats_plot, - ncol = 1, - nrow = 4, - rel_heights = c(0.5, 1, 1, 1) - ) - return(plot_arrange) -} - -#' Generate plot summarizing a given table -#' -#' @param info_table Table with the information to be summarised -#' @param size_text Text size of the graph returned by the function -#' @return ggplot object summarizing the information in `info_table` -#' @examples -#' serodata <- prepare_serodata(chagas2012) -#' seromodel_object <- fit_seromodel( -#' serodata = serodata, -#' foi_model = "constant", -#' iter = 1000 -#' ) -#' seromodel_summary <- extract_seromodel_summary( -#' seromodel_object = seromodel_object, -#' serodata = serodata -#' ) -#' info_table <- t(seromodel_summary) -#' plot_info_table(info_table, size_text = 15) -#' @export -plot_info_table <- function(info_table, size_text) { - dato <- data.frame( - y = NROW(info_table):seq_len(1), - text = paste0(rownames(info_table), ": ", info_table[, 1]) - ) - p <- ggplot2::ggplot(dato, ggplot2::aes(x = 1, y = .data$y)) + - ggplot2::scale_y_continuous( - limits = c(0, NROW(info_table) + 1), - breaks = NULL - ) + - ggplot2::theme_void() + - ggplot2::geom_text(ggplot2::aes(label = text), - size = size_text / 2.5, - fontface = "bold" - ) - - return(p) -} From 998a182eb1cd9433cfca14b5f27761bef8c07f8a Mon Sep 17 00:00:00 2001 From: ntorresd Date: Fri, 24 May 2024 19:48:22 -0500 Subject: [PATCH 04/78] add config file with default priors and distribution indexes --- inst/extdata/config.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 inst/extdata/config.yml diff --git a/inst/extdata/config.yml b/inst/extdata/config.yml new file mode 100644 index 00000000..c27add47 --- /dev/null +++ b/inst/extdata/config.yml @@ -0,0 +1,11 @@ +default: + priors: + indexes: + uniform : 0 + normal : 1 + defaults: + prior_index : 1 + min : 0 + max : 10 + mean : 0 + sd : 1 From c86101a36d27832e28868d11a77dc62e94a6d242 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Fri, 24 May 2024 19:49:33 -0500 Subject: [PATCH 05/78] add input validation utilities --- R/validation.R | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 R/validation.R diff --git a/R/validation.R b/R/validation.R new file mode 100644 index 00000000..14e2570a --- /dev/null +++ b/R/validation.R @@ -0,0 +1,57 @@ +stop_if_missing <- function(serosurvey, must_have_cols) { + if ( + !all( + must_have_cols + %in% colnames(serosurvey) + ) + ) { + missing <- must_have_cols[which(!(must_have_cols %in% colnames(serosurvey)))] + stop( + "The following mandatory columns in `serosurvey` are missing.\n", + toString(missing) + ) + } +} + +stop_if_wrong_type <- function(serosurvey, col_types) { + error_messages <- list() + for (col in names(col_types)) { + valid_col_types <- as.list(col_types[[col]]) + + # Only validates column type if the column exists in the dataframe + if (col %in% colnames(serosurvey) && + !any(vapply(valid_col_types, function(type) { + do.call(sprintf("is.%s", type), list(serosurvey[[col]])) + }, logical(1)))) { + error_messages <- append( + error_messages, + sprintf( + "`%s` must be of any of these types: `%s`", + col, toString(col_types[[col]]) + ) + ) + } + } + if (length(error_messages) > 0) { + stop( + "The following columns in `serosurvey` have wrong types:\n", + toString(error_messages) + ) + } +} + +validate_serosurvey <- function(serosurvey) { + col_types <- list( + age_min = "numeric", + age_max = "numeric", + sample_size = "numeric", + n_seropositive = "numeric", + tsur = "numeric" + ) + + stop_if_missing(serosurvey, must_have_cols = names(col_types)) + + stop_if_wrong_type(serosurvey, col_types) + + return(serosurvey) +} From ba67365c796d7bf10d2555cb086c63fac7efaa94 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Fri, 24 May 2024 19:50:09 -0500 Subject: [PATCH 06/78] feat: add working version of `fit_seromodel` using functional specification of priors --- R/build_stan_data.R | 120 ++++++++++++++++++++++++++++++++++++++++++++ R/fit_seromodel.R | 48 ++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 R/build_stan_data.R create mode 100644 R/fit_seromodel.R diff --git a/R/build_stan_data.R b/R/build_stan_data.R new file mode 100644 index 00000000..b1de99af --- /dev/null +++ b/R/build_stan_data.R @@ -0,0 +1,120 @@ +sf_normal <- function(mean = 0, sd = 1) { + # Restricting normal inputs to be non-negative + if(mean < 0 | sd <= 0) { + msg <- paste0( + "Normal distribution here only accepts", + " non-negative values for mean and standard deviation" + ) + message(msg) + stop() + } + + return(list(mean = mean, sd = sd, name = "normal")) +} + +sf_uniform <- function(min = 0, max = 10) { + # Restricting uniform inputs to be non-negative + if (min < 0 | max < 0) { + msg <- paste0( + "Uniform distribution here only accepts", + " non-negative values for min and max" + ) + message(msg) + stop() + } + if (min >= max) { + message("Uniform distribution only accepts min < max") + } + + return(list(min = min, max = max, name = "uniform")) +} + +sf_none <- function() { + return(list(name = "none")) +} + +set_stan_data_defaults <- function( + stan_data, + is_seroreversion = FALSE +) { + config_file <- "inst/extdata/config.yml" + prior_default <- config::get(file = config_file, "priors")$defaults + + foi_defaults <- list( + foi_prior_index = prior_default$index, + foi_min = prior_default$min, + foi_max = prior_default$max, + foi_mean = prior_default$mean, + foi_sd = prior_default$sd + ) + stan_data <- append( + stan_data, + foi_defaults + ) + + if (is_seroreversion) { + seroreversion_defaults <- list( + seroreversion_prior_index = prior_default$index, + seroreversion_min = prior_default$min, + seroreversion_max = prior_default$max, + seroreversion_mean = prior_default$mean, + seroreversion_sd = prior_default$sd + ) + stan_data <- append( + stan_data, + seroreversion_defaults + ) + } + + return(stan_data) +} + +build_stan_data <- function( + serosurvey, + model_type = "constant", + foi_prior = sf_uniform(), + is_seroreversion = FALSE, + seroreversion_prior = sf_none() +) { + + stan_data <- list( + n_observations = nrow(serosurvey), + age_max = max(serosurvey$age_max), + ages = seq(1, max(serosurvey$age_max), 1), + n_seropositive = serosurvey$n_seropositive, + sample_size = serosurvey$sample_size, + age_groups = serosurvey$age_group + ) %>% + set_stan_data_defaults(is_seroreversion = is_seroreversion) + + config_file <- "inst/extdata/config.yml" + prior_index <- config::get(file = config_file, "priors")$indexes + if (foi_prior$name == "uniform") { + stan_data$foi_prior_index <- prior_index[["uniform"]] + stan_data$foi_min <- foi_prior$min + stan_data$foi_max <- foi_prior$max + } else if (foi_prior$name == "normal") { + stan_data$foi_prior_index <- prior_index[["normal"]] + stan_data$foi_mean <- foi_prior$mean + stan_data$foi_sd <- foi_prior$sd + } + + if (is_seroreversion) { + if(seroreversion_prior$name == "none") { + message("seroreversion_prior not specified") + stop() + } + else if (seroreversion_prior$name == "uniform") { + stan_data$seroreversion_prior_index <- prior_index[["uniform"]] + stan_data$seroreversion_min <- seroreversion_prior$min + stan_data$seroreversion_max <- seroreversion_prior$max + } + else if (seroreversion_prior$name == "normal") { + stan_data$seroreversion_prior_index <- prior_index[["normal"]] + stan_data$seroreversion_mean <- seroreversion_prior$mean + stan_data$seroreversion_sd <- seroreversion_prior$sd + } + } + + return(stan_data) +} diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R new file mode 100644 index 00000000..c01eee82 --- /dev/null +++ b/R/fit_seromodel.R @@ -0,0 +1,48 @@ +add_age_group_to_serosurvey <- function(serosurvey) { + if (!any(colnames(serosurvey) == "age_group")) { + serosurvey <- serosurvey %>% + dplyr::mutate( + age_group = floor((.data$age_min + .data$age_max) / 2) + ) + } else { + message("Using `age_group` already present in serosurvey") + } + return(serosurvey) +} + +fit_seromodel <- function( + serosurvey, + model_type = "constant", + foi_prior = sf_normal(), + is_seroreversion = FALSE, + seroreversion_prior = sf_uniform(), + ... +) { + serosurvey <- serosurvey %>% + validate_serosurvey() %>% + add_age_group_to_serosurvey() + + stan_data <- build_stan_data( + serosurvey = serosurvey, + model_type = model_type, + foi_prior = foi_prior, + is_seroreversion = is_seroreversion, + seroreversion_prior = seroreversion_prior + ) + + if (is_seroreversion) + model_name <- paste0(model_type, "_seroreversion") + else + model_name <- model_type + + # model <- stan_models[[model_name]] + model <- rstan::stan_model(paste0("inst/stan/", model_name, ".stan")) + + seromodel <- rstan::sampling( + model, + data = stan_data, + ... + ) + + return(seromodel) +} From 9ca82760606038735aedd150d277e1b71ba7b977 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Fri, 24 May 2024 19:51:09 -0500 Subject: [PATCH 07/78] feat: add clean seroprevalence visualisation functions --- R/plot_seroprevalence.R | 103 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 R/plot_seroprevalence.R diff --git a/R/plot_seroprevalence.R b/R/plot_seroprevalence.R new file mode 100644 index 00000000..07a5f187 --- /dev/null +++ b/R/plot_seroprevalence.R @@ -0,0 +1,103 @@ +prepare_serosurvey_for_plotting <- function( + serosurvey, + alpha = 0.05 + ) { + + serosurvey <- serosurvey %>% + add_age_group_to_serosurvey() %>% + cbind( + Hmisc::binconf( + serosurvey$n_seropositive, + serosurvey$sample_size, + alpha = alpha, + method = "exact", + return.df = TRUE + ) + ) %>% + dplyr::rename( + seroprev = "PointEst", + seroprev_lower = "Lower", + seroprev_upper = "Upper" + ) %>% + dplyr::arrange(.data$age_group) %>% + dplyr::relocate(age_group) +} + +plot_serosurvey <- function( + serosurvey, + size_text = 11 + ) { + serosurvey <- validate_serosurvey(serosurvey = serosurvey) %>% + prepare_serosurvey_for_plotting() + + min_prev <- min(serosurvey$seroprev_lower) + max_prev <- max(serosurvey$seroprev_upper) + + seroprev_plot <- ggplot2::ggplot( + data = serosurvey, + ggplot2::aes(x = .data$age_group) + ) + + ggplot2::geom_errorbar( + ggplot2::aes( + ymin = .data$seroprev_lower, + ymax = .data$seroprev_upper, + ), + width = 0.1 + ) + + ggplot2::geom_point( + ggplot2::aes( + y = .data$seroprev, + size = .data$sample_size + ), + fill = "#7a0177", colour = "black", shape = 21 + ) + + ggplot2::coord_cartesian( + xlim = c(min(serosurvey$age_min), max(serosurvey$age_max)), + ylim = c(min_prev, max_prev) + ) + + ggplot2::theme_bw(size_text) + + ggplot2::theme(legend.position = "none") + + ggplot2::ylab("Seroprevalence") + + ggplot2::xlab("Age") + + return(seroprev_plot) +} + +plot_seroprevalence_estimates <- function( + seromodel, + serosurvey, + alpha = 0.05, + ... +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + seroprevalence_samples <- rstan::extract( + seromodel, "prob_infected_expanded" + )[[1]] + seroprevalence_estimates <- data.frame( + age = seq(1, max(serosurvey$age_max)), + median = apply(seroprevalence_samples, 2, quantile, 0.5), + lower = apply(seroprevalence_samples, 2, quantile, alpha), + upper = apply(seroprevalence_samples, 2, quantile, 1 - alpha) + ) + + seroprevalence_plot <- plot_serosurvey( + serosurvey = serosurvey, + ... + ) + + ggplot2::geom_line( + data = seroprevalence_estimates, + ggplot2::aes(x = age, y = median), + colour = "#7a0177" + ) + + ggplot2::geom_ribbon( + data = seroprevalence_estimates, + ggplot2::aes(x = age, ymin = lower, ymax = upper), + fill = "#c994c7", alpha = 0.5 + ) + + ggplot2::coord_cartesian( + xlim = c(0, max(serosurvey$age_max)) + ) + + return(seroprevalence_plot) +} From 58a838d4e5394cbc6fe18df75e3ba1f012d7393e Mon Sep 17 00:00:00 2001 From: ntorresd Date: Fri, 24 May 2024 21:45:16 -0500 Subject: [PATCH 08/78] feat: add age_seroreversion model --- inst/stan/age_seroreversion.stan | 70 ++++++++++++++++++++++ inst/stan/constant.stan | 6 +- inst/stan/constant_seroreversion.stan | 11 ++-- inst/stan/data/foi_prior_data.stan | 2 + inst/stan/functions/prob_infected_age.stan | 38 ++++++++++++ 5 files changed, 119 insertions(+), 8 deletions(-) create mode 100644 inst/stan/age_seroreversion.stan create mode 100644 inst/stan/functions/prob_infected_age.stan diff --git a/inst/stan/age_seroreversion.stan b/inst/stan/age_seroreversion.stan new file mode 100644 index 00000000..0d19418d --- /dev/null +++ b/inst/stan/age_seroreversion.stan @@ -0,0 +1,70 @@ +functions { + #include functions/prob_infected_age.stan +} + +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan + #include data/seroreversion_prior_data.stan +} + +transformed data { + int n_foi = max(foi_index); +} + +parameters { + vector[n_foi] foi_vector; + real seroreversion_rate; + real sigma; +} + +transformed parameters { + vector[n_observations] prob_infected; + + prob_infected = prob_infected_age( + age_groups, + n_observations, + foi_vector, + foi_index, + seroreversion_rate + ); +} + +model { + n_seropositive ~ binomial(sample_size, prob_infected); + sigma ~ cauchy(0, 1); + + // force of infection prior + if (foi_prior_index == 0) + foi_vector[1] ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi_vector[1] ~ normal(foi_mean, foi_sd); + + for(i in 2:n_foi) + foi_vector[i] ~ normal(foi_vector[i - 1], sigma); + + // seroreversion prior + if (seroreversion_prior_index == 0) + seroreversion_rate ~ uniform(seroreversion_min, seroreversion_max); + if (seroreversion_prior_index == 1) + seroreversion_rate ~ normal(seroreversion_mean, seroreversion_sd); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_expanded; + + for(age in 1:age_max) { + foi_expanded[age] = foi_vector[foi_index[age]]; + } + + prob_infected_expanded = prob_infected_age( + ages, + age_max, + foi_vector, + foi_index, + seroreversion_rate + ); +} diff --git a/inst/stan/constant.stan b/inst/stan/constant.stan index 55ae6d96..6c8862ac 100644 --- a/inst/stan/constant.stan +++ b/inst/stan/constant.stan @@ -35,12 +35,12 @@ generated quantities{ #include generated_quantities/log_likelihood.stan vector[age_max] prob_infected_expanded; - vector[age_max] foi_vector; + vector[age_max] foi_expanded; for(i in 1:age_max) { - foi_vector[i] = foi; + foi_expanded[i] = foi; } - + prob_infected_expanded = prob_infected_constant( ages, age_max, diff --git a/inst/stan/constant_seroreversion.stan b/inst/stan/constant_seroreversion.stan index 16e463e0..1aa7ee45 100644 --- a/inst/stan/constant_seroreversion.stan +++ b/inst/stan/constant_seroreversion.stan @@ -1,6 +1,7 @@ functions { #include functions/prob_infected_constant.stan } + data { #include data/basic_data.stan #include data/foi_prior_data.stan @@ -14,7 +15,7 @@ parameters { transformed parameters { vector[n_observations] prob_infected; - + prob_infected = prob_infected_constant( age_groups, n_observations, @@ -31,7 +32,7 @@ model { foi ~ uniform(foi_min, foi_max); if (foi_prior_index == 1) foi ~ normal(foi_mean, foi_sd); - + // seroreversion prior if (seroreversion_prior_index == 0) seroreversion_rate ~ uniform(seroreversion_min, seroreversion_max); @@ -43,12 +44,12 @@ generated quantities{ #include generated_quantities/log_likelihood.stan vector[age_max] prob_infected_expanded; - vector[age_max] foi_vector; + vector[age_max] foi_expanded; for(i in 1:age_max) { - foi_vector[i] = foi; + foi_expanded[i] = foi; } - + prob_infected_expanded = prob_infected_constant( ages, age_max, diff --git a/inst/stan/data/foi_prior_data.stan b/inst/stan/data/foi_prior_data.stan index 1f8e9178..c731f3f9 100644 --- a/inst/stan/data/foi_prior_data.stan +++ b/inst/stan/data/foi_prior_data.stan @@ -1,5 +1,7 @@ // prior index int foi_prior_index; + // foi indexes (chunks) + int foi_index[age_max]; // uniform real foi_min; real foi_max; diff --git a/inst/stan/functions/prob_infected_age.stan b/inst/stan/functions/prob_infected_age.stan new file mode 100644 index 00000000..6daf6b63 --- /dev/null +++ b/inst/stan/functions/prob_infected_age.stan @@ -0,0 +1,38 @@ +real prob_infected_age_single_age( + int age, + real age_max, + vector foi_vector, + int[] foi_index, + real seroreversion_rate +) { + real prob = 0.0; + for(j in 1:age){ + real foi = foi_vector[foi_index[j]]; + real lambda_over_both = foi / (foi + seroreversion_rate); + real e_lower = exp(-(foi + seroreversion_rate)); + + prob = lambda_over_both + e_lower * (prob - lambda_over_both); + } + return prob; +} + +vector prob_infected_age( + int[] ages, + int n_ages, + vector foi_vector, + int[] foi_index, + real seroreversion_rate +) { + vector[n_ages] prob_infected; + + for (i in 1:n_ages) { + prob_infected[i] = prob_infected_age_single_age( + ages[i], + n_ages, + foi_vector, + foi_index, + seroreversion_rate + ); + } + return prob_infected; +} From 760096d6d3221f4b4d4773d0454cd92c207c0627 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Fri, 24 May 2024 21:46:26 -0500 Subject: [PATCH 09/78] feat: add `foi_index` (old `chunks`) default behavior --- R/build_stan_data.R | 47 +++++++++++++++++++++++++++++++++++++++++++++ R/fit_seromodel.R | 2 ++ 2 files changed, 49 insertions(+) diff --git a/R/build_stan_data.R b/R/build_stan_data.R index b1de99af..cc652297 100644 --- a/R/build_stan_data.R +++ b/R/build_stan_data.R @@ -33,6 +33,38 @@ sf_none <- function() { return(list(name = "none")) } +get_foi_index <- function( + serosurvey, + group_size + ) { + checkmate::assert_int( + group_size, + lower = 1, + upper = max(serosurvey$age_max) + ) + + foi_index <- unlist( + purrr::map( + seq( + 1, + max(serosurvey$age_max) / group_size, + 1), + rep, + times = group_size + ) + ) + + foi_index <- append( + foi_index, + rep( + max(foi_index), + max(serosurvey$age_max) - length(foi_index) + ) + ) + + return(foi_index) +} + set_stan_data_defaults <- function( stan_data, is_seroreversion = FALSE @@ -73,6 +105,7 @@ build_stan_data <- function( serosurvey, model_type = "constant", foi_prior = sf_uniform(), + foi_index = NULL, is_seroreversion = FALSE, seroreversion_prior = sf_none() ) { @@ -87,6 +120,20 @@ build_stan_data <- function( ) %>% set_stan_data_defaults(is_seroreversion = is_seroreversion) + if (is.null(foi_index)) { + foi_index_default <- get_foi_index(serosurvey = serosurvey, group_size = 1) + stan_data <- append( + stan_data, + list(foi_index = foi_index_default) + ) + } else { + # TODO: check that foi_index is the right size + stan_data <- append( + stan_data, + list(foi_index = foi_index) + ) + } + config_file <- "inst/extdata/config.yml" prior_index <- config::get(file = config_file, "priors")$indexes if (foi_prior$name == "uniform") { diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index c01eee82..0677b13a 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -14,6 +14,7 @@ fit_seromodel <- function( serosurvey, model_type = "constant", foi_prior = sf_normal(), + foi_index = NULL, is_seroreversion = FALSE, seroreversion_prior = sf_uniform(), ... @@ -26,6 +27,7 @@ fit_seromodel <- function( serosurvey = serosurvey, model_type = model_type, foi_prior = foi_prior, + foi_index = foi_index, is_seroreversion = is_seroreversion, seroreversion_prior = seroreversion_prior ) From 15aad8de32a7f3e7e240c56dd2d0a706f6adcf75 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Fri, 24 May 2024 21:47:52 -0500 Subject: [PATCH 10/78] feat: add `plot_foi_estimates` with option to add foi trend --- R/plot_seromodel.R | 198 ++++++++++++++++++++++++++++++++++++++++ R/plot_seroprevalence.R | 103 --------------------- 2 files changed, 198 insertions(+), 103 deletions(-) create mode 100644 R/plot_seromodel.R delete mode 100644 R/plot_seroprevalence.R diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R new file mode 100644 index 00000000..a4fe871f --- /dev/null +++ b/R/plot_seromodel.R @@ -0,0 +1,198 @@ +prepare_serosurvey_for_plotting <- function( + serosurvey, + alpha = 0.05 + ) { + + serosurvey <- serosurvey %>% + add_age_group_to_serosurvey() %>% + cbind( + Hmisc::binconf( + serosurvey$n_seropositive, + serosurvey$sample_size, + alpha = alpha, + method = "exact", + return.df = TRUE + ) + ) %>% + dplyr::rename( + seroprev = "PointEst", + seroprev_lower = "Lower", + seroprev_upper = "Upper" + ) %>% + dplyr::arrange(.data$age_group) %>% + dplyr::relocate(age_group) +} + +plot_serosurvey <- function( + serosurvey, + size_text = 11 + ) { + serosurvey <- validate_serosurvey(serosurvey = serosurvey) %>% + prepare_serosurvey_for_plotting() + + min_prev <- min(serosurvey$seroprev_lower) + max_prev <- max(serosurvey$seroprev_upper) + + seroprev_plot <- ggplot2::ggplot( + data = serosurvey, + ggplot2::aes(x = .data$age_group) + ) + + ggplot2::geom_errorbar( + ggplot2::aes( + ymin = .data$seroprev_lower, + ymax = .data$seroprev_upper, + ), + width = 0.1 + ) + + ggplot2::geom_point( + ggplot2::aes( + y = .data$seroprev, + size = .data$sample_size + ), + fill = "#7a0177", colour = "black", shape = 21 + ) + + ggplot2::coord_cartesian( + xlim = c(min(serosurvey$age_min), max(serosurvey$age_max)), + ylim = c(min_prev, max_prev) + ) + + ggplot2::theme_bw(size_text) + + ggplot2::theme(legend.position = "none") + + ggplot2::ylab("Seroprevalence") + + ggplot2::xlab("Age") + + return(seroprev_plot) +} + +extract_central_estimates <- function( + seromodel, + serosurvey, + alpha = 0.05, + par_name = "foi_vector" +) { + samples <- rstan::extract(seromodel, par_name)[[1]] + central_estimates <- data.frame( + median = apply(samples, 2, quantile, 0.5), + lower = apply(samples, 2, quantile, alpha), + upper = apply(samples, 2, quantile, 1 - alpha) + ) + + return(central_estimates) +} + +plot_seroprevalence_estimates <- function( + seromodel, + serosurvey, + alpha = 0.05, + ... +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + seroprevalence_central_estimates <- extract_central_estimates( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + par_name = "prob_infected_expanded" + ) %>% + mutate(age = seq(1, max(serosurvey$age_max))) + + seroprevalence_plot <- plot_serosurvey( + serosurvey = serosurvey, + ... + ) + + ggplot2::geom_line( + data = seroprevalence_central_estimates, + ggplot2::aes(x = age, y = median), + colour = "#7a0177" + ) + + ggplot2::geom_ribbon( + data = seroprevalence_central_estimates, + ggplot2::aes(x = age, ymin = lower, ymax = upper), + fill = "#c994c7", alpha = 0.5 + ) + + ggplot2::coord_cartesian( + xlim = c(0, max(serosurvey$age_max)) + ) + + return(seroprevalence_plot) +} + +plot_foi_estimates <- function( + seromodel, + serosurvey, + alpha = 0.05, + foi_df = NULL, + size_text = 11, + foi_max = NULL +) { + # TODO: Add checks for foi_df (size, colnames, etc.) + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + foi_central_estimates <- extract_central_estimates( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + par_name = "foi_expanded" + ) + + if (is.null(foi_max)) + foi_max <- max(foi_central_estimates$upper) + + if (startsWith(seromodel@model_name, "age")) { + xlab <- "Age" + ages <- 1:max(serosurvey$age_max) + foi_central_estimates <- mutate( + foi_central_estimates, + age = ages + ) + if(!is.null(foi_df)) { + foi_central_estimates <- foi_central_estimates %>% + left_join(foi_df, by = "age") + } + foi_plot <- ggplot2::ggplot( + data = foi_central_estimates, ggplot2::aes(x = age) + ) + } else if (startsWith(seromodel@model_name, "time")) { + xlab <- "Year" + ages <- rev(1:max(serosurvey$age_max)) + years <- unique(serosurvey$tsur) - ages + foi_central_estimates <- mutate( + foi_central_estimates, + year = years + ) + if(!is.null(foi_df)) { + foi_central_estimates <- foi_central_estimates %>% + left_join(foi_df, by = "year") + } + foi_plot <- ggplot2::ggplot( + data = foi_central_estimates, ggplot2::aes(x = year) + ) + } + + foi_plot <- foi_plot + + ggplot2::geom_ribbon( + ggplot2::aes( + ymin = .data$lower, + ymax = .data$upper + ), + fill = "#41b6c4", + alpha = 0.5 + ) + + ggplot2::geom_line( + ggplot2::aes(y = .data$median), + colour = "#253494" + ) + + ggplot2::theme_bw(size_text) + + ggplot2::coord_cartesian(ylim = c(0, foi_max)) + + ggplot2::ylab("Force-of-Infection") + + ggplot2::xlab(xlab) + + if (!is.null(foi_df)) { + foi_plot <- foi_plot + + ggplot2::geom_line( + ggplot2::aes(y = foi), + colour = "#b30909" + ) + } + + return(foi_plot) +} diff --git a/R/plot_seroprevalence.R b/R/plot_seroprevalence.R deleted file mode 100644 index 07a5f187..00000000 --- a/R/plot_seroprevalence.R +++ /dev/null @@ -1,103 +0,0 @@ -prepare_serosurvey_for_plotting <- function( - serosurvey, - alpha = 0.05 - ) { - - serosurvey <- serosurvey %>% - add_age_group_to_serosurvey() %>% - cbind( - Hmisc::binconf( - serosurvey$n_seropositive, - serosurvey$sample_size, - alpha = alpha, - method = "exact", - return.df = TRUE - ) - ) %>% - dplyr::rename( - seroprev = "PointEst", - seroprev_lower = "Lower", - seroprev_upper = "Upper" - ) %>% - dplyr::arrange(.data$age_group) %>% - dplyr::relocate(age_group) -} - -plot_serosurvey <- function( - serosurvey, - size_text = 11 - ) { - serosurvey <- validate_serosurvey(serosurvey = serosurvey) %>% - prepare_serosurvey_for_plotting() - - min_prev <- min(serosurvey$seroprev_lower) - max_prev <- max(serosurvey$seroprev_upper) - - seroprev_plot <- ggplot2::ggplot( - data = serosurvey, - ggplot2::aes(x = .data$age_group) - ) + - ggplot2::geom_errorbar( - ggplot2::aes( - ymin = .data$seroprev_lower, - ymax = .data$seroprev_upper, - ), - width = 0.1 - ) + - ggplot2::geom_point( - ggplot2::aes( - y = .data$seroprev, - size = .data$sample_size - ), - fill = "#7a0177", colour = "black", shape = 21 - ) + - ggplot2::coord_cartesian( - xlim = c(min(serosurvey$age_min), max(serosurvey$age_max)), - ylim = c(min_prev, max_prev) - ) + - ggplot2::theme_bw(size_text) + - ggplot2::theme(legend.position = "none") + - ggplot2::ylab("Seroprevalence") + - ggplot2::xlab("Age") - - return(seroprev_plot) -} - -plot_seroprevalence_estimates <- function( - seromodel, - serosurvey, - alpha = 0.05, - ... -) { - checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) - - seroprevalence_samples <- rstan::extract( - seromodel, "prob_infected_expanded" - )[[1]] - seroprevalence_estimates <- data.frame( - age = seq(1, max(serosurvey$age_max)), - median = apply(seroprevalence_samples, 2, quantile, 0.5), - lower = apply(seroprevalence_samples, 2, quantile, alpha), - upper = apply(seroprevalence_samples, 2, quantile, 1 - alpha) - ) - - seroprevalence_plot <- plot_serosurvey( - serosurvey = serosurvey, - ... - ) + - ggplot2::geom_line( - data = seroprevalence_estimates, - ggplot2::aes(x = age, y = median), - colour = "#7a0177" - ) + - ggplot2::geom_ribbon( - data = seroprevalence_estimates, - ggplot2::aes(x = age, ymin = lower, ymax = upper), - fill = "#c994c7", alpha = 0.5 - ) + - ggplot2::coord_cartesian( - xlim = c(0, max(serosurvey$age_max)) - ) - - return(seroprevalence_plot) -} From a371ac320ec3a5d52d9c8cb9063006b42cc5ca30 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Sat, 25 May 2024 09:11:17 -0500 Subject: [PATCH 11/78] change model name for models without seroreversion to *_no_seroreversion --- R/fit_seromodel.R | 2 +- ...tant.stan => constant_no_seroreversion.stan} | 0 inst/stan/functions/prob_infected_av.stan | 17 ----------------- 3 files changed, 1 insertion(+), 18 deletions(-) rename inst/stan/{constant.stan => constant_no_seroreversion.stan} (100%) delete mode 100644 inst/stan/functions/prob_infected_av.stan diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index 0677b13a..5fc64879 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -35,7 +35,7 @@ fit_seromodel <- function( if (is_seroreversion) model_name <- paste0(model_type, "_seroreversion") else - model_name <- model_type + model_name <- paste0(model_type, "_no_seroreversion") # model <- stan_models[[model_name]] model <- rstan::stan_model(paste0("inst/stan/", model_name, ".stan")) diff --git a/inst/stan/constant.stan b/inst/stan/constant_no_seroreversion.stan similarity index 100% rename from inst/stan/constant.stan rename to inst/stan/constant_no_seroreversion.stan diff --git a/inst/stan/functions/prob_infected_av.stan b/inst/stan/functions/prob_infected_av.stan deleted file mode 100644 index a2d09d46..00000000 --- a/inst/stan/functions/prob_infected_av.stan +++ /dev/null @@ -1,17 +0,0 @@ - real prob_infected_age_varying( - int age, - real age_max, - vector foi_vector, - int[] chunks, - real seroreversion_rate - ) { - real prob = 0.0; - for(j in 1:age){ - real foi = foi_vector[chunks[j]]; - real lambda_over_both = foi / (foi + seroreversion_rate); - real e_lower = exp(-(foi + seroreversion_rate)); - - prob = lambda_over_both + e_lower * (prob - lambda_over_both); - } - return prob; - } From 6389d3c81cd07294c1f4fb0f0c656f994abffdf9 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Sat, 25 May 2024 09:12:33 -0500 Subject: [PATCH 12/78] feat: add age-varying model without seroreversion --- inst/stan/age_no_seroreversion.stan | 62 +++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 inst/stan/age_no_seroreversion.stan diff --git a/inst/stan/age_no_seroreversion.stan b/inst/stan/age_no_seroreversion.stan new file mode 100644 index 00000000..c665b4fe --- /dev/null +++ b/inst/stan/age_no_seroreversion.stan @@ -0,0 +1,62 @@ +functions { + #include functions/prob_infected_age.stan +} + +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan +} + +transformed data { + int n_foi = max(foi_index); +} + +parameters { + vector[n_foi] foi_vector; + real sigma; +} + +transformed parameters { + vector[n_observations] prob_infected; + + prob_infected = prob_infected_age( + age_groups, + n_observations, + foi_vector, + foi_index, + 0.0 + ); +} + +model { + n_seropositive ~ binomial(sample_size, prob_infected); + sigma ~ cauchy(0, 1); + + // force of infection prior + if (foi_prior_index == 0) + foi_vector[1] ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi_vector[1] ~ normal(foi_mean, foi_sd); + + for(i in 2:n_foi) + foi_vector[i] ~ normal(foi_vector[i - 1], sigma); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_expanded; + + for(age in 1:age_max) { + foi_expanded[age] = foi_vector[foi_index[age]]; + } + + prob_infected_expanded = prob_infected_age( + ages, + age_max, + foi_vector, + foi_index, + 0.0 + ); +} From 3a6171229dbe4549c0e5163fd07996e84307d282 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Sat, 25 May 2024 21:54:11 -0500 Subject: [PATCH 13/78] refac: change stan functions names to avoid ambiguity --- inst/stan/age_no_seroreversion.stan | 4 ++-- inst/stan/age_seroreversion.stan | 4 ++-- inst/stan/constant_no_seroreversion.stan | 4 ++-- inst/stan/constant_seroreversion.stan | 4 ++-- inst/stan/functions/prob_infected_age.stan | 8 +++----- inst/stan/functions/prob_infected_constant.stan | 6 +++--- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/inst/stan/age_no_seroreversion.stan b/inst/stan/age_no_seroreversion.stan index c665b4fe..09b83c11 100644 --- a/inst/stan/age_no_seroreversion.stan +++ b/inst/stan/age_no_seroreversion.stan @@ -19,7 +19,7 @@ parameters { transformed parameters { vector[n_observations] prob_infected; - prob_infected = prob_infected_age( + prob_infected = prob_infected_age_model( age_groups, n_observations, foi_vector, @@ -52,7 +52,7 @@ generated quantities{ foi_expanded[age] = foi_vector[foi_index[age]]; } - prob_infected_expanded = prob_infected_age( + prob_infected_expanded = prob_infected_age_model( ages, age_max, foi_vector, diff --git a/inst/stan/age_seroreversion.stan b/inst/stan/age_seroreversion.stan index 0d19418d..b9326285 100644 --- a/inst/stan/age_seroreversion.stan +++ b/inst/stan/age_seroreversion.stan @@ -21,7 +21,7 @@ parameters { transformed parameters { vector[n_observations] prob_infected; - prob_infected = prob_infected_age( + prob_infected = prob_infected_age_model( age_groups, n_observations, foi_vector, @@ -60,7 +60,7 @@ generated quantities{ foi_expanded[age] = foi_vector[foi_index[age]]; } - prob_infected_expanded = prob_infected_age( + prob_infected_expanded = prob_infected_age_model( ages, age_max, foi_vector, diff --git a/inst/stan/constant_no_seroreversion.stan b/inst/stan/constant_no_seroreversion.stan index 6c8862ac..a0bc36c5 100644 --- a/inst/stan/constant_no_seroreversion.stan +++ b/inst/stan/constant_no_seroreversion.stan @@ -13,7 +13,7 @@ parameters { transformed parameters { vector[n_observations] prob_infected; - prob_infected = prob_infected_constant( + prob_infected = prob_infected_constant_model( age_groups, n_observations, foi, @@ -41,7 +41,7 @@ generated quantities{ foi_expanded[i] = foi; } - prob_infected_expanded = prob_infected_constant( + prob_infected_expanded = prob_infected_constant_model( ages, age_max, foi, diff --git a/inst/stan/constant_seroreversion.stan b/inst/stan/constant_seroreversion.stan index 1aa7ee45..7e5f7389 100644 --- a/inst/stan/constant_seroreversion.stan +++ b/inst/stan/constant_seroreversion.stan @@ -16,7 +16,7 @@ parameters { transformed parameters { vector[n_observations] prob_infected; - prob_infected = prob_infected_constant( + prob_infected = prob_infected_constant_model( age_groups, n_observations, foi, @@ -50,7 +50,7 @@ generated quantities{ foi_expanded[i] = foi; } - prob_infected_expanded = prob_infected_constant( + prob_infected_expanded = prob_infected_constant_model( ages, age_max, foi, diff --git a/inst/stan/functions/prob_infected_age.stan b/inst/stan/functions/prob_infected_age.stan index 6daf6b63..48fd8e4b 100644 --- a/inst/stan/functions/prob_infected_age.stan +++ b/inst/stan/functions/prob_infected_age.stan @@ -1,6 +1,5 @@ -real prob_infected_age_single_age( +real prob_infected_age_model_single_age( int age, - real age_max, vector foi_vector, int[] foi_index, real seroreversion_rate @@ -16,7 +15,7 @@ real prob_infected_age_single_age( return prob; } -vector prob_infected_age( +vector prob_infected_age_model( int[] ages, int n_ages, vector foi_vector, @@ -26,9 +25,8 @@ vector prob_infected_age( vector[n_ages] prob_infected; for (i in 1:n_ages) { - prob_infected[i] = prob_infected_age_single_age( + prob_infected[i] = prob_infected_age_model_single_age( ages[i], - n_ages, foi_vector, foi_index, seroreversion_rate diff --git a/inst/stan/functions/prob_infected_constant.stan b/inst/stan/functions/prob_infected_constant.stan index 0fc688ee..6b7939d2 100644 --- a/inst/stan/functions/prob_infected_constant.stan +++ b/inst/stan/functions/prob_infected_constant.stan @@ -1,4 +1,4 @@ -real prob_infected_constant_single_age( +real prob_infected_constant_model_single_age( int age, real foi, real seroreversion_rate @@ -13,7 +13,7 @@ real prob_infected_constant_single_age( return prob; } -vector prob_infected_constant( +vector prob_infected_constant_model( int[] ages, int n_ages, real foi, @@ -22,7 +22,7 @@ vector prob_infected_constant( vector[n_ages] prob_infected; for (i in 1:n_ages) { - prob_infected[i] = prob_infected_constant_single_age( + prob_infected[i] = prob_infected_constant_model_single_age( ages[i], foi, seroreversion_rate From fdcfe9b4f211e7380313a02c2f0bf04bafe8eaec Mon Sep 17 00:00:00 2001 From: ntorresd Date: Sat, 25 May 2024 21:55:31 -0500 Subject: [PATCH 14/78] feat: add time varying model with and without seroreversion --- inst/stan/functions/prob_infected_time.stan | 41 ++++++++++++ inst/stan/time_no_seroreversion.stan | 65 ++++++++++++++++++ inst/stan/time_seroreversion.stan | 73 +++++++++++++++++++++ 3 files changed, 179 insertions(+) create mode 100644 inst/stan/functions/prob_infected_time.stan create mode 100644 inst/stan/time_no_seroreversion.stan create mode 100644 inst/stan/time_seroreversion.stan diff --git a/inst/stan/functions/prob_infected_time.stan b/inst/stan/functions/prob_infected_time.stan new file mode 100644 index 00000000..8489c413 --- /dev/null +++ b/inst/stan/functions/prob_infected_time.stan @@ -0,0 +1,41 @@ +real prob_infected_time_model_single_age( + int age, + int age_max, + vector foi_vector, + int[] foi_index, + real seroreversion_rate +) { + real prob = 0.0; + int birth_year = age_max - age; + for(j in 1:age) { + real foi = foi_vector[foi_index[birth_year + j]]; + real lambda_over_both = foi / (foi + seroreversion_rate); + real e_lower = exp(-(foi + seroreversion_rate)); + + prob = lambda_over_both + e_lower * (prob - lambda_over_both); + } + return prob; +} + +vector prob_infected_time_model( + int[] ages, + int n_ages, + int age_max, + vector foi_vector, + int[] foi_index, + real seroreversion_rate +) { + vector[n_ages] prob_infected; + + for (i in 1:n_ages) { + int age = ages[i]; + prob_infected[i] = prob_infected_time_model_single_age( + age, + age_max, + foi_vector, + foi_index, + seroreversion_rate + ); + } + return prob_infected; +} diff --git a/inst/stan/time_no_seroreversion.stan b/inst/stan/time_no_seroreversion.stan new file mode 100644 index 00000000..1540f359 --- /dev/null +++ b/inst/stan/time_no_seroreversion.stan @@ -0,0 +1,65 @@ +functions { + #include functions/prob_infected_time.stan +} + +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan +} + +transformed data { + int n_foi = max(foi_index); +} + +parameters { + vector[n_foi] foi_vector; + real sigma; +} + +transformed parameters { + vector[n_observations] prob_infected; + + prob_infected = prob_infected_time_model( + age_groups, + n_observations, + age_max, + foi_vector, + foi_index, + 0.0 + ); +} + +model { + n_seropositive ~ binomial(sample_size, prob_infected); + sigma ~ cauchy(0, 1); + + // force of infection prior + if (foi_prior_index == 0) + foi_vector[1] ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi_vector[1] ~ normal(foi_mean, foi_sd); + + for(i in 2:n_foi) + foi_vector[i] ~ normal(foi_vector[i - 1], sigma); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_expanded; + + for(i in 1:age_max) { + int time_index = age_max - i + 1; + foi_expanded[i] = foi_vector[foi_index[i]]; + } + + prob_infected_expanded = prob_infected_time_model( + ages, + age_max, // number of ages + age_max, // number of years + foi_vector, + foi_index, + 0.0 + ); +} diff --git a/inst/stan/time_seroreversion.stan b/inst/stan/time_seroreversion.stan new file mode 100644 index 00000000..e1291a08 --- /dev/null +++ b/inst/stan/time_seroreversion.stan @@ -0,0 +1,73 @@ +functions { + #include functions/prob_infected_time.stan +} + +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan + #include data/seroreversion_prior_data.stan +} + +transformed data { + int n_foi = max(foi_index); +} + +parameters { + vector[n_foi] foi_vector; + real seroreversion_rate; + real sigma; +} + +transformed parameters { + vector[n_observations] prob_infected; + + prob_infected = prob_infected_time_model( + age_groups, + n_observations, + age_max, + foi_vector, + foi_index, + seroreversion_rate + ); +} + +model { + n_seropositive ~ binomial(sample_size, prob_infected); + sigma ~ cauchy(0, 1); + + // force of infection prior + if (foi_prior_index == 0) + foi_vector[1] ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi_vector[1] ~ normal(foi_mean, foi_sd); + + for(i in 2:n_foi) + foi_vector[i] ~ normal(foi_vector[i - 1], sigma); + + // seroreversion prior + if (seroreversion_prior_index == 0) + seroreversion_rate ~ uniform(seroreversion_min, seroreversion_max); + if (seroreversion_prior_index == 1) + seroreversion_rate ~ normal(seroreversion_mean, seroreversion_sd); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_expanded; + + for(i in 1:age_max) { + int time_index = age_max - i + 1; + foi_expanded[time_index] = foi_vector[foi_index[i]]; + } + + prob_infected_expanded = prob_infected_time_model( + ages, + age_max, // number of ages + age_max, // number of years + foi_vector, + foi_index, + seroreversion_rate + ); +} From 36cf0b2ea63d74a281925a23a181807ceb4334a5 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Sat, 25 May 2024 21:56:08 -0500 Subject: [PATCH 15/78] remove unnecessary line in `probability_exact_time_varying` --- R/simulate_serosurvey.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/simulate_serosurvey.R b/R/simulate_serosurvey.R index de0e98a9..cc6f7afb 100644 --- a/R/simulate_serosurvey.R +++ b/R/simulate_serosurvey.R @@ -47,7 +47,6 @@ probability_exact_time_varying <- function( ) { n_years <- length(years) - ages <- seq(1, n_years, 1) probabilities <- vector(length = length(years)) # solves ODE exactly within pieces From ea07ee872e2912c5284caadcc61612a035a5466d Mon Sep 17 00:00:00 2001 From: ntorresd Date: Mon, 27 May 2024 18:24:25 -0500 Subject: [PATCH 16/78] doc: add documentation for new functions --- R/build_stan_data.R | 32 +++++++++++++++++++++++++ R/fit_seromodel.R | 39 +++++++++++++++++++++++++++++-- R/plot_seromodel.R | 57 +++++++++++++++++++++++++++++++++++++++++++++ R/stanmodels.R | 12 ++++++---- 4 files changed, 133 insertions(+), 7 deletions(-) diff --git a/R/build_stan_data.R b/R/build_stan_data.R index cc652297..08c88d51 100644 --- a/R/build_stan_data.R +++ b/R/build_stan_data.R @@ -1,3 +1,9 @@ +#' Sets normal distribution parameters for sampling +#' +#' @param mean Mean of the normal distribution +#' @param sd Standard deviation of the normal distribution +#' @return List with specified statistics and name of the model +#' @export sf_normal <- function(mean = 0, sd = 1) { # Restricting normal inputs to be non-negative if(mean < 0 | sd <= 0) { @@ -12,6 +18,12 @@ sf_normal <- function(mean = 0, sd = 1) { return(list(mean = mean, sd = sd, name = "normal")) } +#' Sets uniform distribution parameters for sampling +#' +#' @param min Minimum value of the random variable of the uniform distribution +#' @param max Maximum value of the random variable of the uniform distribution +#' @return List with specified statistics and name of the model +#' @export sf_uniform <- function(min = 0, max = 10) { # Restricting uniform inputs to be non-negative if (min < 0 | max < 0) { @@ -29,10 +41,20 @@ sf_uniform <- function(min = 0, max = 10) { return(list(min = min, max = max, name = "uniform")) } +#' Sets empty distribution sf_none <- function() { return(list(name = "none")) } +#' Generates force-of-infection indexes for heterogeneous age groups +#' +#' The max value of the force-of-infection indexes correspond to +#' the number of foi values to be estimated when sampling. +#' @inheritParams fit_seromodel +#' @param group_size Age groups size +#' @return Integer vector with the indexes numerating each year/age +#' (depending on the model). +#' @export get_foi_index <- function( serosurvey, group_size @@ -65,6 +87,11 @@ get_foi_index <- function( return(foi_index) } +#' Set stan data defaults for sampling +#' +#' @param stan_data List to be pased to [rstan][rstan::sampling] +#' @inheritParams fit_seromodel +#' @return List with default values of stan data for sampling set_stan_data_defaults <- function( stan_data, is_seroreversion = FALSE @@ -101,6 +128,11 @@ set_stan_data_defaults <- function( return(stan_data) } +#' Builds stan data for sampling depending on the selected model +#' +#' @inheritParams fit_seromodel +#' @return List with necessary data for sampling the specified model +#' @export build_stan_data <- function( serosurvey, model_type = "constant", diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index 5fc64879..432933b4 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -1,3 +1,6 @@ +#' Adds age group marker to serosurvey +#' +#' @inheritParams fit_seromodel add_age_group_to_serosurvey <- function(serosurvey) { if (!any(colnames(serosurvey) == "age_group")) { serosurvey <- serosurvey %>% @@ -10,6 +13,39 @@ add_age_group_to_serosurvey <- function(serosurvey) { return(serosurvey) } +#' Runs specified stan model for the force-of-infection +#' +#' @param serosurvey +#' \describe{ +#' \item{`tsur`}{Year in which the survey took place} +#' \item{`age_min`}{Floor value of the average between age_min and age_max} +#' \item{`age_max`}{The size of the sample} +#' \item{`sample_size`}{Number of samples for each age group} +#' \item{`n_seropositive`}{Number of positive samples for each age group} +#' } +#' @param model_type Type of the model. Either "constant", "age" or "time" +#' @param foi_prior Force-of-infection distribution specified by means of +#' the helper functions. Currently available options are: +#' \describe{ +#' \item{`[sf_normal]`} +#' \item{`[sf_uniform]`} +#' \item{`[sf_none]`} +#' } +#' @param foi_index Integer vector specifying the age-groups for which +#' force-of-infection values will be estimated +#' @param is_seroreversion Boolean specifying whether to include +#' seroreversion rate estimation in the model +#' @param seroreversion_prior seroreversion distribution specified by means of +#' the helper functions. Currently available options are: +#' \describe{ +#' \item{`[sf_normal]`} +#' \item{`[sf_uniform]`} +#' \item{`[sf_none]`} +#' } +#' @param ... Additional parameters for [rstan][rstan::sampling] +#' @returns stan_fit object with force-of-infection and seroreversion +#' (when applicable) samples +#' @export fit_seromodel <- function( serosurvey, model_type = "constant", @@ -39,12 +75,11 @@ fit_seromodel <- function( # model <- stan_models[[model_name]] model <- rstan::stan_model(paste0("inst/stan/", model_name, ".stan")) - seromodel <- rstan::sampling( model, data = stan_data, ... ) - + seromodel@model_name <- model_name return(seromodel) } diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index a4fe871f..51101cf3 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -1,3 +1,19 @@ +#' Prepares serosurvey for plotting +#' +#' Adds seroprevalence values with corresponing binomial confidence interval +#' @inheritParams fit_seromodel +#' @param alpha 1 - alpha indicates the confidence level to be used +#' @return serosurvey with additional columns: +#' \describe{ +#' \item{seroprev}{Seroprevalence computed as the proportion of positive +#' cases `n_seropositive` in the number of samples +#' `sample_size` for each age group} +#' \item{seroprev_lower}{Lower limit of the binomial confidence interval +#' of `seroprev`} +#' \item{seroprev_upper}{Upper limit of the binomial confidence interval +#' of `seroprev`} +#' } +#' @export prepare_serosurvey_for_plotting <- function( serosurvey, alpha = 0.05 @@ -23,6 +39,13 @@ prepare_serosurvey_for_plotting <- function( dplyr::relocate(age_group) } +#' Plots seroprevalence from the given serosurvey +#' +#' @inheritParams fit_seromodel +#' @param size_text Size of text for plotting (`base_size` in +#' [ggplot2][ggplot2::theme_bw]) +#' @return ggplot object with seroprevalence plot +#' @export plot_serosurvey <- function( serosurvey, size_text = 11 @@ -63,6 +86,21 @@ plot_serosurvey <- function( return(seroprev_plot) } +#' Extracts central estimates from stan_fit object for specified parameter +#' +#' @param seromodel stan_fit object obtained from sampling a model +#' with [fit_seromode] +#' @inheritParams fit_seromodel +#' @param alpha 1 - alpha indicates the credibility level to be used +#' @param par_name String specifying the parameter to be extracted +#' from `seromodel` +#' @returns A dataframe with the following columns +#' \describe{ +#' \item{`median`}{Median of the samples computed as the 0.5 quantile} +#' \item{`lower`}{Lower quantile `alpha`} +#' \item{`upper`}{Upper quantile `1 - alpha`} +#' } +#' @export extract_central_estimates <- function( seromodel, serosurvey, @@ -79,6 +117,12 @@ extract_central_estimates <- function( return(central_estimates) } +#' Plot seroprevalence estimates on top of the serosurvey +#' +#' @inheritParams extract_central_estimates +#' @inheritParams plot_serosurvey +#' @returns ggplot object with seroprevalence estimates and serisurvey plots +#' @export plot_seroprevalence_estimates <- function( seromodel, serosurvey, @@ -116,6 +160,19 @@ plot_seroprevalence_estimates <- function( return(seroprevalence_plot) } +#' Plots force-of-infection central estimates +#' +#' @inheritParams extract_central_estimates +#' @inheritParams fit_seromodel +#' @param foi_df Dataframe with columns +#' \describe{ +#' \item{`year`/`age`}{Year/Age (depending on the model)} +#' \item{`foi`}{Force-of-infection values by year/age} +#' } +#' @inheritParams plot_serosurvey +#' @param foi_max Max force-of-infection value for plotting +#' @return ggplot object with estimated force-of-infection +#' @export plot_foi_estimates <- function( seromodel, serosurvey, diff --git a/R/stanmodels.R b/R/stanmodels.R index e3529b84..0d07aea0 100644 --- a/R/stanmodels.R +++ b/R/stanmodels.R @@ -1,13 +1,15 @@ # Generated by rstantools. Do not edit by hand. # names of stan models -stanmodels <- c("av_normal", "constant", "tv_normal", "tv_normal_log") +stanmodels <- c("age_no_seroreversion", "age_seroreversion", "constant_no_seroreversion", "constant_seroreversion", "time_no_seroreversion", "time_seroreversion") # load each stan module -Rcpp::loadModule("stan_fit4av_normal_mod", what = TRUE) -Rcpp::loadModule("stan_fit4constant_mod", what = TRUE) -Rcpp::loadModule("stan_fit4tv_normal_mod", what = TRUE) -Rcpp::loadModule("stan_fit4tv_normal_log_mod", what = TRUE) +Rcpp::loadModule("stan_fit4age_no_seroreversion_mod", what = TRUE) +Rcpp::loadModule("stan_fit4age_seroreversion_mod", what = TRUE) +Rcpp::loadModule("stan_fit4constant_no_seroreversion_mod", what = TRUE) +Rcpp::loadModule("stan_fit4constant_seroreversion_mod", what = TRUE) +Rcpp::loadModule("stan_fit4time_no_seroreversion_mod", what = TRUE) +Rcpp::loadModule("stan_fit4time_seroreversion_mod", what = TRUE) # instantiate each stanmodel object stanmodels <- sapply(stanmodels, function(model_name) { From ea6093f9e46b376994f6a4082443ecba6fbc398f Mon Sep 17 00:00:00 2001 From: ntorresd Date: Wed, 31 Jul 2024 18:42:37 -0500 Subject: [PATCH 17/78] refac: move data simulation validation functions to validation module --- R/simulate_serosurvey.R | 49 ----------------------------------------- R/validation.R | 48 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 49 deletions(-) diff --git a/R/simulate_serosurvey.R b/R/simulate_serosurvey.R index cc6f7afb..934e5441 100644 --- a/R/simulate_serosurvey.R +++ b/R/simulate_serosurvey.R @@ -415,55 +415,6 @@ check_age_constraints <- function(df) { return(TRUE) } -validate_survey <- function(survey_features) { - - if (!is.data.frame(survey_features) || !all(c("age_min", "age_max", "sample_size") %in% names(survey_features))) { - stop("survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") - } - - # check that the age_max of a bin does not coincide with - # the age min of a different bin - is_age_ok <- check_age_constraints(survey_features) - if(!is_age_ok) - stop("Age bins in a survey are inclusive of both bounds, so the age_max of - one bin cannot equal the age_min of another.") -} - -validate_foi_df <- function(foi_df, cnames_additional) { - if (!is.data.frame(foi_df) || !all(cnames_additional %in% names(foi_df)) || ncol(foi_df) != (1 + length(cnames_additional))) { - if(length(cnames_additional) == 1) - message_end <- paste0(" and ", cnames_additional, ".") - else - message_end <- paste0(", ", paste(cnames_additional, collapse=" and "), ".") - message_beginning <- "foi must be a dataframe with columns foi" - stop(paste0(message_beginning, message_end)) - } -} - -validate_seroreversion_rate <- function(seroreversion_rate) { - if (!is.numeric(seroreversion_rate) || seroreversion_rate < 0) { - stop("seroreversion_rate must be a non-negative numeric value.") - } -} - -validate_survey_and_foi_consistency <- function( - survey_features, - foi_df -) { - max_age_foi_df <- nrow(foi_df) - if(max_age_foi_df > max(survey_features$age_max)) - stop("maximum age implicit in foi_df should not exceed max age in survey_features.") -} - -validate_survey_and_foi_consistency_age_time <- function( - survey_features, - foi_df -) { - max_age_foi_df <- max(foi_df$year) - min(foi_df$year) + 1 - if(max_age_foi_df > max(survey_features$age_max)) - stop("maximum age implicit in foi_df should not exceed max age in survey_features.") -} - generate_seropositive_counts_by_age_bin <- function( probability_seropositive_by_age, sample_size_by_age_random, diff --git a/R/validation.R b/R/validation.R index 14e2570a..53110175 100644 --- a/R/validation.R +++ b/R/validation.R @@ -55,3 +55,51 @@ validate_serosurvey <- function(serosurvey) { return(serosurvey) } + +validate_survey <- function(survey_features) { + + if (!is.data.frame(survey_features) || !all(c("age_min", "age_max", "sample_size") %in% names(survey_features))) { + stop("survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") + } + + # check that the age_max of a bin does not coincide with + # the age min of a different bin + is_age_ok <- check_age_constraints(survey_features) + if(!is_age_ok) + stop("Age bins in a survey are inclusive of both bounds, so the age_max of one bin cannot equal the age_min of another.") +} + +validate_foi_df <- function(foi_df, cnames_additional) { + if (!is.data.frame(foi_df) || !all(cnames_additional %in% names(foi_df)) || ncol(foi_df) != (1 + length(cnames_additional))) { + if(length(cnames_additional) == 1) + message_end <- paste0(" and ", cnames_additional, ".") + else + message_end <- paste0(", ", paste(cnames_additional, collapse=" and "), ".") + message_beginning <- "foi must be a dataframe with columns foi" + stop(paste0(message_beginning, message_end)) + } +} + +validate_seroreversion_rate <- function(seroreversion_rate) { + if (!is.numeric(seroreversion_rate) || seroreversion_rate < 0) { + stop("seroreversion_rate must be a non-negative numeric value.") + } +} + +validate_survey_and_foi_consistency <- function( + survey_features, + foi_df +) { + max_age_foi_df <- nrow(foi_df) + if(max_age_foi_df > max(survey_features$age_max)) + stop("maximum age implicit in foi_df should not exceed max age in survey_features.") +} + +validate_survey_and_foi_consistency_age_time <- function( + survey_features, + foi_df +) { + max_age_foi_df <- max(foi_df$year) - min(foi_df$year) + 1 + if(max_age_foi_df > max(survey_features$age_max)) + stop("maximum age implicit in foi_df should not exceed max age in survey_features.") +} From 962df9efccc27d613b213cda2f0ff0d54d9ff51c Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 28 May 2024 09:47:11 -0500 Subject: [PATCH 18/78] fix: change to `extract_central_estimates` to deal with 1-time estimates --- R/plot_seromodel.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index 51101cf3..7834cc36 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -107,7 +107,8 @@ extract_central_estimates <- function( alpha = 0.05, par_name = "foi_vector" ) { - samples <- rstan::extract(seromodel, par_name)[[1]] + samples <- rstan::extract(seromodel, par_name)[[1]] %>% + as.matrix() #to deal with 1-time estimates central_estimates <- data.frame( median = apply(samples, 2, quantile, 0.5), lower = apply(samples, 2, quantile, alpha), From aedfc4fdca0f1f23ca8e99e8321b92c4e8993cf4 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Wed, 31 Jul 2024 18:47:19 -0500 Subject: [PATCH 19/78] feat: add 'plot_rhats' function --- R/plot_seromodel.R | 61 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index 7834cc36..469ca8db 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -254,3 +254,64 @@ plot_foi_estimates <- function( return(foi_plot) } + +#' Plot r-hats convergence criteria for the specified model +#' +#' @inheritParams extract_central_estimates +#' @inheritParams plot_serosurvey +#' @return ggplot object showing the r-hats of the model to be compared with the +#' convergence criteria (horizontal dashed line) +plot_rhats <- function( + seromodel, + serosurvey, + par_name = "foi_expanded", + size_text = 11 +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + rhats <- bayesplot::rhat(seromodel, par_name) + + if (startsWith(seromodel@model_name, "age")) { + xlab <- "Age" + ages <- 1:max(serosurvey$age_max) + rhats_df <- data.frame( + age = ages, + rhat = rhats + ) + + rhats_plot <- ggplot2::ggplot( + data = rhats_df, ggplot2::aes(x = age) + ) + } else if (startsWith(seromodel@model_name, "time")) { + xlab <- "Year" + ages <- rev(1:max(serosurvey$age_max)) + years <- unique(serosurvey$tsur) - ages + rhats_df <- data.frame( + year = years, + rhat = rhats + ) + + rhats_plot <- ggplot2::ggplot( + data = rhats_df, ggplot2::aes(x = year) + ) + } + + rhats_plot <- rhats_plot + + ggplot2::geom_hline( + yintercept = 1.01, + linetype = 'dashed' + ) + + ggplot2::geom_line(ggplot2::aes(y = rhat)) + + ggplot2::geom_point(ggplot2::aes(y = rhat)) + + ggplot2::coord_cartesian( + ylim = c( + min(1.0, min(rhats_df$rhat)), + max(1.02, max(rhats_df$rhat)) + ) + ) + + ggplot2::theme_bw(size_text) + + ggplot2::xlab(xlab) + + ggplot2::ylab("Convergence (r-hats)") + + return(rhats_plot) +} From daa68732143e877f56474791d18ce8d74c5274ab Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 28 May 2024 17:57:24 -0500 Subject: [PATCH 20/78] remove unnecessary stan file --- inst/stan/functions/prob_infected_tv.stan | 38 ----------------------- 1 file changed, 38 deletions(-) delete mode 100644 inst/stan/functions/prob_infected_tv.stan diff --git a/inst/stan/functions/prob_infected_tv.stan b/inst/stan/functions/prob_infected_tv.stan deleted file mode 100644 index e38f6a81..00000000 --- a/inst/stan/functions/prob_infected_tv.stan +++ /dev/null @@ -1,38 +0,0 @@ -vector prob_infected_noseroreversion( - vector foi_vector, - int[] chunks, - matrix observation_exposure_matrix, - int n_obs, - int age_max - ) { - real scalar_dot_product; - vector[n_obs] prob_infected; - vector[age_max] foi_every_age = foi_vector[chunks]; - - for(i in 1:n_obs){ - scalar_dot_product = dot_product( - observation_exposure_matrix[i,], - foi_every_age - ); - prob_infected[i] = 1 - exp(-scalar_dot_product); - } - - return prob_infected; -} - - real prob_infected_age_varying( - int age, - vector foi_vector, - int[] chunks, - real seroreversion_rate - ) { - real prob = 0.0; - for(j in 1:age){ - real foi = foi_vector[chunks[j]]; - real lambda_over_both = foi / (foi + seroreversion_rate); - real e_lower = exp(-(foi + seroreversion_rate)); - - prob = lambda_over_both + e_lower * (prob - lambda_over_both); - } - return prob; - } From fd75b707ab39e24ece13e2337dc92ec5dc52d136 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 28 May 2024 17:58:45 -0500 Subject: [PATCH 21/78] fix: add error message for constant model exceptions in plotting functions --- R/plot_seromodel.R | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index 469ca8db..0e213bd6 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -185,6 +185,12 @@ plot_foi_estimates <- function( # TODO: Add checks for foi_df (size, colnames, etc.) checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + model_name <- seromodel@model_name + stopifnot( + "seromodel@name should start with either 'age' or 'time'" = + startsWith(model_name, "age") | startsWith(model_name, "time") + ) + foi_central_estimates <- extract_central_estimates( seromodel = seromodel, serosurvey = serosurvey, @@ -195,7 +201,7 @@ plot_foi_estimates <- function( if (is.null(foi_max)) foi_max <- max(foi_central_estimates$upper) - if (startsWith(seromodel@model_name, "age")) { + if (startsWith(model_name, "age")) { xlab <- "Age" ages <- 1:max(serosurvey$age_max) foi_central_estimates <- mutate( @@ -209,7 +215,7 @@ plot_foi_estimates <- function( foi_plot <- ggplot2::ggplot( data = foi_central_estimates, ggplot2::aes(x = age) ) - } else if (startsWith(seromodel@model_name, "time")) { + } else if (startsWith(model_name, "time")) { xlab <- "Year" ages <- rev(1:max(serosurvey$age_max)) years <- unique(serosurvey$tsur) - ages @@ -269,9 +275,15 @@ plot_rhats <- function( ) { checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + model_name <- seromodel@model_name + stopifnot( + "seromodel@name should start with either 'age' or 'time'" = + startsWith(model_name, "age") | startsWith(model_name, "time") + ) + rhats <- bayesplot::rhat(seromodel, par_name) - if (startsWith(seromodel@model_name, "age")) { + if (startsWith(model_name, "age")) { xlab <- "Age" ages <- 1:max(serosurvey$age_max) rhats_df <- data.frame( @@ -282,7 +294,7 @@ plot_rhats <- function( rhats_plot <- ggplot2::ggplot( data = rhats_df, ggplot2::aes(x = age) ) - } else if (startsWith(seromodel@model_name, "time")) { + } else if (startsWith(model_name, "time")) { xlab <- "Year" ages <- rev(1:max(serosurvey$age_max)) years <- unique(serosurvey$tsur) - ages From 8ecde6bfe5469747119414a49936a17f62f21aa5 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 28 May 2024 18:01:31 -0500 Subject: [PATCH 22/78] feat: add `summarise_model` and `plot_summary` functions --- R/plot_seromodel.R | 137 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index 0e213bd6..c59c0b4b 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -327,3 +327,140 @@ plot_rhats <- function( return(rhats_plot) } + +#' Summarise specified model +#' +#' @inheritParams extract_central_estimates +#' @param elpd_digits Number of elpd digits to show +#' @param foi_digits Number of foi digits to show +#' @param seroreversion_digits Number of seroreversion rate digits to show +#' @return A list showing +#' \describe{ +#' \item{`model_name`}{Name of the model} +#' \item{`elpd`}{elpd and its standard deviation} +#' \item{`foi`}{Estimated foi with credible interval (for 'constant' model)} +#' \item{`foi_rhat`}{foi rhat value (for 'constant' model)} +#' \item{`seroreversion_rate`}{Estimated seroreversion rate} +#' \item{`rhat_seroreversion_rate`}{Seroreversion rate rhat value} +#' } +#' @export +summarise_seromodel <- function( + seromodel, + serosurvey, + alpha = 0.05, + elpd_digits = 1, + foi_digits = 2, + seroreversion_digits = 2, + rhat_digits = 2 +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + loo_fit <- loo::loo( + seromodel, + pars = c(parameter_name = "log_likelihood") + ) + elp <- loo_fit$estimates["elpd_loo", ] %>% round(elpd_digits) + + summary_list <- list( + model_name = seromodel@model_name, + elpd = paste0(elpd[1], "(se=", elpd[2], ")") + ) + + model_name <- seromodel@model_name + if (startsWith(model_name, "constant")) { + foi_central_estimates <- extract_central_estimates( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + par_name = "foi" + ) %>% + signif(foi_digits) + + foi_summary <- paste0( + foi_central_estimates$median, + "(", 1 - alpha, "% CI, ", + foi_central_estimates$lower, "-", + foi_central_estimates$upper, ")" + ) + rhat_foi <- bayesplot::rhat(seromodel, "foi") %>% + signif(rhat_digits) + summary_list <- append( + summary_list, + list( + foi = foi_summary, + rhat_foi = rhat_foi + ) + ) + } + + if (!endsWith(model_name, "no_seroreversion")) { + seroreversion_rate_central_estimates <- extract_central_estimates( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + par_name = "seroreversion_rate" + ) %>% + signif(seroreversion_digits) + + seroreversion_rate_summary <- paste0( + seroreversion_rate_central_estimates$median, + "(", 1 - alpha, "% CI, ", + seroreversion_rate_central_estimates$lower, "-", + seroreversion_rate_central_estimates$upper, ")" + ) + rhat_seroreversion_rate <- bayesplot::rhat( + seromodel, + "seroreversion_rate" + ) %>% + signif(rhat_digits) + summary_list <- append( + summary_list, + list( + seroreversion_rate = seroreversion_rate_summary, + rhat_seroreversion_rate = rhat_seroreversion_rate + ) + ) + } + return(summary_list) +} + +#' Plots model summary +#' +#' @inheritParams summarise_seromodel +#' @return ggplot object with a summary of the specified model +#' @export +plot_summary <- function( + seromodel, + serosurvey, + ... +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + summary_table <- summarise_seromodel( + seromodel = seromodel, + serosurvey = serosurvey, + ... + ) %>% + t() #convert summary to table + + summary_df <- data.frame( + row = NCOL(summary_table):1, + text = paste0(colnames(summary_table), ": ", summary_table[1, ]) + ) + + summary_plot <- ggplot2::ggplot( + summary_df, + ggplot2::aes(x = 1, y = row)) + + ggplot2::scale_y_continuous( + limits = c(0, nrow(summary_df) + 1), + breaks = NULL + ) + + ggplot2::theme_void() + + ggplot2::geom_text( + ggplot2::aes(label = text), + fontface = "bold" + ) + + return(summary_plot) +} + From a93075bf5ea571e8dea7c9a1a4ecb77e701a48be Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 28 May 2024 18:04:53 -0500 Subject: [PATCH 23/78] feat: add new `plot_seromodel` function --- R/plot_seromodel.R | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index c59c0b4b..d334fd8d 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -464,3 +464,57 @@ plot_summary <- function( return(summary_plot) } +#' Visualise results of the provided model +#' +#' @inheritParams plot_summary +#' @inheritParams plot_seroprevalence_estimates +#' @inheritParams plot_foi_estimates +#' @inheritParams plot_rhats +plot_seromodel <- function( + seromodel, + serosurvey, + ... +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + summary_plot <- plot_summary( + seromodel, + serosurvey, + ... + ) + + seroprev_plot <- plot_seroprevalence_estimates( + seromodel, + serosurvey, + ... + ) + + plot_list <- list( + summary_plot, + seroprev_plot + ) + + model_name <- seromodel@model_name + if (!startsWith(model_name, "constant")) { + foi_plot <- plot_foi_estimates( + seromodel, + serosurvey, + foi_df = foi_df, + ... + ) + + rhats_plot <- plot_rhats( + seromodel, + serosurvey, + ... + ) + + plot_list <- append( + plot_list, + list(foi_plot, rhats_plot) + ) + } + + seromodel_plot <- cowplot::plot_grid(plotlist = plot_list, ncol = 1) + return(seromodel_plot) +} From 260e74edb8baf09c091740b33ae3a460bc771df9 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 28 May 2024 18:25:54 -0500 Subject: [PATCH 24/78] fix: change reference to config file --- R/build_stan_data.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/build_stan_data.R b/R/build_stan_data.R index 08c88d51..b81193aa 100644 --- a/R/build_stan_data.R +++ b/R/build_stan_data.R @@ -96,7 +96,7 @@ set_stan_data_defaults <- function( stan_data, is_seroreversion = FALSE ) { - config_file <- "inst/extdata/config.yml" + config_file <- system.file("extdata", "config.yml", package = "serofoi") prior_default <- config::get(file = config_file, "priors")$defaults foi_defaults <- list( @@ -165,9 +165,9 @@ build_stan_data <- function( list(foi_index = foi_index) ) } - - config_file <- "inst/extdata/config.yml" + config_file <- system.file("extdata", "config.yml", package = "serofoi") prior_index <- config::get(file = config_file, "priors")$indexes + if (foi_prior$name == "uniform") { stan_data$foi_prior_index <- prior_index[["uniform"]] stan_data$foi_min <- foi_prior$min From afe85128d90a4deddaf5d504182036848206de4b Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 28 May 2024 18:42:29 -0500 Subject: [PATCH 25/78] fix: correct stan models reference --- R/fit_seromodel.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index 432933b4..b7240755 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -74,7 +74,7 @@ fit_seromodel <- function( model_name <- paste0(model_type, "_no_seroreversion") # model <- stan_models[[model_name]] - model <- rstan::stan_model(paste0("inst/stan/", model_name, ".stan")) + model <- stanmodels[[model_name]] seromodel <- rstan::sampling( model, data = stan_data, From d009c50e8461598a7ccfcf183859e05616aaa1bb Mon Sep 17 00:00:00 2001 From: ntorresd Date: Wed, 31 Jul 2024 18:59:44 -0500 Subject: [PATCH 26/78] doc: add export tag to plot_seromodel --- R/plot_seromodel.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index d334fd8d..fb23fb5d 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -359,7 +359,7 @@ summarise_seromodel <- function( seromodel, pars = c(parameter_name = "log_likelihood") ) - elp <- loo_fit$estimates["elpd_loo", ] %>% round(elpd_digits) + elpd <- loo_fit$estimates["elpd_loo", ] %>% round(elpd_digits) summary_list <- list( model_name = seromodel@model_name, @@ -470,6 +470,8 @@ plot_summary <- function( #' @inheritParams plot_seroprevalence_estimates #' @inheritParams plot_foi_estimates #' @inheritParams plot_rhats +#' @return seromodel summary plot +#' @export plot_seromodel <- function( seromodel, serosurvey, From de1cca9dcc937729f3ccb712a3478e9db6f789ed Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 28 May 2024 19:34:49 -0500 Subject: [PATCH 27/78] fix: remove uneccessary argument call in `plot_seromodel` --- R/plot_seromodel.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index fb23fb5d..872a442b 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -501,7 +501,6 @@ plot_seromodel <- function( foi_plot <- plot_foi_estimates( seromodel, serosurvey, - foi_df = foi_df, ... ) From 80b5ac1b2c8ba6302ad6972d2c1c0605d7006a67 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 1 Aug 2024 12:32:17 -0500 Subject: [PATCH 28/78] feat: introduce 'summarise_loo_estimate' to simplify 'summarise_seromodel' This function allows to extact an specific loo estimate for model parameters like 'seroreversion_rate' or 'foi'. This introduce changes in: - 'summarise_seromodel' - 'plot_summary' - 'plot_seromodel' --- R/plot_seromodel.R | 152 ++++++++++++++++++++++++++++++++------------- 1 file changed, 110 insertions(+), 42 deletions(-) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index 872a442b..e96dfade 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -328,29 +328,89 @@ plot_rhats <- function( return(rhats_plot) } +#' Extract specified loo estimate +#' +#' @inheritParams extract_central_estimates +#' @param par_loo_estimate Name of the loo estimate to be extracted. +#' @param loo_estimate_digits Number of loo estimate digits +#' @return Text summarising specified loo estimate +#' @export +summarise_loo_estimate <- function( + seromodel, + par_loo_estimate = "elpd_loo", + loo_estimate_digits = 2 +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + loo_fit <- loo::loo( + seromodel, + pars = c(parameter_name = "log_likelihood") + ) + loo_estimate <- loo_fit$estimates[par_loo_estimate, ] %>% + round(loo_estimate_digits) + + loo_estimate_summary <- paste0(loo_estimate[1], "(se=", loo_estimate[2], ")") + + return(loo_estimate_summary) +} + +#' Summarise central estimate +#' +#' @inheritParams extract_central_estimates +#' @param central_estimate_digits Number of central estimate digits +#' @return Text summarising specified central estimate +#' @export +summarise_central_estimate <- function( + seromodel, + serosurvey, + alpha, + par_name = "seroreversion_rate", + central_estimate_digits = 2 +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + central_estimates <- signif( + extract_central_estimates( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + par_name = par_name + ), + digits = 2 + ) + + central_estimate_summary <- paste0( + central_estimates$median, + "(", 100 * (1 - alpha), "% CI, ", + central_estimates$lower, "-", + central_estimates$upper, ")" + ) + + return(central_estimate_summary) +} + #' Summarise specified model #' #' @inheritParams extract_central_estimates -#' @param elpd_digits Number of elpd digits to show -#' @param foi_digits Number of foi digits to show -#' @param seroreversion_digits Number of seroreversion rate digits to show -#' @return A list showing +#' @inheritParams extract_loo_estimate +#' @inheritParams summarise_central_estimate +#' @return A list summarising the specified model #' \describe{ #' \item{`model_name`}{Name of the model} #' \item{`elpd`}{elpd and its standard deviation} #' \item{`foi`}{Estimated foi with credible interval (for 'constant' model)} #' \item{`foi_rhat`}{foi rhat value (for 'constant' model)} #' \item{`seroreversion_rate`}{Estimated seroreversion rate} -#' \item{`rhat_seroreversion_rate`}{Seroreversion rate rhat value} +#' \item{`seroreversion_rate_rhat`}{Seroreversion rate rhat value} #' } #' @export summarise_seromodel <- function( seromodel, serosurvey, alpha = 0.05, - elpd_digits = 1, - foi_digits = 2, - seroreversion_digits = 2, + par_loo_estimate = "elpd_loo", + loo_estimate_digits = 1, + central_estimate_digits = 2, rhat_digits = 2 ) { checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) @@ -361,85 +421,83 @@ summarise_seromodel <- function( ) elpd <- loo_fit$estimates["elpd_loo", ] %>% round(elpd_digits) - summary_list <- list( - model_name = seromodel@model_name, - elpd = paste0(elpd[1], "(se=", elpd[2], ")") + loo_estimate_summary <- summarise_loo_estimate( + seromodel = seromodel, + par_loo_estimate = par_loo_estimate, + loo_estimate_digits = loo_estimate_digits ) - model_name <- seromodel@model_name + summary_list[par_loo_estimate] = loo_estimate_summary + if (startsWith(model_name, "constant")) { - foi_central_estimates <- extract_central_estimates( + foi_summary <- summarise_central_estimate( seromodel = seromodel, serosurvey = serosurvey, alpha = alpha, - par_name = "foi" - ) %>% - signif(foi_digits) - - foi_summary <- paste0( - foi_central_estimates$median, - "(", 1 - alpha, "% CI, ", - foi_central_estimates$lower, "-", - foi_central_estimates$upper, ")" + par_name = "foi", + central_estimate_digits = central_estimate_digits ) - rhat_foi <- bayesplot::rhat(seromodel, "foi") %>% + + foi_rhat <- bayesplot::rhat(seromodel, "foi") %>% signif(rhat_digits) summary_list <- append( summary_list, list( foi = foi_summary, - rhat_foi = rhat_foi + foi_rhat = foi_rhat ) ) } if (!endsWith(model_name, "no_seroreversion")) { - seroreversion_rate_central_estimates <- extract_central_estimates( + seroreversion_rate_summary <- summarise_central_estimate( seromodel = seromodel, serosurvey = serosurvey, alpha = alpha, - par_name = "seroreversion_rate" - ) %>% - signif(seroreversion_digits) - - seroreversion_rate_summary <- paste0( - seroreversion_rate_central_estimates$median, - "(", 1 - alpha, "% CI, ", - seroreversion_rate_central_estimates$lower, "-", - seroreversion_rate_central_estimates$upper, ")" + par_name = "seroreversion_rate", + central_estimate_digits = central_estimate_digits ) - rhat_seroreversion_rate <- bayesplot::rhat( + + seroreversion_rate_rhat <- bayesplot::rhat( seromodel, "seroreversion_rate" ) %>% signif(rhat_digits) + summary_list <- append( summary_list, list( seroreversion_rate = seroreversion_rate_summary, - rhat_seroreversion_rate = rhat_seroreversion_rate + seroreversion_rate_rhat = seroreversion_rate_rhat ) ) } + return(summary_list) } #' Plots model summary #' #' @inheritParams summarise_seromodel +#' @inheritParams plot_serosurvey #' @return ggplot object with a summary of the specified model #' @export plot_summary <- function( seromodel, serosurvey, - ... + loo_estimate_digits= 1, + central_estimate_digits = 2, + rhat_digits = 2, + size_text = 11 ) { checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) summary_table <- summarise_seromodel( seromodel = seromodel, serosurvey = serosurvey, - ... + loo_estimate_digits= loo_estimate_digits, + central_estimate_digits = central_estimate_digits, + rhat_digits = rhat_digits ) %>% t() #convert summary to table @@ -475,14 +533,24 @@ plot_summary <- function( plot_seromodel <- function( seromodel, serosurvey, - ... + alpha = 0.05, + foi_df = NULL, + foi_max = NULL, + loo_estimate_digits = 1, + central_estimate_digits = 2, + seroreversion_digits = 2, + rhat_digits = 2, + size_text = 11 ) { checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) summary_plot <- plot_summary( - seromodel, - serosurvey, - ... + seromodel = seromodel, + serosurvey = serosurvey, + loo_estimate_digits = loo_estimate_digits, + central_estimate_digits = central_estimate_digits, + rhat_digits = rhat_digits, + size_text = size_text ) seroprev_plot <- plot_seroprevalence_estimates( From 5473e030e3fa4a0b234383631d9b12b8490100b8 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 1 Aug 2024 12:38:46 -0500 Subject: [PATCH 29/78] refac: move 'summarise_seromodel' to a separate file --- R/plot_seromodel.R | 148 ---------------------------------------- R/summarise_seromodel.R | 144 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 148 deletions(-) create mode 100644 R/summarise_seromodel.R diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index e96dfade..9c6bffbc 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -328,154 +328,6 @@ plot_rhats <- function( return(rhats_plot) } -#' Extract specified loo estimate -#' -#' @inheritParams extract_central_estimates -#' @param par_loo_estimate Name of the loo estimate to be extracted. -#' @param loo_estimate_digits Number of loo estimate digits -#' @return Text summarising specified loo estimate -#' @export -summarise_loo_estimate <- function( - seromodel, - par_loo_estimate = "elpd_loo", - loo_estimate_digits = 2 -) { - checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) - - loo_fit <- loo::loo( - seromodel, - pars = c(parameter_name = "log_likelihood") - ) - loo_estimate <- loo_fit$estimates[par_loo_estimate, ] %>% - round(loo_estimate_digits) - - loo_estimate_summary <- paste0(loo_estimate[1], "(se=", loo_estimate[2], ")") - - return(loo_estimate_summary) -} - -#' Summarise central estimate -#' -#' @inheritParams extract_central_estimates -#' @param central_estimate_digits Number of central estimate digits -#' @return Text summarising specified central estimate -#' @export -summarise_central_estimate <- function( - seromodel, - serosurvey, - alpha, - par_name = "seroreversion_rate", - central_estimate_digits = 2 -) { - checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) - - central_estimates <- signif( - extract_central_estimates( - seromodel = seromodel, - serosurvey = serosurvey, - alpha = alpha, - par_name = par_name - ), - digits = 2 - ) - - central_estimate_summary <- paste0( - central_estimates$median, - "(", 100 * (1 - alpha), "% CI, ", - central_estimates$lower, "-", - central_estimates$upper, ")" - ) - - return(central_estimate_summary) -} - -#' Summarise specified model -#' -#' @inheritParams extract_central_estimates -#' @inheritParams extract_loo_estimate -#' @inheritParams summarise_central_estimate -#' @return A list summarising the specified model -#' \describe{ -#' \item{`model_name`}{Name of the model} -#' \item{`elpd`}{elpd and its standard deviation} -#' \item{`foi`}{Estimated foi with credible interval (for 'constant' model)} -#' \item{`foi_rhat`}{foi rhat value (for 'constant' model)} -#' \item{`seroreversion_rate`}{Estimated seroreversion rate} -#' \item{`seroreversion_rate_rhat`}{Seroreversion rate rhat value} -#' } -#' @export -summarise_seromodel <- function( - seromodel, - serosurvey, - alpha = 0.05, - par_loo_estimate = "elpd_loo", - loo_estimate_digits = 1, - central_estimate_digits = 2, - rhat_digits = 2 -) { - checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) - - loo_fit <- loo::loo( - seromodel, - pars = c(parameter_name = "log_likelihood") - ) - elpd <- loo_fit$estimates["elpd_loo", ] %>% round(elpd_digits) - - loo_estimate_summary <- summarise_loo_estimate( - seromodel = seromodel, - par_loo_estimate = par_loo_estimate, - loo_estimate_digits = loo_estimate_digits - ) - - summary_list[par_loo_estimate] = loo_estimate_summary - - if (startsWith(model_name, "constant")) { - foi_summary <- summarise_central_estimate( - seromodel = seromodel, - serosurvey = serosurvey, - alpha = alpha, - par_name = "foi", - central_estimate_digits = central_estimate_digits - ) - - foi_rhat <- bayesplot::rhat(seromodel, "foi") %>% - signif(rhat_digits) - summary_list <- append( - summary_list, - list( - foi = foi_summary, - foi_rhat = foi_rhat - ) - ) - } - - if (!endsWith(model_name, "no_seroreversion")) { - seroreversion_rate_summary <- summarise_central_estimate( - seromodel = seromodel, - serosurvey = serosurvey, - alpha = alpha, - par_name = "seroreversion_rate", - central_estimate_digits = central_estimate_digits - ) - - seroreversion_rate_rhat <- bayesplot::rhat( - seromodel, - "seroreversion_rate" - ) %>% - signif(rhat_digits) - - summary_list <- append( - summary_list, - list( - seroreversion_rate = seroreversion_rate_summary, - seroreversion_rate_rhat = seroreversion_rate_rhat - ) - ) - } - - return(summary_list) -} - #' Plots model summary #' #' @inheritParams summarise_seromodel diff --git a/R/summarise_seromodel.R b/R/summarise_seromodel.R new file mode 100644 index 00000000..7156f537 --- /dev/null +++ b/R/summarise_seromodel.R @@ -0,0 +1,144 @@ +#' Extract specified loo estimate +#' +#' @inheritParams extract_central_estimates +#' @param par_loo_estimate Name of the loo estimate to be extracted. +#' @param loo_estimate_digits Number of loo estimate digits +#' @return Text summarising specified loo estimate +#' @export +summarise_loo_estimate <- function( + seromodel, + par_loo_estimate = "elpd_loo", + loo_estimate_digits = 2 +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + loo_fit <- loo::loo( + seromodel, + pars = c(parameter_name = "log_likelihood") + ) + loo_estimate <- loo_fit$estimates[par_loo_estimate, ] %>% + round(loo_estimate_digits) + + loo_estimate_summary <- paste0(loo_estimate[1], "(se=", loo_estimate[2], ")") + + return(loo_estimate_summary) +} + +#' Summarise central estimate +#' +#' @inheritParams extract_central_estimates +#' @param central_estimate_digits Number of central estimate digits +#' @return Text summarising specified central estimate +#' @export +summarise_central_estimate <- function( + seromodel, + serosurvey, + alpha, + par_name = "seroreversion_rate", + central_estimate_digits = 2 +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + central_estimates <- signif( + extract_central_estimates( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + par_name = par_name + ), + digits = 2 + ) + + central_estimate_summary <- paste0( + central_estimates$median, + "(", 100 * (1 - alpha), "% CI, ", + central_estimates$lower, "-", + central_estimates$upper, ")" + ) + + return(central_estimate_summary) +} + +#' Summarise specified model +#' +#' @inheritParams extract_central_estimates +#' @inheritParams extract_loo_estimate +#' @inheritParams summarise_central_estimate +#' @return A list summarising the specified model +#' \describe{ +#' \item{`model_name`}{Name of the model} +#' \item{`elpd`}{elpd and its standard deviation} +#' \item{`foi`}{Estimated foi with credible interval (for 'constant' model)} +#' \item{`foi_rhat`}{foi rhat value (for 'constant' model)} +#' \item{`seroreversion_rate`}{Estimated seroreversion rate} +#' \item{`seroreversion_rate_rhat`}{Seroreversion rate rhat value} +#' } +#' @export +summarise_seromodel <- function( + seromodel, + serosurvey, + alpha = 0.05, + par_loo_estimate = "elpd_loo", + loo_estimate_digits = 1, + central_estimate_digits = 2, + rhat_digits = 2 +) { + checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) + + model_name <- seromodel@model_name + summary_list <- list(model_name = model_name) + + loo_estimate_summary <- summarise_loo_estimate( + seromodel = seromodel, + par_loo_estimate = par_loo_estimate, + loo_estimate_digits = loo_estimate_digits + ) + + summary_list[par_loo_estimate] = loo_estimate_summary + + if (startsWith(model_name, "constant")) { + foi_summary <- summarise_central_estimate( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + par_name = "foi", + central_estimate_digits = central_estimate_digits + ) + + foi_rhat <- bayesplot::rhat(seromodel, "foi") %>% + signif(rhat_digits) + summary_list <- append( + summary_list, + list( + foi = foi_summary, + foi_rhat = foi_rhat + ) + ) + } + + if (!endsWith(model_name, "no_seroreversion")) { + seroreversion_rate_summary <- summarise_central_estimate( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + par_name = "seroreversion_rate", + central_estimate_digits = central_estimate_digits + ) + + seroreversion_rate_rhat <- bayesplot::rhat( + seromodel, + "seroreversion_rate" + ) %>% + signif(rhat_digits) + + summary_list <- append( + summary_list, + list( + seroreversion_rate = seroreversion_rate_summary, + seroreversion_rate_rhat = seroreversion_rate_rhat + ) + ) + } + + return(summary_list) +} From 9684420f615fd0a3997e77b093642fb11e5f469b Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 1 Aug 2024 15:39:08 -0500 Subject: [PATCH 30/78] feat: add convergence field to summary in 'summarise_seromodel' --- R/summarise_seromodel.R | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/R/summarise_seromodel.R b/R/summarise_seromodel.R index 7156f537..4cd567d4 100644 --- a/R/summarise_seromodel.R +++ b/R/summarise_seromodel.R @@ -62,7 +62,7 @@ summarise_central_estimate <- function( #' Summarise specified model #' #' @inheritParams extract_central_estimates -#' @inheritParams extract_loo_estimate +#' @inheritParams summarise_loo_estimate #' @inheritParams summarise_central_estimate #' @return A list summarising the specified model #' \describe{ @@ -96,6 +96,7 @@ summarise_seromodel <- function( summary_list[par_loo_estimate] = loo_estimate_summary + check_convergence <- c() if (startsWith(model_name, "constant")) { foi_summary <- summarise_central_estimate( seromodel = seromodel, @@ -107,6 +108,12 @@ summarise_seromodel <- function( foi_rhat <- bayesplot::rhat(seromodel, "foi") %>% signif(rhat_digits) + + check_convergence <- append( + check_convergence, + foi_rhat < 1.01 + ) + summary_list <- append( summary_list, list( @@ -114,6 +121,13 @@ summarise_seromodel <- function( foi_rhat = foi_rhat ) ) + } else { + rhats <- bayesplot::rhat(seromodel, "foi_vector") + + check_convergence <- append( + check_convergence, + all(rhats < 1.01) + ) } if (!endsWith(model_name, "no_seroreversion")) { @@ -128,9 +142,14 @@ summarise_seromodel <- function( seroreversion_rate_rhat <- bayesplot::rhat( seromodel, "seroreversion_rate" - ) %>% + ) %>% signif(rhat_digits) + check_convergence <- append( + check_convergence, + seroreversion_rate_rhat < 1.01 + ) + summary_list <- append( summary_list, list( @@ -140,5 +159,11 @@ summarise_seromodel <- function( ) } + if (all(check_convergence)) { + summary_list["converged"] = "yes" + } else { + summary_list["converged"] = "no" + } + return(summary_list) } From 7961fc9ca1c24d9cbf251efd5754402db2134b7a Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 1 Aug 2024 16:01:47 -0500 Subject: [PATCH 31/78] refac: improve parameter specification in plotting functions --- R/plot_seromodel.R | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index 9c6bffbc..f83f768c 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -128,7 +128,7 @@ plot_seroprevalence_estimates <- function( seromodel, serosurvey, alpha = 0.05, - ... + size_text = 11 ) { checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) @@ -142,7 +142,7 @@ plot_seroprevalence_estimates <- function( seroprevalence_plot <- plot_serosurvey( serosurvey = serosurvey, - ... + size_text = size_text ) + ggplot2::geom_line( data = seroprevalence_central_estimates, @@ -179,8 +179,8 @@ plot_foi_estimates <- function( serosurvey, alpha = 0.05, foi_df = NULL, - size_text = 11, - foi_max = NULL + foi_max = NULL, + size_text = 11 ) { # TODO: Add checks for foi_df (size, colnames, etc.) checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) @@ -368,7 +368,8 @@ plot_summary <- function( ggplot2::theme_void() + ggplot2::geom_text( ggplot2::aes(label = text), - fontface = "bold" + fontface = "bold", + size = size_text / 2.5 ) return(summary_plot) @@ -406,9 +407,10 @@ plot_seromodel <- function( ) seroprev_plot <- plot_seroprevalence_estimates( - seromodel, - serosurvey, - ... + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + size_text = size_text ) plot_list <- list( @@ -421,13 +423,16 @@ plot_seromodel <- function( foi_plot <- plot_foi_estimates( seromodel, serosurvey, - ... + alpha = alpha, + foi_df = foi_df, + foi_max = foi_max, + size_text ) rhats_plot <- plot_rhats( - seromodel, - serosurvey, - ... + seromodel = seromodel, + serosurvey = serosurvey, + size_text = size_text ) plot_list <- append( From aa544ce54fa4e7174deda3af1f09072e93829b18 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 30 May 2024 11:23:05 -0500 Subject: [PATCH 32/78] add comments to config.yml --- inst/extdata/config.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/inst/extdata/config.yml b/inst/extdata/config.yml index c27add47..70b9e928 100644 --- a/inst/extdata/config.yml +++ b/inst/extdata/config.yml @@ -1,11 +1,13 @@ default: priors: indexes: - uniform : 0 - normal : 1 + uniform: 0 + normal: 1 defaults: - prior_index : 1 - min : 0 - max : 10 - mean : 0 - sd : 1 + prior_index: 1 + # uniform distribution + min: 0 + max: 10 + # normal distribution + mean: 0 + sd: 1 From a2feb510d54600d09abc6e1e4e637fd4dcd5a2b8 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 1 Aug 2024 16:37:35 -0500 Subject: [PATCH 33/78] fix: add manually null seroprev for age=0 in expanded prevalence --- R/plot_seromodel.R | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index f83f768c..599b007f 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -132,13 +132,21 @@ plot_seroprevalence_estimates <- function( ) { checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) - seroprevalence_central_estimates <- extract_central_estimates( - seromodel = seromodel, - serosurvey = serosurvey, - alpha = alpha, - par_name = "prob_infected_expanded" + seroprevalence_central_estimates <- data.frame( + median = 0.0, + lower = 0.0, + upper = 0.0, + age = 0 + ) %>% + rbind( + extract_central_estimates( + seromodel = seromodel, + serosurvey = serosurvey, + alpha = alpha, + par_name = "prob_infected_expanded" ) %>% mutate(age = seq(1, max(serosurvey$age_max))) + ) seroprevalence_plot <- plot_serosurvey( serosurvey = serosurvey, From f22090f37e4ee196efa99c40cdc578ce6c44d95f Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 6 Aug 2024 11:57:23 -0500 Subject: [PATCH 34/78] doc: lintr functions' documentation --- R/fit_seromodel.R | 12 +- R/simulate_serosurvey.R | 264 ++++++++++++++++++++++------------------ 2 files changed, 154 insertions(+), 122 deletions(-) diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index b7240755..dacc3a38 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -27,9 +27,9 @@ add_age_group_to_serosurvey <- function(serosurvey) { #' @param foi_prior Force-of-infection distribution specified by means of #' the helper functions. Currently available options are: #' \describe{ -#' \item{`[sf_normal]`} -#' \item{`[sf_uniform]`} -#' \item{`[sf_none]`} +#' \item{`[sf_normal]`} +#' \item{`[sf_uniform]`} +#' \item{`[sf_none]`} #' } #' @param foi_index Integer vector specifying the age-groups for which #' force-of-infection values will be estimated @@ -38,9 +38,9 @@ add_age_group_to_serosurvey <- function(serosurvey) { #' @param seroreversion_prior seroreversion distribution specified by means of #' the helper functions. Currently available options are: #' \describe{ -#' \item{`[sf_normal]`} -#' \item{`[sf_uniform]`} -#' \item{`[sf_none]`} +#' \item{`[sf_normal]`} +#' \item{`[sf_uniform]`} +#' \item{`[sf_none]`} #' } #' @param ... Additional parameters for [rstan][rstan::sampling] #' @returns stan_fit object with force-of-infection and seroreversion diff --git a/R/simulate_serosurvey.R b/R/simulate_serosurvey.R index 934e5441..74740048 100644 --- a/R/simulate_serosurvey.R +++ b/R/simulate_serosurvey.R @@ -34,7 +34,8 @@ probability_exact_age_varying <- function( #' Computes the probability of being seropositive when FOIs vary by time #' -#' @param years Integer indicating the years covering the birth ages of the sample +#' @param years Integer indicating the years covering the birth ages of the +#' sample #' @param fois Numeric atomic vector corresponding to the age-varying #' force-of-infection to simulate from #' @param seroreversion_rate Non-negative seroreversion rate. Default is 0. @@ -68,14 +69,17 @@ probability_exact_time_varying <- function( return(probabilities_oldest_age_last) } -#' Generate probabilities of seropositivity by age based on a time-varying FOI model. +#' Generate probabilities of seropositivity by age based on a time-varying FOI +#' model. #' -#' This function calculates the probabilities of seropositivity by age based on a time-varying FOI model. +#' This function calculates the probabilities of seropositivity by age based on +#' a time-varying FOI model. #' It takes into account the FOI and the rate of seroreversion. #' -#' @param foi A dataframe containing the force of infection (FOI) values for different years. -#' It should have two columns: 'year' and 'foi'. -#' @param seroreversion_rate A non-negative numeric value representing the rate of seroreversion. +#' @param foi A dataframe containing the force of infection (FOI) values +#' for different years. It should have two columns: 'year' and 'foi'. +#' @param seroreversion_rate A non-negative numeric value representing the +#' rate of seroreversion. #' #' @return A dataframe with columns 'age' and 'seropositivity'. probability_seropositive_time_model_by_age <- function( @@ -99,14 +103,17 @@ probability_seropositive_time_model_by_age <- function( } -#' Generate probabilities of seropositivity by age based on an age-varying FOI model. +#' Generate probabilities of seropositivity by age based on an age-varying +#' FOI model. #' -#' This function calculates the probabilities of seropositivity by age based on an age-varying FOI model. +#' This function calculates the probabilities of seropositivity by age based on +#' an age-varying FOI model. #' It takes into account the FOI and the rate of seroreversion. #' -#' @param foi A dataframe containing the force of infection (FOI) values for different ages. -#' It should have two columns: 'age' and 'foi'. -#' @param seroreversion_rate A non-negative numeric value representing the rate of seroreversion. +#' @param foi A dataframe containing the force of infection (FOI) values for +#' different ages. It should have two columns: 'age' and 'foi'. +#' @param seroreversion_rate A non-negative numeric value representing the rate +#' of seroreversion. #' #' @return A dataframe with columns 'age' and 'seropositivity'. probability_seropositive_age_model_by_age <- function( @@ -129,14 +136,17 @@ probability_seropositive_age_model_by_age <- function( return(df) } -#' Generate probabilities of seropositivity by age based on an age-and-time varying FOI model. +#' Generate probabilities of seropositivity by age based on an age-and-time +#' varying FOI model. #' -#' This function calculates the probabilities of seropositivity by age based on an age-and-time-varying FOI model. +#' This function calculates the probabilities of seropositivity by age based on +#' an age-and-time-varying FOI model. #' It takes into account the FOI and the rate of seroreversion. #' -#' @param foi A dataframe containing the force of infection (FOI) values for different ages. -#' It should have three columns: 'year', 'age' and 'foi'. -#' @param seroreversion_rate A non-negative numeric value representing the rate of seroreversion. +#' @param foi A dataframe containing the force of infection (FOI) values +#' for different ages. It should have three columns: 'year', 'age' and 'foi'. +#' @param seroreversion_rate A non-negative numeric value representing +#' the rate of seroreversion. #' #' @return A dataframe with columns 'age' and 'seropositivity'. probability_seropositive_age_and_time_model_by_age <- function( @@ -191,13 +201,15 @@ probability_seropositive_age_and_time_model_by_age <- function( #' time-varying FOI model, an age-varying FOI model, or an age-and-time-varying #' FOI model. In all cases, it is possible to optionally include seroreversion. #' -#' @param model A string specifying the model type which can be one of ['age', 'time', 'age-time']. +#' @param model A string specifying the model type which can be one of +#' ['age', 'time', 'age-time']. #' @param foi A dataframe containing the force of infection (FOI) values. #' For time-varying models the columns should be ['year', 'foi']. #' For age-varying models the columns should be ['age', 'foi']. -#' For age-and-time-varying models the columns should be ['age', 'time', 'foi']. -#' @param seroreversion_rate A non-negative value determining the rate of seroreversion (per year). -#' Default is 0. +#' For age-and-time-varying models the columns should be +#' ['age', 'time', 'foi']. +#' @param seroreversion_rate A non-negative value determining the rate of +#' seroreversion (per year). Default is 0. #' #' @return A dataframe with columns 'age' and 'seropositivity'. #' @export @@ -241,11 +253,12 @@ sum_of_A <- function(t, tau, construct_A_fn, ...) { #' This function calculates the probabilities of seropositivity by age based on #' an abstract model of the serocatalytic system. #' -#' @param construct_A_fn A function that constructs a matrix that defines the multiplier -#' term in the linear ODE system. -#' @param calculate_seropositivity_function A function which takes the state vector -#' and returns the seropositive fraction. -#' @param initial_conditions The initial state vector proportions for each birth cohort. +#' @param construct_A_fn A function that constructs a matrix that defines the +#' multiplier term in the linear ODE system. +#' @param calculate_seropositivity_function A function which takes the state +#' vector and returns the seropositive fraction. +#' @param initial_conditions The initial state vector proportions for each +#' birth cohort. #' @param max_age The maximum age to simulate seropositivity for. #' #' @return A dataframe with columns 'age' and 'seropositivity'. @@ -275,14 +288,15 @@ probability_seropositive_general_model_by_age <- function( #' Add bins based on age intervals. #' -#' It generates a new column 'group' in the survey_features dataframe, representing -#' the group interval for each row based on the age_min and age_max columns. +#' It generates a new column 'group' in the survey_features dataframe, +#' representing the group interval for each row based on the age_min and +#' age_max columns. #' -#' @param survey_features A dataframe containing age_min and age_max columns representing -#' the minimum and maximum age boundaries for each group. +#' @param survey_features A dataframe containing age_min and age_max columns +#' representing the minimum and maximum age boundaries for each group. #' -#' @return A dataframe with an additional 'group' column representing the group interval -#' for each row based on the age_min and age_max columns. +#' @return A dataframe with an additional 'group' column representing the group +#' interval for each row based on the age_min and age_max columns. add_age_bins <- function(survey_features) { intervals <- vector(length = nrow(survey_features)) for(i in seq_along(intervals)) { @@ -298,11 +312,13 @@ add_age_bins <- function(survey_features) { #' Create a survey dataframe with per individual age information. #' #' -#' @param survey_features A dataframe containing information about age groups and sample sizes. +#' @param survey_features A dataframe containing information about age groups +#' and sample sizes. #' @param age_df A dataframe containing 'age' and 'group'. #' -#' @return A dataframe with overall sample sizes calculated by joining survey_features and age_df. -#' This dataframe has columns including 'age' and 'overall_sample_size'. +#' @return A dataframe with overall sample sizes calculated by joining +#' survey_features and age_df. +#' This dataframe has columns including 'age' and 'overall_sample_size'. survey_by_individual_age <- function(survey_features, age_df) { age_df %>% left_join(survey_features, by = "group") %>% @@ -311,9 +327,9 @@ survey_by_individual_age <- function(survey_features, age_df) { #' Generate random sample sizes using multinomial sampling. #' -#' This function generates random sample sizes for each age group using multinomial sampling. -#' It takes the total sample size and the number of age groups as input and returns a vector -#' of sample sizes for each age group. +#' This function generates random sample sizes for each age group using +#' multinomial sampling. It takes the total sample size and the number of age +#' groups as input and returns a vector of sample sizes for each age group. #' #' @param sample_size The total sample size to be distributed among age groups. #' @param n_ages The number of age groups. @@ -330,14 +346,16 @@ multinomial_sampling_group <- function(sample_size, n_ages) { #' Generate random sample sizes for each age group. #' -#' This function generates random sample sizes for each age group based on the overall sample size -#' and the distribution of individuals across age groups. It uses multinomial sampling to allocate -#' the total sample size to each age group proportionally. +#' This function generates random sample sizes for each age group based on the +#' overall sample size and the distribution of individuals across age groups. +#' It uses multinomial sampling to allocate the total sample size to each age +#' group proportionally. #' -#' @param survey_df_long A dataframe with columns 'age', 'group' and 'overall_sample_size'. +#' @param survey_df_long A dataframe with columns 'age', 'group' and +#' 'overall_sample_size'. #' -#' @return A dataframe with random sample sizes generated for each age based on the overall -#' sample size. +#' @return A dataframe with random sample sizes generated for each age based on +#' the overall sample size. generate_random_sample_sizes <- function(survey_df_long) { df_new <- NULL @@ -359,20 +377,22 @@ generate_random_sample_sizes <- function(survey_df_long) { return(df_new) } -#' Generate random sample sizes for each individual age based on survey features. +#' Generate random sample sizes for each individual age based on survey +#' features. #' -#' This function generates random sample sizes for each individual age based on the provided -#' survey features. It first creates age bins, assigns each individual in the survey features -#' to an age bin, calculates the overall sample size by group, and then generates random sample -#' sizes for each age group. Finally, it returns a dataframe with the random sample sizes for -#' each individual age. +#' This function generates random sample sizes for each individual age based on +#' the provided survey features. It first creates age bins, assigns each +#' individual in the survey features to an age bin, calculates the overall +#' sample size by group, and then generates random sample sizes for each age +#' group. Finally, it returns a dataframe with the random sample sizes for each +#' individual age. #' -#' @param survey_features A dataframe containing information about individuals' age ranges and -#' sample sizes. +#' @param survey_features A dataframe containing information about individuals' +#' age ranges and sample sizes. #' -#' @return A dataframe with random sample sizes generated for each individual age based on the -#' provided survey features. -sample_size_by_individual_age_random <- function(survey_features) { +#' @return A dataframe with random sample sizes generated for each individual +#' age based on the provided survey features. +sample_size_by_individual_age_random <- function(survey_features) { #nolint ages <- seq(1, max(survey_features$age_max), 1) @@ -445,20 +465,22 @@ generate_seropositive_counts_by_age_bin <- function( #' Simulate serosurvey data based on a time-varying FOI model. #' -#' This function generates binned serosurvey data based on a time-varying FOI model, -#' optionally including seroreversion. This function allows construction of serosurveys -#' with binned age groups, and it generates uncertainty in the distribution of a sample size -#' within an age bin through multinomial sampling. -#' -#' @param foi A dataframe containing the force of infection (FOI) values for different years. -#' It should have two columns: 'year' and 'foi'. -#' @param survey_features A dataframe containing information about the binned age groups and sample -#' sizes for each. It should contain columns: ['age_min', 'age_max', 'sample_size']. -#' @param seroreversion_rate A non-negative value determining the rate of seroreversion (per year). -#' Default is 0. -#' -#' @return A dataframe with simulated serosurvey data, including age group information, overall -#' sample sizes, the number of seropositive individuals, and other survey features. +#' This function generates binned serosurvey data based on a time-varying FOI +#' model, optionally including seroreversion. This function allows construction +#' of serosurveys with binned age groups, and it generates uncertainty in the +#' distribution of a sample size within an age bin through multinomial sampling. +#' +#' @param foi A dataframe containing the force of infection (FOI) values for +#' different years. It should have two columns: 'year' and 'foi'. +#' @param survey_features A dataframe containing information about the binned +#' age groups and sample sizes for each. It should contain columns: +#' ['age_min', 'age_max', 'sample_size']. +#' @param seroreversion_rate A non-negative value determining the rate of +#' seroreversion (per year). Default is 0. +#' +#' @return A dataframe with simulated serosurvey data, including age group +#' information, overall sample sizes, the number of seropositive individuals, +#' and other survey features. #' @examples #' # specify FOIs for each year #' foi_df <- data.frame( @@ -508,20 +530,22 @@ simulate_serosurvey_time_model <- function( #' Simulate serosurvey data based on an age-varying FOI model. #' -#' This function generates binned serosurvey data based on an age-varying FOI model, -#' optionally including seroreversion. This function allows construction of serosurveys -#' with binned age groups, and it generates uncertainty in the distribution of a sample size -#' within an age bin through multinomial sampling. -#' -#' @param foi A dataframe containing the force of infection (FOI) values for different ages. -#' It should have two columns: 'age' and 'foi'. -#' @param survey_features A dataframe containing information about the binned age groups and sample -#' sizes for each. It should contain columns: ['age_min', 'age_max', 'sample_size']. -#' @param seroreversion_rate A non-negative value determining the rate of seroreversion (per year). -#' Default is 0. -#' -#' @return A dataframe with simulated serosurvey data, including age group information, overall -#' sample sizes, the number of seropositive individuals, and other survey features. +#' This function generates binned serosurvey data based on an age-varying FOI +#' model, optionally including seroreversion. This function allows construction +#' of serosurveys with binned age groups, and it generates uncertainty in the +#' distribution of a sample size within an age bin through multinomial sampling. +#' +#' @param foi A dataframe containing the force of infection (FOI) values for +#' different ages. It should have two columns: 'age' and 'foi'. +#' @param survey_features A dataframe containing information about the binned +#' age groups and sample sizes for each. It should contain columns: +#' ['age_min', 'age_max', 'sample_size']. +#' @param seroreversion_rate A non-negative value determining the rate of +#' seroreversion (per year). Default is 0. +#' +#' @return A dataframe with simulated serosurvey data, including age group +#' information, overall sample sizes, the number of seropositive individuals, +#' and other survey features. #' @examples #' # specify FOIs for each year #' foi_df <- data.frame( @@ -570,20 +594,23 @@ simulate_serosurvey_age_model <- function( #' Simulate serosurvey data based on an age-and-time-varying FOI model. #' -#' This function generates binned serosurvey data based on an age-and-time-varying FOI model, -#' optionally including seroreversion. This function allows construction of serosurveys -#' with binned age groups, and it generates uncertainty in the distribution of a sample size -#' within an age bin through multinomial sampling. -#' -#' @param foi A dataframe containing the force of infection (FOI) values for different ages. -#' It should have two columns: 'year', 'age' and 'foi'. -#' @param survey_features A dataframe containing information about the binned age groups and sample -#' sizes for each. It should contain columns: ['age_min', 'age_max', 'sample_size']. -#' @param seroreversion_rate A non-negative value determining the rate of seroreversion (per year). -#' Default is 0. -#' -#' @return A dataframe with simulated serosurvey data, including age group information, overall -#' sample sizes, the number of seropositive individuals, and other survey features. +#' This function generates binned serosurvey data based on an +#' age-and-time-varying FOI model, optionally including seroreversion. +#' This function allows construction of serosurveys with binned age groups, and +#' it generates uncertainty in the distribution of a sample size within an age +#' bin through multinomial sampling. +#' +#' @param foi A dataframe containing the force of infection (FOI) values for +#' different ages. It should have two columns: 'year', 'age' and 'foi'. +#' @param survey_features A dataframe containing information about the binned +#' age groups and sample sizes for each. It should contain columns: +#' ['age_min', 'age_max', 'sample_size']. +#' @param seroreversion_rate A non-negative value determining the rate of +#' seroreversion (per year). Default is 0. +#' +#' @return A dataframe with simulated serosurvey data, including age group +#' information, overall sample sizes, the number of seropositive individuals, +#' and other survey features. #' @examples #' # specify FOIs for each year #' foi_df <- data.frame( @@ -634,24 +661,28 @@ simulate_serosurvey_age_and_time_model <- function( #' Simulate serosurvey data based on various FOI models. #' -#' This function generates binned serosurvey data based on either a time-varying FOI model, -#' an age-varying FOI model, or an age-and-time-varying FOI model. In all cases, it is possible -#' to optionally include seroreversion. This function allows construction of serosurveys -#' with binned age groups, and it generates uncertainty in the distribution of a sample size -#' within an age bin through multinomial sampling. +#' This function generates binned serosurvey data based on either a time-varying +#' FOI model, an age-varying FOI model, or an age-and-time-varying FOI model. +#' In all cases, it is possible to optionally include seroreversion. This +#' function allows construction of serosurveys with binned age groups, and it +#' generates uncertainty in the distribution of a sample size within an age bin +#' through multinomial sampling. #' -#' @param model A string specifying the model type which can be one of ['age', 'time', 'age-time']. +#' @param model A string specifying the model type which can be one of +#' ['age', 'time', 'age-time']. #' @param foi A dataframe containing the force of infection (FOI) values. -#' For time-varying models the columns should be ['year', 'foi']. -#' For age-varying models the columns should be ['age', 'foi']. -#' For age-and-time-varying models the columns should be ['age', 'time', 'foi']. -#' @param survey_features A dataframe containing information about the binned age groups and sample -#' sizes for each. It should contain columns: ['age_min', 'age_max', 'sample_size']. -#' @param seroreversion_rate A non-negative value determining the rate of seroreversion (per year). -#' Default is 0. -#' -#' @return A dataframe with simulated serosurvey data, including age group information, overall -#' sample sizes, the number of seropositive individuals, and other survey features. +#' For time-varying models the columns should be ['year', 'foi']. +#' For age-varying models the columns should be ['age', 'foi']. +#' For age-and-time-varying models the columns should be ['age', 'time', 'foi']. +#' @param survey_features A dataframe containing information about the binned +#' age groups and sample sizes for each. +#' It should contain columns: ['age_min', 'age_max', 'sample_size']. +#' @param seroreversion_rate A non-negative value determining the rate of +#' seroreversion (per year). Default is 0. +#' +#' @return A dataframe with simulated serosurvey data, including age group +#' information, overall sample sizes, the number of seropositive individuals, +#' and other survey features. #' @examples #' # time-varying model #' foi_df <- data.frame( @@ -732,14 +763,15 @@ simulate_serosurvey <- function( #' Simulate serosurvey data based on general serocatalytic model. #' -#' This simulation method assumes only that the model system can be written as a piecewise- -#' linear ordinary differential equation system. +#' This simulation method assumes only that the model system can be written as +#' a piecewise-linear ordinary differential equation system. #' #' @inheritParams probability_seropositive_general_model_by_age #' @inheritParams simulate_serosurvey #' -#' @return A dataframe with simulated serosurvey data, including age group information, overall -#' sample sizes, the number of seropositive individuals, and other survey features. +#' @return A dataframe with simulated serosurvey data, including age group +#' information, overall sample sizes, the number of seropositive individuals, +#' and other survey features. #' #' @export simulate_serosurvey_general_model <- function( From 768c20f3067aee7f2c580cd6946bcdbc33fc9778 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 6 Aug 2024 12:03:30 -0500 Subject: [PATCH 35/78] refac: add `#nolint` to long variable and function names --- R/plot_seromodel.R | 4 +-- R/simulate_serosurvey.R | 70 ++++++++++++++++++++++------------------- R/validation.R | 4 +-- 3 files changed, 41 insertions(+), 37 deletions(-) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index 599b007f..c2b11f58 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -14,7 +14,7 @@ #' of `seroprev`} #' } #' @export -prepare_serosurvey_for_plotting <- function( +prepare_serosurvey_for_plotting <- function( #nolint serosurvey, alpha = 0.05 ) { @@ -132,7 +132,7 @@ plot_seroprevalence_estimates <- function( ) { checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) - seroprevalence_central_estimates <- data.frame( + seroprevalence_central_estimates <- data.frame( #nolint median = 0.0, lower = 0.0, upper = 0.0, diff --git a/R/simulate_serosurvey.R b/R/simulate_serosurvey.R index 74740048..d8fce055 100644 --- a/R/simulate_serosurvey.R +++ b/R/simulate_serosurvey.R @@ -82,9 +82,10 @@ probability_exact_time_varying <- function( #' rate of seroreversion. #' #' @return A dataframe with columns 'age' and 'seropositivity'. -probability_seropositive_time_model_by_age <- function( - foi, - seroreversion_rate) { +probability_seropositive_time_model_by_age <- function( #nolint + foi, + seroreversion_rate +) { years <- foi$year @@ -116,9 +117,10 @@ probability_seropositive_time_model_by_age <- function( #' of seroreversion. #' #' @return A dataframe with columns 'age' and 'seropositivity'. -probability_seropositive_age_model_by_age <- function( - foi, - seroreversion_rate) { +probability_seropositive_age_model_by_age <- function( #nolint + foi, + seroreversion_rate +) { ages <- seq_along(foi$age) @@ -149,10 +151,10 @@ probability_seropositive_age_model_by_age <- function( #' the rate of seroreversion. #' #' @return A dataframe with columns 'age' and 'seropositivity'. -probability_seropositive_age_and_time_model_by_age <- function( - foi, - seroreversion_rate - ) { +probability_seropositive_age_and_time_model_by_age <- function( #nolint + foi, + seroreversion_rate +) { foi_matrix <- foi %>% tidyr::pivot_wider( @@ -213,10 +215,11 @@ probability_seropositive_age_and_time_model_by_age <- function( #' #' @return A dataframe with columns 'age' and 'seropositivity'. #' @export -probability_seropositive_by_age <- function( - model, - foi, - seroreversion_rate = 0) { +probability_seropositive_by_age <- function( #nolint + model, + foi, + seroreversion_rate = 0 +) { if(model == "time") { probability_function <- probability_seropositive_time_model_by_age @@ -263,12 +266,13 @@ sum_of_A <- function(t, tau, construct_A_fn, ...) { #' #' @return A dataframe with columns 'age' and 'seropositivity'. #' @export -probability_seropositive_general_model_by_age <- function( - construct_A_function, - calculate_seropositivity_function, - initial_conditions, - max_age, - ...) { +probability_seropositive_general_model_by_age <- function( #nolint + construct_A_fn, + calculate_seropositivity_function, #nolint + initial_conditions, + max_age, + ... +) { probabilities <- vector(length = max_age) for(i in seq_along(probabilities)) { @@ -415,7 +419,7 @@ sample_size_by_individual_age_random <- function(survey_features) { #nolint survey_features <- add_age_bins(survey_features) - survey_features_by_individual_age <- survey_by_individual_age( + survey_features_by_individual_age <- survey_by_individual_age( #nolint survey_features, age_df) @@ -435,8 +439,8 @@ check_age_constraints <- function(df) { return(TRUE) } -generate_seropositive_counts_by_age_bin <- function( - probability_seropositive_by_age, +generate_seropositive_counts_by_age_bin <- function( #nolint + probability_seropositive_by_age, #nolint sample_size_by_age_random, survey_features ) { @@ -625,10 +629,10 @@ simulate_serosurvey_age_model <- function( #' serosurvey <- simulate_serosurvey_age_and_time_model( #' foi_df, survey_features) #' @export -simulate_serosurvey_age_and_time_model <- function( - foi, - survey_features, - seroreversion_rate=0 +simulate_serosurvey_age_and_time_model <- function( #nolint + foi, + survey_features, + seroreversion_rate = 0 ) { # Input validation @@ -774,12 +778,12 @@ simulate_serosurvey <- function( #' and other survey features. #' #' @export -simulate_serosurvey_general_model <- function( - construct_A_function, - calculate_seropositivity_function, - initial_conditions, - survey_features, - ... +simulate_serosurvey_general_model <- function( #nolint + construct_A_function, + calculate_seropositivity_function, #nolint + initial_conditions, + survey_features, + ... ) { # Input validation diff --git a/R/validation.R b/R/validation.R index 53110175..38aab62f 100644 --- a/R/validation.R +++ b/R/validation.R @@ -86,7 +86,7 @@ validate_seroreversion_rate <- function(seroreversion_rate) { } } -validate_survey_and_foi_consistency <- function( +validate_survey_and_foi_consistency <- function( #nolint survey_features, foi_df ) { @@ -95,7 +95,7 @@ validate_survey_and_foi_consistency <- function( stop("maximum age implicit in foi_df should not exceed max age in survey_features.") } -validate_survey_and_foi_consistency_age_time <- function( +validate_survey_and_foi_consistency_age_time <- function( #nolint survey_features, foi_df ) { From f8cd7bb2cbe31e3ad7f821dd4c89b2a8d9a25d00 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 6 Aug 2024 12:13:26 -0500 Subject: [PATCH 36/78] refac(lintr): add `.data$` where needed in pipes --- R/plot_seromodel.R | 22 +++++++++++----------- R/simulate_serosurvey.R | 20 +++++++++++--------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index c2b11f58..94b36a9d 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -36,7 +36,7 @@ prepare_serosurvey_for_plotting <- function( #nolint seroprev_upper = "Upper" ) %>% dplyr::arrange(.data$age_group) %>% - dplyr::relocate(age_group) + dplyr::relocate(.data$age_group) } #' Plots seroprevalence from the given serosurvey @@ -63,7 +63,7 @@ plot_serosurvey <- function( ggplot2::geom_errorbar( ggplot2::aes( ymin = .data$seroprev_lower, - ymax = .data$seroprev_upper, + ymax = .data$seroprev_upper ), width = 0.1 ) + @@ -154,12 +154,12 @@ plot_seroprevalence_estimates <- function( ) + ggplot2::geom_line( data = seroprevalence_central_estimates, - ggplot2::aes(x = age, y = median), + ggplot2::aes(x = .data$age, y = median), colour = "#7a0177" ) + ggplot2::geom_ribbon( data = seroprevalence_central_estimates, - ggplot2::aes(x = age, ymin = lower, ymax = upper), + ggplot2::aes(x = .data$age, ymin = .data$lower, ymax = .data$upper), fill = "#c994c7", alpha = 0.5 ) + ggplot2::coord_cartesian( @@ -221,7 +221,7 @@ plot_foi_estimates <- function( left_join(foi_df, by = "age") } foi_plot <- ggplot2::ggplot( - data = foi_central_estimates, ggplot2::aes(x = age) + data = foi_central_estimates, ggplot2::aes(x = .data$age) ) } else if (startsWith(model_name, "time")) { xlab <- "Year" @@ -236,7 +236,7 @@ plot_foi_estimates <- function( left_join(foi_df, by = "year") } foi_plot <- ggplot2::ggplot( - data = foi_central_estimates, ggplot2::aes(x = year) + data = foi_central_estimates, ggplot2::aes(x = .data$year) ) } @@ -261,7 +261,7 @@ plot_foi_estimates <- function( if (!is.null(foi_df)) { foi_plot <- foi_plot + ggplot2::geom_line( - ggplot2::aes(y = foi), + ggplot2::aes(y = .data$foi), colour = "#b30909" ) } @@ -300,7 +300,7 @@ plot_rhats <- function( ) rhats_plot <- ggplot2::ggplot( - data = rhats_df, ggplot2::aes(x = age) + data = rhats_df, ggplot2::aes(x = .data$age) ) } else if (startsWith(model_name, "time")) { xlab <- "Year" @@ -312,7 +312,7 @@ plot_rhats <- function( ) rhats_plot <- ggplot2::ggplot( - data = rhats_df, ggplot2::aes(x = year) + data = rhats_df, ggplot2::aes(x = .data$year) ) } @@ -321,8 +321,8 @@ plot_rhats <- function( yintercept = 1.01, linetype = 'dashed' ) + - ggplot2::geom_line(ggplot2::aes(y = rhat)) + - ggplot2::geom_point(ggplot2::aes(y = rhat)) + + ggplot2::geom_line(ggplot2::aes(y = .data$rhat)) + + ggplot2::geom_point(ggplot2::aes(y = .data$rhat)) + ggplot2::coord_cartesian( ylim = c( min(1.0, min(rhats_df$rhat)), diff --git a/R/simulate_serosurvey.R b/R/simulate_serosurvey.R index d8fce055..212a1d1a 100644 --- a/R/simulate_serosurvey.R +++ b/R/simulate_serosurvey.R @@ -159,7 +159,7 @@ probability_seropositive_age_and_time_model_by_age <- function( #nolint foi_matrix <- foi %>% tidyr::pivot_wider( values_from = foi, - names_from = c(year)) %>% + names_from = c(.data$year)) %>% tibble::column_to_rownames("age") %>% as.matrix() @@ -326,7 +326,7 @@ add_age_bins <- function(survey_features) { survey_by_individual_age <- function(survey_features, age_df) { age_df %>% left_join(survey_features, by = "group") %>% - rename(overall_sample_size = sample_size) + rename(overall_sample_size = .data$sample_size) } #' Generate random sample sizes using multinomial sampling. @@ -366,7 +366,7 @@ generate_random_sample_sizes <- function(survey_df_long) { intervals <- unique(survey_df_long$group) for (interval_aux in na.omit(intervals)) { df_tmp <- survey_df_long %>% - filter(group == interval_aux) + filter(.data$group == interval_aux) sample_size <- df_tmp$overall_sample_size[1] sample_size_by_age <- multinomial_sampling_group(sample_size, nrow(df_tmp)) df_tmp <- df_tmp %>% @@ -448,15 +448,17 @@ generate_seropositive_counts_by_age_bin <- function( #nolint combined_df <- probability_seropositive_by_age %>% dplyr::left_join(sample_size_by_age_random, by="age") %>% dplyr::mutate( - n_seropositive=rbinom(nrow(probability_seropositive_by_age), - sample_size, - seropositivity)) + n_seropositive = rbinom( + nrow(probability_seropositive_by_age), + .data$sample_size, + .data$seropositivity) + ) grouped_df <- combined_df %>% - dplyr::group_by(age_min, age_max) %>% + dplyr::group_by(.data$age_min, .data$age_max) %>% dplyr::summarise( - sample_size=sum(sample_size), - n_seropositive=sum(n_seropositive), + sample_size = sum(.data$sample_size), + n_seropositive = sum(.data$n_seropositive), .groups = "drop" ) %>% left_join( From 691b6c0461b308efff107d5f14afd5b50c620ff6 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 6 Aug 2024 12:19:28 -0500 Subject: [PATCH 37/78] refac(lintr): other syntax and lintr corrections --- R/build_stan_data.R | 12 +++-- R/fit_seromodel.R | 9 ++-- R/plot_seromodel.R | 12 ++--- R/simulate_serosurvey.R | 97 +++++++++++++++++++++-------------------- R/summarise_seromodel.R | 8 ++-- R/validation.R | 52 +++++++++++++++------- 6 files changed, 105 insertions(+), 85 deletions(-) diff --git a/R/build_stan_data.R b/R/build_stan_data.R index b81193aa..86b3874c 100644 --- a/R/build_stan_data.R +++ b/R/build_stan_data.R @@ -6,7 +6,7 @@ #' @export sf_normal <- function(mean = 0, sd = 1) { # Restricting normal inputs to be non-negative - if(mean < 0 | sd <= 0) { + if (mean < 0 || sd <= 0) { msg <- paste0( "Normal distribution here only accepts", " non-negative values for mean and standard deviation" @@ -26,7 +26,7 @@ sf_normal <- function(mean = 0, sd = 1) { #' @export sf_uniform <- function(min = 0, max = 10) { # Restricting uniform inputs to be non-negative - if (min < 0 | max < 0) { + if (min < 0 || max < 0) { msg <- paste0( "Uniform distribution here only accepts", " non-negative values for min and max" @@ -179,16 +179,14 @@ build_stan_data <- function( } if (is_seroreversion) { - if(seroreversion_prior$name == "none") { + if (seroreversion_prior$name == "none") { message("seroreversion_prior not specified") stop() - } - else if (seroreversion_prior$name == "uniform") { + } else if (seroreversion_prior$name == "uniform") { stan_data$seroreversion_prior_index <- prior_index[["uniform"]] stan_data$seroreversion_min <- seroreversion_prior$min stan_data$seroreversion_max <- seroreversion_prior$max - } - else if (seroreversion_prior$name == "normal") { + } else if (seroreversion_prior$name == "normal") { stan_data$seroreversion_prior_index <- prior_index[["normal"]] stan_data$seroreversion_mean <- seroreversion_prior$mean stan_data$seroreversion_sd <- seroreversion_prior$sd diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index dacc3a38..b3e84253 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -2,14 +2,14 @@ #' #' @inheritParams fit_seromodel add_age_group_to_serosurvey <- function(serosurvey) { - if (!any(colnames(serosurvey) == "age_group")) { + if (any(colnames(serosurvey) == "age_group")) { + message("Using `age_group` already present in serosurvey") + } else { serosurvey <- serosurvey %>% dplyr::mutate( age_group = floor((.data$age_min + .data$age_max) / 2) ) - } else { - message("Using `age_group` already present in serosurvey") - } + } return(serosurvey) } @@ -73,7 +73,6 @@ fit_seromodel <- function( else model_name <- paste0(model_type, "_no_seroreversion") - # model <- stan_models[[model_name]] model <- stanmodels[[model_name]] seromodel <- rstan::sampling( model, diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index 94b36a9d..bc947449 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -216,7 +216,7 @@ plot_foi_estimates <- function( foi_central_estimates, age = ages ) - if(!is.null(foi_df)) { + if (!is.null(foi_df)) { foi_central_estimates <- foi_central_estimates %>% left_join(foi_df, by = "age") } @@ -231,7 +231,7 @@ plot_foi_estimates <- function( foi_central_estimates, year = years ) - if(!is.null(foi_df)) { + if (!is.null(foi_df)) { foi_central_estimates <- foi_central_estimates %>% left_join(foi_df, by = "year") } @@ -319,7 +319,7 @@ plot_rhats <- function( rhats_plot <- rhats_plot + ggplot2::geom_hline( yintercept = 1.01, - linetype = 'dashed' + linetype = "dashed" ) + ggplot2::geom_line(ggplot2::aes(y = .data$rhat)) + ggplot2::geom_point(ggplot2::aes(y = .data$rhat)) + @@ -345,7 +345,7 @@ plot_rhats <- function( plot_summary <- function( seromodel, serosurvey, - loo_estimate_digits= 1, + loo_estimate_digits = 1, central_estimate_digits = 2, rhat_digits = 2, size_text = 11 @@ -355,14 +355,14 @@ plot_summary <- function( summary_table <- summarise_seromodel( seromodel = seromodel, serosurvey = serosurvey, - loo_estimate_digits= loo_estimate_digits, + loo_estimate_digits = loo_estimate_digits, central_estimate_digits = central_estimate_digits, rhat_digits = rhat_digits ) %>% t() #convert summary to table summary_df <- data.frame( - row = NCOL(summary_table):1, + row = rev(seq_len(NCOL(summary_table))), text = paste0(colnames(summary_table), ": ", summary_table[1, ]) ) diff --git a/R/simulate_serosurvey.R b/R/simulate_serosurvey.R index 212a1d1a..7fd0561e 100644 --- a/R/simulate_serosurvey.R +++ b/R/simulate_serosurvey.R @@ -8,20 +8,20 @@ #' @return vector of probabilities of being seropositive for age-varying FoI #' including seroreversion (ordered from youngest to oldest individuals) probability_exact_age_varying <- function( - ages, - fois, - seroreversion_rate = 0 + ages, + fois, + seroreversion_rate = 0 ) { probabilities <- vector(length = length(ages)) # solves ODE exactly within pieces for (i in seq_along(ages)) { foi_tmp <- fois[i] - if(i == 1) + if (i == 1) probability_previous <- 0 else probability_previous <- probabilities[i - 1] - if(foi_tmp == 0) + if (foi_tmp == 0) lambda_over_both <- 0 else lambda_over_both <- (foi_tmp / (foi_tmp + seroreversion_rate)) @@ -42,9 +42,9 @@ probability_exact_age_varying <- function( #' @return vector of probabilities of being seropositive for age-varying FoI #' including seroreversion (ordered from youngest to oldest individuals) probability_exact_time_varying <- function( - years, - fois, - seroreversion_rate = 0 + years, + fois, + seroreversion_rate = 0 ) { n_years <- length(years) @@ -53,9 +53,9 @@ probability_exact_time_varying <- function( # solves ODE exactly within pieces for (i in seq_along(years)) { # birth cohorts probability_previous <- 0 - for(j in 1:(n_years - i + 1)) { # exposure during lifetime + for (j in 1:(n_years - i + 1)) { # exposure during lifetime foi_tmp <- fois[i + j - 1] - if(foi_tmp == 0) + if (foi_tmp == 0) lambda_over_both <- 0 else lambda_over_both <- (foi_tmp / (foi_tmp + seroreversion_rate)) @@ -175,9 +175,9 @@ probability_seropositive_age_and_time_model_by_age <- function( #nolint foi_matrix_subset <- foi_matrix[1:(n_ages - i + 1), i:n_ages] %>% as.matrix() # only to handle single element matrix case foi_diag <- diag(foi_matrix_subset) - for(j in 1:(n_years - i + 1)) { # exposure during lifetime + for (j in 1:(n_years - i + 1)) { # exposure during lifetime foi_tmp <- foi_diag[j] - if(foi_tmp == 0) + if (foi_tmp == 0) lambda_over_both <- 0 else lambda_over_both <- (foi_tmp / (foi_tmp + seroreversion_rate)) @@ -221,11 +221,11 @@ probability_seropositive_by_age <- function( #nolint seroreversion_rate = 0 ) { - if(model == "time") { + if (model == "time") { probability_function <- probability_seropositive_time_model_by_age - } else if(model == "age") { + } else if (model == "age") { probability_function <- probability_seropositive_age_model_by_age - } else if(model == "age-time" || model == "time-age") { + } else if (model == "age-time" || model == "time-age") { probability_function <- probability_seropositive_age_and_time_model_by_age } @@ -239,16 +239,16 @@ probability_seropositive_by_age <- function( #nolint sum_of_A <- function(t, tau, construct_A_fn, ...) { k <- 1 - for(t_primed in (tau + 1):t) { - if(k == 1) + for (t_primed in (tau + 1):t) { + if (k == 1) { A <- construct_A_fn(t_primed, tau, ...) - else { + } else { tmp <- construct_A_fn(t_primed, tau, ...) A <- A + tmp } k <- k + 1 } - A + return(A) } #' Generate probabilities of seropositivity by age based on a general FOI model. @@ -275,8 +275,8 @@ probability_seropositive_general_model_by_age <- function( #nolint ) { probabilities <- vector(length = max_age) - for(i in seq_along(probabilities)) { - A_sum <- sum_of_A(max_age, max_age - i, construct_A, ...) + for (i in seq_along(probabilities)) { + A_sum <- sum_of_A(max_age, max_age - i, construct_A_fn, ...) Y <- Matrix::expm(A_sum) %>% as.matrix() %*% initial_conditions probabilities[i] <- calculate_seropositivity_function(Y) } @@ -303,7 +303,7 @@ probability_seropositive_general_model_by_age <- function( #nolint #' interval for each row based on the age_min and age_max columns. add_age_bins <- function(survey_features) { intervals <- vector(length = nrow(survey_features)) - for(i in seq_along(intervals)) { + for (i in seq_along(intervals)) { age_min <- survey_features$age_min[i] age_max <- survey_features$age_max[i] intervals[i] <- paste0("[", age_min, ",", age_max, "]") @@ -343,7 +343,7 @@ multinomial_sampling_group <- function(sample_size, n_ages) { prob_value <- 1 / n_ages probs <- rep(prob_value, n_ages) sample_size_by_age <- as.vector( - rmultinom(1, sample_size, prob=probs) + rmultinom(1, sample_size, prob = probs) ) return(sample_size_by_age) } @@ -402,7 +402,7 @@ sample_size_by_individual_age_random <- function(survey_features) { #nolint age_bins <- rep(NA, length(ages)) for (i in seq_along(ages)) { - # Find the index of the row in survey_features where age falls within the range + # Find index of the row in survey_features where age falls within the range age_min <- survey_features$age_min age_max <- survey_features$age_max idx <- which(ages[i] >= age_min & ages[i] <= age_max) @@ -429,8 +429,8 @@ sample_size_by_individual_age_random <- function(survey_features) { #nolint } check_age_constraints <- function(df) { - for (i in 1:nrow(df)) { - for (j in 1:nrow(df)) { + for (i in seq_len(nrow(df))) { + for (j in seq_len(nrow(df))) { if (i != j && df$age_max[i] == df$age_min[j]) { return(FALSE) } @@ -443,10 +443,10 @@ generate_seropositive_counts_by_age_bin <- function( #nolint probability_seropositive_by_age, #nolint sample_size_by_age_random, survey_features - ) { +) { combined_df <- probability_seropositive_by_age %>% - dplyr::left_join(sample_size_by_age_random, by="age") %>% + dplyr::left_join(sample_size_by_age_random, by = "age") %>% dplyr::mutate( n_seropositive = rbinom( nrow(probability_seropositive_by_age), @@ -501,13 +501,13 @@ generate_seropositive_counts_by_age_bin <- function( #nolint #' foi_df, survey_features) #' @export simulate_serosurvey_time_model <- function( - foi, - survey_features, - seroreversion_rate=0 + foi, + survey_features, + seroreversion_rate = 0 ) { # Input validation - validate_foi_df(foi, c("year")) + validate_foi_df(foi, "year") validate_survey(survey_features) validate_seroreversion_rate(seroreversion_rate) validate_survey_and_foi_consistency( @@ -566,13 +566,13 @@ simulate_serosurvey_time_model <- function( #' foi_df, survey_features) #' @export simulate_serosurvey_age_model <- function( - foi, - survey_features, - seroreversion_rate=0 + foi, + survey_features, + seroreversion_rate = 0 ) { # Input validation - validate_foi_df(foi, c("age")) + validate_foi_df(foi, "age") validate_survey(survey_features) validate_seroreversion_rate(seroreversion_rate) validate_survey_and_foi_consistency( @@ -646,10 +646,11 @@ simulate_serosurvey_age_and_time_model <- function( #nolint foi ) - probability_serop_by_age <- probability_seropositive_age_and_time_model_by_age( - foi = foi, - seroreversion_rate = seroreversion_rate - ) + probability_serop_by_age <- + probability_seropositive_age_and_time_model_by_age( + foi = foi, + seroreversion_rate = seroreversion_rate + ) sample_size_by_age_random <- sample_size_by_individual_age_random( survey_features = survey_features @@ -734,29 +735,29 @@ simulate_serosurvey_age_and_time_model <- function( #nolint #' survey_features = survey_features) #' @export simulate_serosurvey <- function( - model, - foi, - survey_features, - seroreversion_rate=0 + model, + foi, + survey_features, + seroreversion_rate = 0 ) { # don't advertise time-age in case people think this is something else - if(!model %in% c("age", "time", "age-time", "time-age")) + if (!model %in% c("age", "time", "age-time", "time-age")) stop("model must be one of 'age', 'time', or 'age-time'.") - if(model == "time") { + if (model == "time") { serosurvey <- simulate_serosurvey_time_model( foi, survey_features, seroreversion_rate ) - } else if(model == "age") { + } else if (model == "age") { serosurvey <- simulate_serosurvey_age_model( foi, survey_features, seroreversion_rate ) - } else if(model == "age-time" || model == "time-age") { + } else if (model == "age-time" || model == "time-age") { serosurvey <- simulate_serosurvey_age_and_time_model( foi, survey_features, diff --git a/R/summarise_seromodel.R b/R/summarise_seromodel.R index 4cd567d4..bf83de2f 100644 --- a/R/summarise_seromodel.R +++ b/R/summarise_seromodel.R @@ -94,9 +94,9 @@ summarise_seromodel <- function( loo_estimate_digits = loo_estimate_digits ) - summary_list[par_loo_estimate] = loo_estimate_summary + summary_list[par_loo_estimate] <- loo_estimate_summary - check_convergence <- c() + check_convergence <- NULL if (startsWith(model_name, "constant")) { foi_summary <- summarise_central_estimate( seromodel = seromodel, @@ -160,9 +160,9 @@ summarise_seromodel <- function( } if (all(check_convergence)) { - summary_list["converged"] = "yes" + summary_list["converged"] <- "yes" } else { - summary_list["converged"] = "no" + summary_list["converged"] <- "no" } return(summary_list) diff --git a/R/validation.R b/R/validation.R index 38aab62f..22050979 100644 --- a/R/validation.R +++ b/R/validation.R @@ -5,7 +5,8 @@ stop_if_missing <- function(serosurvey, must_have_cols) { %in% colnames(serosurvey) ) ) { - missing <- must_have_cols[which(!(must_have_cols %in% colnames(serosurvey)))] + missing <- + must_have_cols[which(!(must_have_cols %in% colnames(serosurvey)))] stop( "The following mandatory columns in `serosurvey` are missing.\n", toString(missing) @@ -58,25 +59,40 @@ validate_serosurvey <- function(serosurvey) { validate_survey <- function(survey_features) { - if (!is.data.frame(survey_features) || !all(c("age_min", "age_max", "sample_size") %in% names(survey_features))) { - stop("survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") + if (!is.data.frame(survey_features) || + !all( + c("age_min", "age_max", "sample_size") %in% names(survey_features)) + ) { + stop( + "survey_features must be a dataframe with columns ", + "'age_min', 'age_max', and 'sample_size'." + ) } # check that the age_max of a bin does not coincide with # the age min of a different bin is_age_ok <- check_age_constraints(survey_features) - if(!is_age_ok) - stop("Age bins in a survey are inclusive of both bounds, so the age_max of one bin cannot equal the age_min of another.") + if (!is_age_ok) + stop( + "Age bins in a survey are inclusive of both bounds, ", + "so the age_max of one bin cannot equal the age_min of another.") } validate_foi_df <- function(foi_df, cnames_additional) { - if (!is.data.frame(foi_df) || !all(cnames_additional %in% names(foi_df)) || ncol(foi_df) != (1 + length(cnames_additional))) { - if(length(cnames_additional) == 1) + if ( + !is.data.frame(foi_df) || + !all(cnames_additional %in% names(foi_df)) || + ncol(foi_df) != (1 + length(cnames_additional)) + ) { + if (length(cnames_additional) == 1) { message_end <- paste0(" and ", cnames_additional, ".") - else - message_end <- paste0(", ", paste(cnames_additional, collapse=" and "), ".") - message_beginning <- "foi must be a dataframe with columns foi" - stop(paste0(message_beginning, message_end)) + } else { + message_end <- paste0( + ", ", paste(cnames_additional, collapse = " and "), "." + ) + message_beginning <- "foi must be a dataframe with columns foi" + stop(glue::glue("{message_beginning}", "{message_end}")) + } } } @@ -91,8 +107,11 @@ validate_survey_and_foi_consistency <- function( #nolint foi_df ) { max_age_foi_df <- nrow(foi_df) - if(max_age_foi_df > max(survey_features$age_max)) - stop("maximum age implicit in foi_df should not exceed max age in survey_features.") + if (max_age_foi_df > max(survey_features$age_max)) + stop( + "maximum age implicit in foi_df should ", + "not exceed max age in survey_features." + ) } validate_survey_and_foi_consistency_age_time <- function( #nolint @@ -100,6 +119,9 @@ validate_survey_and_foi_consistency_age_time <- function( #nolint foi_df ) { max_age_foi_df <- max(foi_df$year) - min(foi_df$year) + 1 - if(max_age_foi_df > max(survey_features$age_max)) - stop("maximum age implicit in foi_df should not exceed max age in survey_features.") + if (max_age_foi_df > max(survey_features$age_max)) + stop( + "maximum age implicit in foi_df should ", + "not exceed max age in survey_features." + ) } From 4fbc02b75679b22562cad3516b0fda721e0fa28b Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 6 Aug 2024 12:20:09 -0500 Subject: [PATCH 38/78] fix: remove deprecated `@docType` tag --- R/serofoi-package.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/serofoi-package.R b/R/serofoi-package.R index fc3ef874..7cd8525b 100644 --- a/R/serofoi-package.R +++ b/R/serofoi-package.R @@ -2,7 +2,6 @@ #' #' @description A DESCRIPTION OF THE PACKAGE #' -#' @docType package #' @name serofoi-package #' @aliases serofoi #' @useDynLib serofoi, .registration = TRUE @@ -19,4 +18,7 @@ #' @references Stan Development Team (NA). RStan: the R interface to Stan. R #' package version 2.26.22. https://mc-stan.org #' +#' #' @keywords internal +"_PACKAGE" + NULL From 67da8f495f216a09cfe8e479011136a3f96d707e Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 6 Aug 2024 12:23:50 -0500 Subject: [PATCH 39/78] doc(lintr): syntax and lintr corrections to data simulation vignette --- vignettes/simulating_serosurveys.Rmd | 116 +++++++++++++-------------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/vignettes/simulating_serosurveys.Rmd b/vignettes/simulating_serosurveys.Rmd index 87fbd000..4511f75a 100644 --- a/vignettes/simulating_serosurveys.Rmd +++ b/vignettes/simulating_serosurveys.Rmd @@ -38,8 +38,8 @@ foi_constant <- data.frame( foi = rep(0.05, max_age) ) -foi_constant %>% - ggplot(aes(x=age, y=foi)) + +foi_constant %>% + ggplot(aes(x = age, y = foi)) + geom_line() ``` @@ -68,11 +68,11 @@ Now, we can plot the proportions of those seropositive, including the posterior ```{r, echo=FALSE, fig.align="center"} add_posterior_quantiles <- function(df) { - df %>% + df %>% mutate( - alpha = 1 + n_seropositive, - beta = 1 + sample_size - n_seropositive - ) %>% + alpha = 1 + .data$n_seropositive, + beta = 1 + .data$sample_size - .data$n_seropositive + ) %>% mutate( lower = qbeta(0.025, alpha, beta), middle = qbeta(0.5, alpha, beta), @@ -95,7 +95,7 @@ ggplot() + color = "blue", linewidth = 1 ) + - scale_y_continuous(labels=scales::percent) + + scale_y_continuous(labels = scales::percent) + ylab("Seropositivity") + xlab("Age") ``` @@ -106,11 +106,11 @@ We now consider a pathogen which has an FOI that varies throughout the age of in ```{r, fig.align="center"} foi_age_varying <- data.frame( age = seq(1, max_age, 1) -) %>% - mutate(foi=0.1 * exp(-0.1 * age)) +) %>% + mutate(foi = 0.1 * exp(-0.1 * age)) -foi_age_varying %>% - ggplot(aes(x=age, y=foi)) + +foi_age_varying %>% + ggplot(aes(x = age, y = foi)) + geom_line() ``` @@ -142,7 +142,7 @@ serosurvey_combined <- serosurvey_constant %>% by = "age") %>% bind_rows( serosurvey_age_dep %>% - add_posterior_quantiles() %>% + add_posterior_quantiles() %>% rename(age = age_min) %>% mutate(type = "age-dependent FOI") %>% left_join( @@ -152,8 +152,8 @@ serosurvey_combined <- serosurvey_constant %>% seroreversion_rate = 0 ), by = "age") - ) %>% - mutate(type = as.factor(type)) + ) %>% + mutate(type = as.factor(type)) # plot both ggplot(data = serosurvey_combined) + @@ -165,7 +165,7 @@ ggplot(data = serosurvey_combined) + color = "blue", linewidth = 1 ) + - scale_y_continuous(labels=scales::percent) + + scale_y_continuous(labels = scales::percent) + ylab("Seropositivity") + xlab("Age") + facet_wrap(~type) @@ -189,8 +189,8 @@ foi_spiky <- data.frame( ) # plot -foi_spiky %>% - ggplot(aes(x=year, y=foi)) + +foi_spiky %>% + ggplot(aes(x = year, y = foi)) + geom_line() ``` @@ -222,7 +222,7 @@ ggplot() + color = "blue", linewidth = 1 ) + - scale_y_continuous(labels=scales::percent) + + scale_y_continuous(labels = scales::percent) + ylab("Seropositivity") + xlab("Age") ``` @@ -239,8 +239,8 @@ foi_df_time <- data.frame( ) # plot -foi_df_time %>% - ggplot(aes(x=year, y=foi)) + +foi_df_time %>% + ggplot(aes(x = year, y = foi)) + geom_line() ``` @@ -256,35 +256,35 @@ foi_df_age <- data.frame( ) # plot -foi_df_age %>% - ggplot(aes(x=age, y=foi)) + +foi_df_age %>% + ggplot(aes(x = age, y = foi)) + geom_line() ``` To create the overall FOI (which is a function of both time and age), we create the product of the time- and age-dependent parts of it. ```{r} foi_age_time <- expand.grid( - year=foi_df_time$year, - age=foi_df_age$age -) %>% - left_join(foi_df_age, by="age") %>% - rename(foi_age=foi) %>% - left_join(foi_df_time, by="year") %>% - rename(foi_time=foi) %>% - mutate(foi = foi_age * foi_time) %>% + year = foi_df_time$year, + age = foi_df_age$age +) %>% + left_join(foi_df_age, by = "age") %>% + rename(foi_age = foi) %>% + left_join(foi_df_time, by = "year") %>% + rename(foi_time = foi) %>% + mutate(foi = foi_age * foi_time) %>% select(-c("foi_age", "foi_time")) ``` ```{r, fig.align="center", echo=FALSE} # plot a few birth years select_birth_years <- c(1955, 1965, 1975, 1985, 1995, 2005, 2015) -foi_age_time %>% - mutate(birth_year = year - age) %>% - filter(birth_year >= 1945) %>% +foi_age_time %>% + mutate(birth_year = year - age) %>% + filter(birth_year >= 1945) %>% arrange(birth_year, age) %>% - filter(birth_year %in% select_birth_years) %>% - mutate(birth_year = as.factor(birth_year)) %>% - ggplot(aes(x=year, y=foi, colour=birth_year)) + + filter(birth_year %in% select_birth_years) %>% + mutate(birth_year = as.factor(birth_year)) %>% + ggplot(aes(x = year, y = foi, colour = birth_year)) + geom_line() ``` @@ -294,7 +294,7 @@ max_age <- 80 sample_size <- 50 survey_features <- data.frame( age_min = seq(1, max_age, 5), - age_max = seq(5, max_age, 5)) %>% + age_max = seq(5, max_age, 5)) %>% mutate(sample_size = rep(sample_size, length(age_min))) serosurvey <- simulate_serosurvey( @@ -321,7 +321,7 @@ ggplot() + color = "blue", linewidth = 1 ) + - scale_y_continuous(labels=scales::percent) + + scale_y_continuous(labels = scales::percent) + ylab("Seropositivity") + xlab("Age") ``` @@ -352,7 +352,7 @@ serosurvey_combined <- serosurvey %>% by = "age") %>% bind_rows( serosurvey_serorevert %>% - add_posterior_quantiles() %>% + add_posterior_quantiles() %>% rename(age = age_min) %>% mutate(type = "seroreverting") %>% left_join( @@ -362,8 +362,8 @@ serosurvey_combined <- serosurvey %>% seroreversion_rate = 0.01 ), by = "age") - ) %>% - mutate(type = as.factor(type)) + ) %>% + mutate(type = as.factor(type)) # plot both ggplot(data = serosurvey_combined) + @@ -375,7 +375,7 @@ ggplot(data = serosurvey_combined) + color = "blue", linewidth = 1 ) + - scale_y_continuous(labels=scales::percent) + + scale_y_continuous(labels = scales::percent) + ylab("Seropositivity") + xlab("Age") + facet_wrap(~type) @@ -435,18 +435,18 @@ v <- foi_df_time$foi construct_A <- function(t, tau, u, v) { u_bar <- u[t - tau] v_bar <- v[t] - + A <- diag(-1, ncol = 12, nrow = 12) A[row(A) == (col(A) + 1)] <- 1 A[1, 1] <- -u_bar * v_bar A[2, 1] <- u_bar * v_bar A[12, 12] <- 0 - + A } # determines the sum of seropositive compartments of those still alive -calculate_seropositivity_function <- function(Y) { +calculate_seropositivity_fn <- function(Y) { sum(Y[2:11]) / (1 - Y[12]) } @@ -457,16 +457,16 @@ initial_conditions[1] <- 1 # solve seropositive_hiv <- probability_seropositive_general_model_by_age( construct_A, - calculate_seropositivity_function, + calculate_seropositivity_fn, initial_conditions, - max_age=80, + max_age = 80, u, v ) -seropositive_hiv %>% - ggplot(aes(x=age, y=seropositivity)) + +seropositive_hiv %>% + ggplot(aes(x = age, y = seropositivity)) + geom_line() + - scale_y_continuous(labels=scales::percent) + + scale_y_continuous(labels = scales::percent) + ylab("Seropositivity") + xlab("Age") ``` @@ -475,21 +475,21 @@ We can also simulate a serosurvey assuming the above model. ```{r} serosurvey <- simulate_serosurvey_general_model( construct_A, - calculate_seropositivity_function, + calculate_seropositivity_fn, initial_conditions, survey_features, u, v -) +) ``` ```{r, echo=FALSE, fig.align="center"} # plot -serosurvey %>% - add_posterior_quantiles() %>% - ggplot(aes(x=age_min, y=middle)) + - geom_pointrange(aes(ymin=lower, ymax=upper)) + - geom_smooth(se=FALSE) + - scale_y_continuous(labels=scales::percent) + +serosurvey %>% + add_posterior_quantiles() %>% + ggplot(aes(x = age_min, y = middle)) + + geom_pointrange(aes(ymin = lower, ymax = upper)) + + geom_smooth(se = FALSE) + + scale_y_continuous(labels = scales::percent) + ylab("Seropositivity") ``` From de8fa3085add699c8bba773488e3bd3ef716c65b Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 6 Aug 2024 12:24:48 -0500 Subject: [PATCH 40/78] doc: update documentation --- NAMESPACE | 26 ++-- R/build_stan_data.R | 2 +- R/fit_seromodel.R | 12 +- R/plot_seromodel.R | 4 +- man/add_age_bins.Rd | 13 +- man/add_age_group_to_serosurvey.Rd | 20 ++++ man/build_stan_data.Rd | 54 +++++++++ man/chagas2012.Rd | 19 --- man/chik2015.Rd | 19 --- man/extract_central_estimates.Rd | 41 +++++++ man/extract_seromodel_summary.Rd | 63 ---------- man/fit_seromodel.Rd | 113 ++++++------------ man/generate_random_sample_sizes.Rd | 14 ++- man/get_chunk_structure.Rd | 41 ------- man/get_cohort_ages.Rd | 39 ------ man/get_foi_central_estimates.Rd | 56 --------- man/get_foi_index.Rd | 27 +++++ man/get_prev_expanded.Rd | 56 --------- man/get_table_rhats.Rd | 35 ------ man/multinomial_sampling_group.Rd | 6 +- man/plot_foi.Rd | 67 ----------- man/plot_foi_estimates.Rd | 46 +++++++ man/plot_info_table.Rd | 33 ----- man/plot_rhats.Rd | 49 ++++---- man/plot_seromodel.Rd | 87 +++++--------- man/plot_seroprev.Rd | 51 -------- man/plot_seroprev_fitted.Rd | 69 ----------- man/plot_seroprevalence_estimates.Rd | 36 ++++++ man/plot_serosurvey.Rd | 26 ++++ man/plot_summary.Rd | 40 +++++++ man/prepare_serodata.Rd | 54 --------- man/prepare_serosurvey_for_plotting.Rd | 34 ++++++ man/probability_exact_time_varying.Rd | 3 +- ..._seropositive_age_and_time_model_by_age.Rd | 13 +- ...obability_seropositive_age_model_by_age.Rd | 13 +- man/probability_seropositive_by_age.Rd | 10 +- ...ility_seropositive_general_model_by_age.Rd | 15 +-- ...bability_seropositive_time_model_by_age.Rd | 13 +- man/run_seromodel.Rd | 108 ----------------- man/sample_size_by_individual_age_random.Rd | 22 ++-- man/serofoi-package.Rd | 20 ++++ man/set_stan_data_defaults.Rd | 20 ++++ man/sf_none.Rd | 11 ++ man/sf_normal.Rd | 19 +++ man/sf_uniform.Rd | 19 +++ man/simulate_serosurvey.Rd | 28 +++-- man/simulate_serosurvey_age_and_time_model.Rd | 27 +++-- man/simulate_serosurvey_age_model.Rd | 26 ++-- man/simulate_serosurvey_general_model.Rd | 21 ++-- man/simulate_serosurvey_time_model.Rd | 26 ++-- man/summarise_central_estimate.Rd | 39 ++++++ man/summarise_loo_estimate.Rd | 26 ++++ man/summarise_seromodel.Rd | 50 ++++++++ man/survey_by_individual_age.Rd | 6 +- man/veev2012.Rd | 19 --- 55 files changed, 783 insertions(+), 1023 deletions(-) create mode 100644 man/add_age_group_to_serosurvey.Rd create mode 100644 man/build_stan_data.Rd delete mode 100644 man/chagas2012.Rd delete mode 100644 man/chik2015.Rd create mode 100644 man/extract_central_estimates.Rd delete mode 100644 man/extract_seromodel_summary.Rd delete mode 100644 man/get_chunk_structure.Rd delete mode 100644 man/get_cohort_ages.Rd delete mode 100644 man/get_foi_central_estimates.Rd create mode 100644 man/get_foi_index.Rd delete mode 100644 man/get_prev_expanded.Rd delete mode 100644 man/get_table_rhats.Rd delete mode 100644 man/plot_foi.Rd create mode 100644 man/plot_foi_estimates.Rd delete mode 100644 man/plot_info_table.Rd delete mode 100644 man/plot_seroprev.Rd delete mode 100644 man/plot_seroprev_fitted.Rd create mode 100644 man/plot_seroprevalence_estimates.Rd create mode 100644 man/plot_serosurvey.Rd create mode 100644 man/plot_summary.Rd delete mode 100644 man/prepare_serodata.Rd create mode 100644 man/prepare_serosurvey_for_plotting.Rd delete mode 100644 man/run_seromodel.Rd create mode 100644 man/set_stan_data_defaults.Rd create mode 100644 man/sf_none.Rd create mode 100644 man/sf_normal.Rd create mode 100644 man/sf_uniform.Rd create mode 100644 man/summarise_central_estimate.Rd create mode 100644 man/summarise_loo_estimate.Rd create mode 100644 man/summarise_seromodel.Rd delete mode 100644 man/veev2012.Rd diff --git a/NAMESPACE b/NAMESPACE index 6dad2c97..53361878 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,27 +1,27 @@ # Generated by roxygen2: do not edit by hand -export(extract_seromodel_summary) +export(build_stan_data) +export(extract_central_estimates) export(fit_seromodel) -export(get_chunk_structure) -export(get_cohort_ages) -export(get_foi_central_estimates) -export(get_prev_expanded) -export(get_table_rhats) -export(plot_foi) -export(plot_info_table) -export(plot_rhats) +export(get_foi_index) +export(plot_foi_estimates) export(plot_seromodel) -export(plot_seroprev) -export(plot_seroprev_fitted) -export(prepare_serodata) +export(plot_seroprevalence_estimates) +export(plot_serosurvey) +export(plot_summary) +export(prepare_serosurvey_for_plotting) export(probability_seropositive_by_age) export(probability_seropositive_general_model_by_age) -export(run_seromodel) +export(sf_normal) +export(sf_uniform) export(simulate_serosurvey) export(simulate_serosurvey_age_and_time_model) export(simulate_serosurvey_age_model) export(simulate_serosurvey_general_model) export(simulate_serosurvey_time_model) +export(summarise_central_estimate) +export(summarise_loo_estimate) +export(summarise_seromodel) import(Rcpp) import(dplyr) import(methods) diff --git a/R/build_stan_data.R b/R/build_stan_data.R index 86b3874c..735d869a 100644 --- a/R/build_stan_data.R +++ b/R/build_stan_data.R @@ -89,7 +89,7 @@ get_foi_index <- function( #' Set stan data defaults for sampling #' -#' @param stan_data List to be pased to [rstan][rstan::sampling] +#' @param stan_data List to be passed to [rstan][rstan::sampling] #' @inheritParams fit_seromodel #' @return List with default values of stan data for sampling set_stan_data_defaults <- function( diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index b3e84253..0fc243e7 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -27,9 +27,9 @@ add_age_group_to_serosurvey <- function(serosurvey) { #' @param foi_prior Force-of-infection distribution specified by means of #' the helper functions. Currently available options are: #' \describe{ -#' \item{`[sf_normal]`} -#' \item{`[sf_uniform]`} -#' \item{`[sf_none]`} +#' \item{`sf_normal`} +#' \item{`sf_uniform`} +#' \item{`sf_none`} #' } #' @param foi_index Integer vector specifying the age-groups for which #' force-of-infection values will be estimated @@ -38,9 +38,9 @@ add_age_group_to_serosurvey <- function(serosurvey) { #' @param seroreversion_prior seroreversion distribution specified by means of #' the helper functions. Currently available options are: #' \describe{ -#' \item{`[sf_normal]`} -#' \item{`[sf_uniform]`} -#' \item{`[sf_none]`} +#' \item{`sf_normal`} +#' \item{`sf_uniform`} +#' \item{`sf_none`} #' } #' @param ... Additional parameters for [rstan][rstan::sampling] #' @returns stan_fit object with force-of-infection and seroreversion diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index bc947449..e79e470a 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -1,6 +1,6 @@ #' Prepares serosurvey for plotting #' -#' Adds seroprevalence values with corresponing binomial confidence interval +#' Adds seroprevalence values with corresponding binomial confidence interval #' @inheritParams fit_seromodel #' @param alpha 1 - alpha indicates the confidence level to be used #' @return serosurvey with additional columns: @@ -122,7 +122,7 @@ extract_central_estimates <- function( #' #' @inheritParams extract_central_estimates #' @inheritParams plot_serosurvey -#' @returns ggplot object with seroprevalence estimates and serisurvey plots +#' @returns ggplot object with seroprevalence estimates and serosurveys plots #' @export plot_seroprevalence_estimates <- function( seromodel, diff --git a/man/add_age_bins.Rd b/man/add_age_bins.Rd index 6ab75844..9caa676a 100644 --- a/man/add_age_bins.Rd +++ b/man/add_age_bins.Rd @@ -7,14 +7,15 @@ add_age_bins(survey_features) } \arguments{ -\item{survey_features}{A dataframe containing age_min and age_max columns representing -the minimum and maximum age boundaries for each group.} +\item{survey_features}{A dataframe containing age_min and age_max columns +representing the minimum and maximum age boundaries for each group.} } \value{ -A dataframe with an additional 'group' column representing the group interval -for each row based on the age_min and age_max columns. +A dataframe with an additional 'group' column representing the group +interval for each row based on the age_min and age_max columns. } \description{ -It generates a new column 'group' in the survey_features dataframe, representing -the group interval for each row based on the age_min and age_max columns. +It generates a new column 'group' in the survey_features dataframe, +representing the group interval for each row based on the age_min and +age_max columns. } diff --git a/man/add_age_group_to_serosurvey.Rd b/man/add_age_group_to_serosurvey.Rd new file mode 100644 index 00000000..7f519c0e --- /dev/null +++ b/man/add_age_group_to_serosurvey.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fit_seromodel.R +\name{add_age_group_to_serosurvey} +\alias{add_age_group_to_serosurvey} +\title{Adds age group marker to serosurvey} +\usage{ +add_age_group_to_serosurvey(serosurvey) +} +\arguments{ +\item{serosurvey}{\describe{ +\item{\code{tsur}}{Year in which the survey took place} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} +} +\description{ +Adds age group marker to serosurvey +} diff --git a/man/build_stan_data.Rd b/man/build_stan_data.Rd new file mode 100644 index 00000000..37d99f16 --- /dev/null +++ b/man/build_stan_data.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_stan_data.R +\name{build_stan_data} +\alias{build_stan_data} +\title{Builds stan data for sampling depending on the selected model} +\usage{ +build_stan_data( + serosurvey, + model_type = "constant", + foi_prior = sf_uniform(), + foi_index = NULL, + is_seroreversion = FALSE, + seroreversion_prior = sf_none() +) +} +\arguments{ +\item{serosurvey}{\describe{ +\item{\code{tsur}}{Year in which the survey took place} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{model_type}{Type of the model. Either "constant", "age" or "time"} + +\item{foi_prior}{Force-of-infection distribution specified by means of +the helper functions. Currently available options are: +\describe{ +\item{\code{sf_normal}} +\item{\code{sf_uniform}} +\item{\code{sf_none}} +}} + +\item{foi_index}{Integer vector specifying the age-groups for which +force-of-infection values will be estimated} + +\item{is_seroreversion}{Boolean specifying whether to include +seroreversion rate estimation in the model} + +\item{seroreversion_prior}{seroreversion distribution specified by means of +the helper functions. Currently available options are: +\describe{ +\item{\code{sf_normal}} +\item{\code{sf_uniform}} +\item{\code{sf_none}} +}} +} +\value{ +List with necessary data for sampling the specified model +} +\description{ +Builds stan data for sampling depending on the selected model +} diff --git a/man/chagas2012.Rd b/man/chagas2012.Rd deleted file mode 100644 index 995fce31..00000000 --- a/man/chagas2012.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/chagas2012.R -\docType{data} -\name{chagas2012} -\alias{chagas2012} -\title{Seroprevalence data on serofoi} -\format{ -An object of class \code{"cross"}; see \code{\link[qtl:read.cross]{qtl::read.cross()}}. -} -\usage{ -chagas2012 -} -\description{ -Data from a serological surveys -} -\examples{ -chagas2012 -} -\keyword{datasets} diff --git a/man/chik2015.Rd b/man/chik2015.Rd deleted file mode 100644 index fa4991b3..00000000 --- a/man/chik2015.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/chik2015.R -\docType{data} -\name{chik2015} -\alias{chik2015} -\title{Seroprevalence data on serofoi} -\format{ -An object of class \code{"cross"}; see \code{\link[qtl:read.cross]{qtl::read.cross()}}. -} -\usage{ -chik2015 -} -\description{ -Data from a serological surveys -} -\examples{ -chik2015 -} -\keyword{datasets} diff --git a/man/extract_central_estimates.Rd b/man/extract_central_estimates.Rd new file mode 100644 index 00000000..73f6664c --- /dev/null +++ b/man/extract_central_estimates.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_seromodel.R +\name{extract_central_estimates} +\alias{extract_central_estimates} +\title{Extracts central estimates from stan_fit object for specified parameter} +\usage{ +extract_central_estimates( + seromodel, + serosurvey, + alpha = 0.05, + par_name = "foi_vector" +) +} +\arguments{ +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} + +\item{serosurvey}{\describe{ +\item{\code{tsur}}{Year in which the survey took place} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{alpha}{1 - alpha indicates the credibility level to be used} + +\item{par_name}{String specifying the parameter to be extracted +from \code{seromodel}} +} +\value{ +A dataframe with the following columns +\describe{ +\item{\code{median}}{Median of the samples computed as the 0.5 quantile} +\item{\code{lower}}{Lower quantile \code{alpha}} +\item{\code{upper}}{Upper quantile \code{1 - alpha}} +} +} +\description{ +Extracts central estimates from stan_fit object for specified parameter +} diff --git a/man/extract_seromodel_summary.Rd b/man/extract_seromodel_summary.Rd deleted file mode 100644 index 6f7effda..00000000 --- a/man/extract_seromodel_summary.Rd +++ /dev/null @@ -1,63 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R -\name{extract_seromodel_summary} -\alias{extract_seromodel_summary} -\title{Function to extract a summary of the specified serological model object} -\usage{ -extract_seromodel_summary(seromodel_object, serodata) -} -\arguments{ -\item{seromodel_object}{Stanfit object containing the results of fitting a -model by means of \link{fit_seromodel}.} - -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} -} -\value{ -\code{model_summary}. Object with a summary of \code{seromodel_object} -containing the following: -\describe{ -\item{\code{foi_model}}{Name of the selected model.} -\item{\code{data_set}}{Seroprevalence survey label} -\item{\code{country}}{Name of the country were the survey was conducted in.} -\item{\code{year}}{Year in which the survey was conducted.} -\item{\code{test}}{Type of test of the survey.} -\item{\code{antibody}}{Antibody} -\item{\code{n_sample}}{Total number of samples in the survey.} -\item{\code{n_agec}}{Number of age groups considered.} -\item{\code{n_iter}}{Number of iterations by chain including warmup.} -\item{\code{elpd}}{elpd} -\item{\code{se}}{se} -\item{\code{converged}}{convergence} -} -} -\description{ -This function extracts a summary corresponding to a serological model object -containing information about the original serological survey data used to -fit the model, such as the year when the survey took place, the type of test -taken and the corresponding antibody, as well as information about the -convergence of the model, like the expected log pointwise predictive density -\code{elpd} and its corresponding standard deviation. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant" -) -extract_seromodel_summary(seromodel_object, - serodata = serodata -) -} diff --git a/man/fit_seromodel.Rd b/man/fit_seromodel.Rd index 2aad529e..34851ad8 100644 --- a/man/fit_seromodel.Rd +++ b/man/fit_seromodel.Rd @@ -1,101 +1,58 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R +% Please edit documentation in R/fit_seromodel.R \name{fit_seromodel} \alias{fit_seromodel} -\title{Fit selected model to the specified seroprevalence survey -data} +\title{Runs specified stan model for the force-of-infection} \usage{ fit_seromodel( - serodata, - foi_model = c("constant", "tv_normal_log", "tv_normal", "av_normal"), - foi_parameters = NULL, - chunks = NULL, - chunk_size = 1, - iter = 1000, - adapt_delta = 0.9, - max_treedepth = 10, - seed = 12345, + serosurvey, + model_type = "constant", + foi_prior = sf_normal(), + foi_index = NULL, + is_seroreversion = FALSE, + seroreversion_prior = sf_uniform(), ... ) } \arguments{ -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} +\item{serosurvey}{\describe{ \item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} - -\item{foi_model}{Name of the selected model. Current version provides three -options: -\describe{ -\item{\code{"constant"}}{Runs a constant model} -\item{\code{"tv_normal"}}{Runs a normal model} -\item{\code{"tv_normal_log"}}{Runs a normal logarithmic model} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} }} -\item{foi_parameters}{List specifying the initial prior parameters of the -model \code{foi_model} to be specified as (e.g.): +\item{model_type}{Type of the model. Either "constant", "age" or "time"} + +\item{foi_prior}{Force-of-infection distribution specified by means of +the helper functions. Currently available options are: \describe{ -\item{\code{"constant"}}{\code{list(foi_a = 0, foi_b = 2)}} -\item{\code{"tv_normal"}}{\code{list(foi_location = 0, foi_scale = 1)}} -\item{\code{"tv_normal_log"}}{\code{list(foi_location = -6, foi_scale = 4)}} +\item{\code{sf_normal}} +\item{\code{sf_uniform}} +\item{\code{sf_none}} }} -\item{chunks}{Numeric list specifying the chunk structure of the time -interval from the birth year of the oldest age cohort -\code{min(serodata$age_mean_f)} to the time when the serosurvey was conducted -\code{t_sur}. If \code{NULL}, the time interval is divided in chunks of size -\code{chunk_size}.} - -\item{chunk_size}{Size of the chunks to be used in case that the chunk -structure \code{chunks} is not specified in \link{fit_seromodel}. -Default is 1, meaning that one force of infection value is to be estimated -for every year in the time interval spanned by the serosurvey. -If the length of the time interval is not exactly divisible by \code{chunk_size}, -the remainder years are included in the last chunk.} - -\item{iter}{Number of interactions for each chain including the warmup. -\code{iter} in \link[rstan:stanmodel-method-sampling]{sampling}.} +\item{foi_index}{Integer vector specifying the age-groups for which +force-of-infection values will be estimated} -\item{adapt_delta}{Real number between 0 and 1 that represents the target -average acceptance probability. Increasing the value of \code{adapt_delta} will -result in a smaller step size and fewer divergences. For further details -refer to the \code{control} parameter in \link[rstan:stanmodel-method-sampling]{sampling} or -\href{https://mc-stan.org/rstanarm/reference/adapt_delta.html}{here}.} +\item{is_seroreversion}{Boolean specifying whether to include +seroreversion rate estimation in the model} -\item{max_treedepth}{Maximum tree depth for the binary tree used in the NUTS -stan sampler. For further details refer to the \code{control} parameter in -\link[rstan:stanmodel-method-sampling]{sampling}.} - -\item{seed}{For further details refer to the \code{seed} parameter in -\link[rstan:stanmodel-method-sampling]{sampling}.} +\item{seroreversion_prior}{seroreversion distribution specified by means of +the helper functions. Currently available options are: +\describe{ +\item{\code{sf_normal}} +\item{\code{sf_uniform}} +\item{\code{sf_none}} +}} -\item{...}{Additional parameters for \link[rstan:stanmodel-method-sampling]{sampling}.} +\item{...}{Additional parameters for \link[rstan:stanmodel-method-sampling]{rstan}} } \value{ -\code{seromodel_object}. \code{stanfit} object returned by the function -\link[rstan:stanmodel-method-sampling]{sampling} +stan_fit object with force-of-infection and seroreversion +(when applicable) samples } \description{ -This function fits the specified model \code{foi_model} to the serological survey -data \code{serodata} by means of \link[rstan:stanmodel-method-sampling]{sampling}. The -function determines whether the corresponding stan model object needs to be -compiled by rstan. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_fit <- fit_seromodel( - serodata = serodata, - foi_model = "constant" -) - +Runs specified stan model for the force-of-infection } diff --git a/man/generate_random_sample_sizes.Rd b/man/generate_random_sample_sizes.Rd index dc8b7039..6f5cfc8d 100644 --- a/man/generate_random_sample_sizes.Rd +++ b/man/generate_random_sample_sizes.Rd @@ -7,14 +7,16 @@ generate_random_sample_sizes(survey_df_long) } \arguments{ -\item{survey_df_long}{A dataframe with columns 'age', 'group' and 'overall_sample_size'.} +\item{survey_df_long}{A dataframe with columns 'age', 'group' and +'overall_sample_size'.} } \value{ -A dataframe with random sample sizes generated for each age based on the overall -sample size. +A dataframe with random sample sizes generated for each age based on +the overall sample size. } \description{ -This function generates random sample sizes for each age group based on the overall sample size -and the distribution of individuals across age groups. It uses multinomial sampling to allocate -the total sample size to each age group proportionally. +This function generates random sample sizes for each age group based on the +overall sample size and the distribution of individuals across age groups. +It uses multinomial sampling to allocate the total sample size to each age +group proportionally. } diff --git a/man/get_chunk_structure.Rd b/man/get_chunk_structure.Rd deleted file mode 100644 index c2e171d9..00000000 --- a/man/get_chunk_structure.Rd +++ /dev/null @@ -1,41 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R -\name{get_chunk_structure} -\alias{get_chunk_structure} -\title{Generate list containing the chunk structure to be used in the retrospective -estimation of the force of infection.} -\usage{ -get_chunk_structure(serodata, chunk_size) -} -\arguments{ -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} - -\item{chunk_size}{Size of the chunks to be used in case that the chunk -structure \code{chunks} is not specified in \link{fit_seromodel}. -Default is 1, meaning that one force of infection value is to be estimated -for every year in the time interval spanned by the serosurvey. -If the length of the time interval is not exactly divisible by \code{chunk_size}, -the remainder years are included in the last chunk.} -} -\description{ -This function generates a numeric list specifying the chunk structure of the -time interval spanning from the year of birth of the oldest age cohort up to -the time when the serosurvey was conducted. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(serodata = chagas2012, alpha = 0.05) -cohort_ages <- get_cohort_ages(serodata = serodata) -} diff --git a/man/get_cohort_ages.Rd b/man/get_cohort_ages.Rd deleted file mode 100644 index 148abc55..00000000 --- a/man/get_cohort_ages.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R -\name{get_cohort_ages} -\alias{get_cohort_ages} -\title{Generate data frame containing the age of each cohort -corresponding to each birth year excluding the year of the survey.} -\usage{ -get_cohort_ages(serodata) -} -\arguments{ -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} -} -\value{ -\code{cohort_ages}. A data frame containing the age of each cohort -corresponding to each birth year -} -\description{ -This function generates a data frame containing the age of each cohort -corresponding to each \code{birth_year} excluding the year of the survey, for -which the cohort age is still 0. specified serological survey data \code{serodata} -excluding the year of the survey. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(serodata = chagas2012, alpha = 0.05) -cohort_ages <- get_cohort_ages(serodata = serodata) -} diff --git a/man/get_foi_central_estimates.Rd b/man/get_foi_central_estimates.Rd deleted file mode 100644 index cbbcc63b..00000000 --- a/man/get_foi_central_estimates.Rd +++ /dev/null @@ -1,56 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R -\name{get_foi_central_estimates} -\alias{get_foi_central_estimates} -\title{Extract central estimates for the fitted forced FoI} -\usage{ -get_foi_central_estimates( - seromodel_object, - serodata, - lower_quantile = 0.05, - upper_quantile = 0.95 -) -} -\arguments{ -\item{seromodel_object}{Stanfit object containing the results of fitting a -model by means of \link{fit_seromodel}.} - -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} - -\item{lower_quantile}{Lower quantile used to compute the credible interval of -the fitted force-of-infection.} - -\item{upper_quantile}{Lower quantile used to compute the credible interval of -the fitted force-of-infection.} -} -\value{ -\code{foi_central_estimates}. Central estimates for the fitted forced FoI -} -\description{ -Extract central estimates for the fitted forced FoI -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant" -) -cohort_ages <- get_cohort_ages(serodata = serodata) -foi_central_estimates <- get_foi_central_estimates( - seromodel_object = seromodel_object, - serodata = serodata -) -} diff --git a/man/get_foi_index.Rd b/man/get_foi_index.Rd new file mode 100644 index 00000000..4f7f08db --- /dev/null +++ b/man/get_foi_index.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_stan_data.R +\name{get_foi_index} +\alias{get_foi_index} +\title{Generates force-of-infection indexes for heterogeneous age groups} +\usage{ +get_foi_index(serosurvey, group_size) +} +\arguments{ +\item{serosurvey}{\describe{ +\item{\code{tsur}}{Year in which the survey took place} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{group_size}{Age groups size} +} +\value{ +Integer vector with the indexes numerating each year/age +(depending on the model). +} +\description{ +The max value of the force-of-infection indexes correspond to +the number of foi values to be estimated when sampling. +} diff --git a/man/get_prev_expanded.Rd b/man/get_prev_expanded.Rd deleted file mode 100644 index 4ee85cb7..00000000 --- a/man/get_prev_expanded.Rd +++ /dev/null @@ -1,56 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R -\name{get_prev_expanded} -\alias{get_prev_expanded} -\title{Generate data frame containing the confidence interval based on -a force-of-infection fitting} -\usage{ -get_prev_expanded(foi, serodata, alpha = 0.05, bin_data = FALSE, bin_step = 5) -} -\arguments{ -\item{foi}{Object containing the information of the force-of-infection. It is -obtained from \code{rstan::extract(seromodel_object$seromodel, "foi", inc_warmup = FALSE)[[1]]}.} - -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} - -\item{alpha}{Probability threshold for statistical significance used for both -the binomial confidence interval, and the lower and upper quantiles of the -estimated prevalence.} - -\item{bin_data}{If \code{TRUE}, \code{serodata} is binned by means of -\code{prepare_bin_data}. Otherwise, age groups are kept as originally input.} - -\item{bin_step}{Integer specifying the age groups bin size to be used when -\code{bin_data} is set to \code{TRUE}.} -} -\value{ -\code{prev_final}. The expanded prevalence data. This is used for plotting -purposes in the \code{visualization} module. -} -\description{ -This function computes the corresponding binomial confidence intervals for -the obtained prevalence based on a fitting of the force-of-infection \code{foi} -for plotting an analysis purposes. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant" -) -foi <- rstan::extract(seromodel_object, "foi")[[1]] -get_prev_expanded(foi, serodata) -} diff --git a/man/get_table_rhats.Rd b/man/get_table_rhats.Rd deleted file mode 100644 index fe6e9e1b..00000000 --- a/man/get_table_rhats.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/model_comparison.R -\name{get_table_rhats} -\alias{get_table_rhats} -\title{Build dataframe containing the R-hat estimates for a given -serological model} -\usage{ -get_table_rhats(seromodel_object, cohort_ages) -} -\arguments{ -\item{seromodel_object}{Stanfit object containing the results of fitting a -model by means of \link{fit_seromodel}.} -} -\value{ -rhats table -} -\description{ -This function relies on \link[bayesplot:bayesplot-extractors]{rhat} to extract the -R-hat estimates of the serological model object \code{seromodel_object} and -returns a table a dataframe with the estimates for each year of birth. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(serodata = chagas2012) -model_constant <- fit_seromodel( - serodata = serodata, - foi_model = "constant", - iter = 1500 -) -cohort_ages <- get_cohort_ages(serodata) -get_table_rhats( - seromodel_object = model_constant, - cohort_ages = cohort_ages -) -} diff --git a/man/multinomial_sampling_group.Rd b/man/multinomial_sampling_group.Rd index 42c5681a..08558564 100644 --- a/man/multinomial_sampling_group.Rd +++ b/man/multinomial_sampling_group.Rd @@ -15,7 +15,7 @@ multinomial_sampling_group(sample_size, n_ages) A vector containing random sample sizes for each age group. } \description{ -This function generates random sample sizes for each age group using multinomial sampling. -It takes the total sample size and the number of age groups as input and returns a vector -of sample sizes for each age group. +This function generates random sample sizes for each age group using +multinomial sampling. It takes the total sample size and the number of age +groups as input and returns a vector of sample sizes for each age group. } diff --git a/man/plot_foi.Rd b/man/plot_foi.Rd deleted file mode 100644 index 6033a711..00000000 --- a/man/plot_foi.Rd +++ /dev/null @@ -1,67 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visualisation.R -\name{plot_foi} -\alias{plot_foi} -\title{Generate force-of-infection plot corresponding to the -specified fitted serological model} -\usage{ -plot_foi( - seromodel_object, - serodata, - max_lambda = NA, - size_text = 25, - foi = NULL -) -} -\arguments{ -\item{seromodel_object}{Stanfit object containing the results of fitting a -model by means of \link{fit_seromodel}.} - -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} - -\item{max_lambda}{Upper \code{ylim}for force-of-infection plot} - -\item{size_text}{Text size use in the theme of the graph returned by the -function.} - -\item{foi}{Data frame with the Force-of-infection trend to be plotted -alongside the estimated force-of-infection. Typically this corresponds to -the force-of-infection used to simulate the serosurvey used to model.} -} -\value{ -A ggplot2 object containing the force-of-infection vs time including -the corresponding confidence interval. -} -\description{ -This function generates a force-of-infection plot from the results obtained -by fitting a serological model. This includes the corresponding binomial -confidence interval. The x axis corresponds to the decades covered by the -survey the y axis to the force-of-infection. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant", - iter = 1000 -) -cohort_ages <- get_cohort_ages(serodata) -plot_foi( - seromodel_object = seromodel_object, - cohort_ages = cohort_ages, - size_text = 15 -) -} diff --git a/man/plot_foi_estimates.Rd b/man/plot_foi_estimates.Rd new file mode 100644 index 00000000..456b41d6 --- /dev/null +++ b/man/plot_foi_estimates.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_seromodel.R +\name{plot_foi_estimates} +\alias{plot_foi_estimates} +\title{Plots force-of-infection central estimates} +\usage{ +plot_foi_estimates( + seromodel, + serosurvey, + alpha = 0.05, + foi_df = NULL, + foi_max = NULL, + size_text = 11 +) +} +\arguments{ +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} + +\item{serosurvey}{\describe{ +\item{\code{tsur}}{Year in which the survey took place} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{alpha}{1 - alpha indicates the credibility level to be used} + +\item{foi_df}{Dataframe with columns +\describe{ +\item{\code{year}/\code{age}}{Year/Age (depending on the model)} +\item{\code{foi}}{Force-of-infection values by year/age} +}} + +\item{foi_max}{Max force-of-infection value for plotting} + +\item{size_text}{Size of text for plotting (\code{base_size} in +\link[ggplot2:ggtheme]{ggplot2})} +} +\value{ +ggplot object with estimated force-of-infection +} +\description{ +Plots force-of-infection central estimates +} diff --git a/man/plot_info_table.Rd b/man/plot_info_table.Rd deleted file mode 100644 index 84c5cbb5..00000000 --- a/man/plot_info_table.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visualisation.R -\name{plot_info_table} -\alias{plot_info_table} -\title{Generate plot summarizing a given table} -\usage{ -plot_info_table(info_table, size_text) -} -\arguments{ -\item{info_table}{Table with the information to be summarised} - -\item{size_text}{Text size of the graph returned by the function} -} -\value{ -ggplot object summarizing the information in \code{info_table} -} -\description{ -Generate plot summarizing a given table -} -\examples{ -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant", - iter = 1000 -) -seromodel_summary <- extract_seromodel_summary( - seromodel_object = seromodel_object, - serodata = serodata -) -info_table <- t(seromodel_summary) -plot_info_table(info_table, size_text = 15) -} diff --git a/man/plot_rhats.Rd b/man/plot_rhats.Rd index 7effa44d..0018adcd 100644 --- a/man/plot_rhats.Rd +++ b/man/plot_rhats.Rd @@ -1,40 +1,33 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visualisation.R +% Please edit documentation in R/plot_seromodel.R \name{plot_rhats} \alias{plot_rhats} -\title{Generate plot of the R-hat estimates for the specified fitted -serological model} +\title{Plot r-hats convergence criteria for the specified model} \usage{ -plot_rhats(seromodel_object, cohort_ages, size_text = 25) +plot_rhats(seromodel, serosurvey, par_name = "foi_expanded", size_text = 11) } \arguments{ -\item{seromodel_object}{Stanfit object containing the results of fitting a -model by means of \link{fit_seromodel}.} +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} -\item{size_text}{Text size use in the theme of the graph returned by the -function.} +\item{serosurvey}{\describe{ +\item{\code{tsur}}{Year in which the survey took place} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{par_name}{String specifying the parameter to be extracted +from \code{seromodel}} + +\item{size_text}{Size of text for plotting (\code{base_size} in +\link[ggplot2:ggtheme]{ggplot2})} } \value{ -The rhats-convergence plot of the selected model. +ggplot object showing the r-hats of the model to be compared with the +convergence criteria (horizontal dashed line) } \description{ -This function generates a plot of the R-hat estimates obtained for a -specified fitted serological model \code{seromodel_object}. The x axis corresponds -to the decades covered by the survey and the y axis to the value of the -rhats. All rhats must be smaller than 1 to ensure convergence (for further -details check \link[bayesplot:bayesplot-extractors]{rhat}). -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant", - iter = 1000 -) -cohort_ages <- get_cohort_ages(serodata = serodata) -plot_rhats(seromodel_object, - cohort_ages = cohort_ages, - size_text = 15 -) +Plot r-hats convergence criteria for the specified model } diff --git a/man/plot_seromodel.Rd b/man/plot_seromodel.Rd index 7fcb0562..1d483b96 100644 --- a/man/plot_seromodel.Rd +++ b/man/plot_seromodel.Rd @@ -1,77 +1,54 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visualisation.R +% Please edit documentation in R/plot_seromodel.R \name{plot_seromodel} \alias{plot_seromodel} -\title{Generate vertical arrangement of plots showing a summary of a -model, the estimated seroprevalence, the force-of-infection fit and the R-hat -estimates plots.} +\title{Visualise results of the provided model} \usage{ plot_seromodel( - seromodel_object, - serodata, + seromodel, + serosurvey, alpha = 0.05, - max_lambda = NA, - size_text = 25, - bin_data = TRUE, - bin_step = 5, - foi = NULL + foi_df = NULL, + foi_max = NULL, + loo_estimate_digits = 1, + central_estimate_digits = 2, + seroreversion_digits = 2, + rhat_digits = 2, + size_text = 11 ) } \arguments{ -\item{seromodel_object}{Stanfit object containing the results of fitting a -model by means of \link{fit_seromodel}.} +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} +\item{serosurvey}{\describe{ \item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} -\item{alpha}{Probability threshold for statistical significance used for both -the binomial confidence interval, and the lower and upper quantiles of the -estimated prevalence.} +\item{alpha}{1 - alpha indicates the credibility level to be used} -\item{max_lambda}{Upper \code{ylim}for force-of-infection plot} +\item{foi_df}{Dataframe with columns +\describe{ +\item{\code{year}/\code{age}}{Year/Age (depending on the model)} +\item{\code{foi}}{Force-of-infection values by year/age} +}} -\item{size_text}{Text size use in the theme of the graph returned by the -function.} +\item{foi_max}{Max force-of-infection value for plotting} -\item{bin_data}{If \code{TRUE}, \code{serodata} is binned by means of -\code{prepare_bin_data}. Otherwise, age groups are kept as originally input.} +\item{loo_estimate_digits}{Number of loo estimate digits} -\item{bin_step}{Integer specifying the age groups bin size to be used when -\code{bin_data} is set to \code{TRUE}.} +\item{central_estimate_digits}{Number of central estimate digits} -\item{foi}{Object containing the information of the force-of-infection. It is -obtained from \code{rstan::extract(seromodel_object$seromodel, "foi", inc_warmup = FALSE)[[1]]}.} +\item{size_text}{Size of text for plotting (\code{base_size} in +\link[ggplot2:ggtheme]{ggplot2})} } \value{ -A ggplot object with a vertical arrange containing the -seropositivity, force of infection, and convergence plots. +seromodel summary plot } \description{ -Generate vertical arrangement of plots showing a summary of a -model, the estimated seroprevalence, the force-of-infection fit and the R-hat -estimates plots. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant", - iter = 1000 -) -plot_seromodel(seromodel_object, - serodata = serodata, - size_text = 15 -) +Visualise results of the provided model } diff --git a/man/plot_seroprev.Rd b/man/plot_seroprev.Rd deleted file mode 100644 index e123b697..00000000 --- a/man/plot_seroprev.Rd +++ /dev/null @@ -1,51 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visualisation.R -\name{plot_seroprev} -\alias{plot_seroprev} -\title{Generate seropositivity plot from a raw serological -survey dataset} -\usage{ -plot_seroprev(serodata, size_text = 6, bin_data = TRUE, bin_step = 5) -} -\arguments{ -\item{serodata}{A data frame containing the data from a serological survey. -This data frame must contain the following columns: -\describe{ -\item{\code{survey}}{survey Label of the current survey} -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{age_min}}{age_min} -\item{\code{age_max}}{age_max} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{country}}{The country where the survey took place} -\item{\code{test}}{The type of test taken} -\item{\code{antibody}}{antibody} -} -Alternatively to \code{age_min} and \code{age_max}, the dataset could already include -the age group marker \code{age_mean_f}, representing the middle point between -\code{age_min} and \code{age_max}. If \code{afe_mean_f} is missing, it will be generated -by the function.} - -\item{size_text}{Text size use in the theme of the graph returned by the -function.} - -\item{bin_data}{If \code{TRUE}, \code{serodata} is binned by means of -\code{prepare_bin_data}. Otherwise, age groups are kept as originally input.} - -\item{bin_step}{Integer specifying the age groups bin size to be used when -\code{bin_data} is set to \code{TRUE}.} -} -\value{ -A ggplot object containing the seropositivity-vs-age graph of the raw -data of a given seroprevalence survey with its corresponding binomial -confidence interval. -} -\description{ -Generate seropositivity plot from a raw serological -survey dataset -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -plot_seroprev(serodata, size_text = 15) -} diff --git a/man/plot_seroprev_fitted.Rd b/man/plot_seroprev_fitted.Rd deleted file mode 100644 index 6e5d0313..00000000 --- a/man/plot_seroprev_fitted.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/visualisation.R -\name{plot_seroprev_fitted} -\alias{plot_seroprev_fitted} -\title{Generate seropositivity plot corresponding to the specified -fitted serological model} -\usage{ -plot_seroprev_fitted( - seromodel_object, - serodata, - size_text = 6, - bin_data = TRUE, - bin_step = 5, - alpha = 0.05 -) -} -\arguments{ -\item{seromodel_object}{Stanfit object containing the results of fitting a -model by means of \link{fit_seromodel}.} - -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} - -\item{size_text}{Text size of the graph returned by the function.} - -\item{bin_data}{If \code{TRUE}, \code{serodata} is binned by means of -\code{prepare_bin_data}. Otherwise, age groups are kept as originally input.} - -\item{bin_step}{Integer specifying the age groups bin size to be used when -\code{bin_data} is set to \code{TRUE}.} - -\item{alpha}{Probability threshold for statistical significance used for both -the binomial confidence interval, and the lower and upper quantiles of the -estimated prevalence.} -} -\value{ -A ggplot object containing the seropositivity-vs-age graph including -the data, the fitted model and their corresponding confidence intervals. -} -\description{ -This function generates a seropositivity plot of the specified serological -model object. This includes the original data grouped by age as well as the -obtained fitting from the model implementation. Age is located on the x axis -and seropositivity on the y axis with its corresponding confidence interval. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -seromodel_object <- fit_seromodel( - serodata = serodata, - foi_model = "constant", - iter = 1000 -) -plot_seroprev_fitted(seromodel_object, - serodata = serodata, - size_text = 15 -) -} diff --git a/man/plot_seroprevalence_estimates.Rd b/man/plot_seroprevalence_estimates.Rd new file mode 100644 index 00000000..d83d727f --- /dev/null +++ b/man/plot_seroprevalence_estimates.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_seromodel.R +\name{plot_seroprevalence_estimates} +\alias{plot_seroprevalence_estimates} +\title{Plot seroprevalence estimates on top of the serosurvey} +\usage{ +plot_seroprevalence_estimates( + seromodel, + serosurvey, + alpha = 0.05, + size_text = 11 +) +} +\arguments{ +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} + +\item{serosurvey}{\describe{ +\item{\code{tsur}}{Year in which the survey took place} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{alpha}{1 - alpha indicates the credibility level to be used} + +\item{size_text}{Size of text for plotting (\code{base_size} in +\link[ggplot2:ggtheme]{ggplot2})} +} +\value{ +ggplot object with seroprevalence estimates and serosurveys plots +} +\description{ +Plot seroprevalence estimates on top of the serosurvey +} diff --git a/man/plot_serosurvey.Rd b/man/plot_serosurvey.Rd new file mode 100644 index 00000000..38e6014a --- /dev/null +++ b/man/plot_serosurvey.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_seromodel.R +\name{plot_serosurvey} +\alias{plot_serosurvey} +\title{Plots seroprevalence from the given serosurvey} +\usage{ +plot_serosurvey(serosurvey, size_text = 11) +} +\arguments{ +\item{serosurvey}{\describe{ +\item{\code{tsur}}{Year in which the survey took place} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{size_text}{Size of text for plotting (\code{base_size} in +\link[ggplot2:ggtheme]{ggplot2})} +} +\value{ +ggplot object with seroprevalence plot +} +\description{ +Plots seroprevalence from the given serosurvey +} diff --git a/man/plot_summary.Rd b/man/plot_summary.Rd new file mode 100644 index 00000000..41d05737 --- /dev/null +++ b/man/plot_summary.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_seromodel.R +\name{plot_summary} +\alias{plot_summary} +\title{Plots model summary} +\usage{ +plot_summary( + seromodel, + serosurvey, + loo_estimate_digits = 1, + central_estimate_digits = 2, + rhat_digits = 2, + size_text = 11 +) +} +\arguments{ +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} + +\item{serosurvey}{\describe{ +\item{\code{tsur}}{Year in which the survey took place} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{loo_estimate_digits}{Number of loo estimate digits} + +\item{central_estimate_digits}{Number of central estimate digits} + +\item{size_text}{Size of text for plotting (\code{base_size} in +\link[ggplot2:ggtheme]{ggplot2})} +} +\value{ +ggplot object with a summary of the specified model +} +\description{ +Plots model summary +} diff --git a/man/prepare_serodata.Rd b/man/prepare_serodata.Rd deleted file mode 100644 index 4616124a..00000000 --- a/man/prepare_serodata.Rd +++ /dev/null @@ -1,54 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/seroprevalence_data.R -\name{prepare_serodata} -\alias{prepare_serodata} -\title{Prepare data from a serological survey for modelling} -\usage{ -prepare_serodata(serodata = serodata, alpha = 0.05) -} -\arguments{ -\item{serodata}{A data frame containing the data from a serological survey. -This data frame must contain the following columns: -\describe{ -\item{\code{survey}}{survey Label of the current survey} -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{age_min}}{age_min} -\item{\code{age_max}}{age_max} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{country}}{The country where the survey took place} -\item{\code{test}}{The type of test taken} -\item{\code{antibody}}{antibody} -} -Alternatively to \code{age_min} and \code{age_max}, the dataset could already include -the age group marker \code{age_mean_f}, representing the middle point between -\code{age_min} and \code{age_max}. If \code{afe_mean_f} is missing, it will be generated -by the function.} - -\item{alpha}{probability of a type I error. For further details refer to -\link[Hmisc:binconf]{binconf}.} -} -\value{ -serodata with additional columns necessary for the analysis. These -columns are: -\describe{ -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max -for the age groups delimited by \code{age_min} and \code{age_max}} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{Years in which the subject was born according to the -age group marker \code{age_mean_f}} -\item{\code{prev_obs}}{Observed prevalence} -\item{\code{prev_obs_lower}}{Lower limit of the confidence interval for the -observed prevalence} -\item{\code{prev_obs_upper}}{Upper limit of the confidence interval for the -observed prevalence} -} -} -\description{ -This function adds the necessary additional variables to the given dataset -\code{serodata} corresponding to a serological survey. -} -\examples{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -} diff --git a/man/prepare_serosurvey_for_plotting.Rd b/man/prepare_serosurvey_for_plotting.Rd new file mode 100644 index 00000000..7296cda6 --- /dev/null +++ b/man/prepare_serosurvey_for_plotting.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_seromodel.R +\name{prepare_serosurvey_for_plotting} +\alias{prepare_serosurvey_for_plotting} +\title{Prepares serosurvey for plotting} +\usage{ +prepare_serosurvey_for_plotting(serosurvey, alpha = 0.05) +} +\arguments{ +\item{serosurvey}{\describe{ +\item{\code{tsur}}{Year in which the survey took place} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{alpha}{1 - alpha indicates the confidence level to be used} +} +\value{ +serosurvey with additional columns: +\describe{ +\item{seroprev}{Seroprevalence computed as the proportion of positive +cases \code{n_seropositive} in the number of samples +\code{sample_size} for each age group} +\item{seroprev_lower}{Lower limit of the binomial confidence interval +of \code{seroprev}} +\item{seroprev_upper}{Upper limit of the binomial confidence interval +of \code{seroprev}} +} +} +\description{ +Adds seroprevalence values with corresponding binomial confidence interval +} diff --git a/man/probability_exact_time_varying.Rd b/man/probability_exact_time_varying.Rd index 36e7da06..2a923288 100644 --- a/man/probability_exact_time_varying.Rd +++ b/man/probability_exact_time_varying.Rd @@ -7,7 +7,8 @@ probability_exact_time_varying(years, fois, seroreversion_rate = 0) } \arguments{ -\item{years}{Integer indicating the years covering the birth ages of the sample} +\item{years}{Integer indicating the years covering the birth ages of the +sample} \item{fois}{Numeric atomic vector corresponding to the age-varying force-of-infection to simulate from} diff --git a/man/probability_seropositive_age_and_time_model_by_age.Rd b/man/probability_seropositive_age_and_time_model_by_age.Rd index 2c9e71ad..3c32fe9e 100644 --- a/man/probability_seropositive_age_and_time_model_by_age.Rd +++ b/man/probability_seropositive_age_and_time_model_by_age.Rd @@ -2,20 +2,23 @@ % Please edit documentation in R/simulate_serosurvey.R \name{probability_seropositive_age_and_time_model_by_age} \alias{probability_seropositive_age_and_time_model_by_age} -\title{Generate probabilities of seropositivity by age based on an age-and-time varying FOI model.} +\title{Generate probabilities of seropositivity by age based on an age-and-time +varying FOI model.} \usage{ probability_seropositive_age_and_time_model_by_age(foi, seroreversion_rate) } \arguments{ -\item{foi}{A dataframe containing the force of infection (FOI) values for different ages. -It should have three columns: 'year', 'age' and 'foi'.} +\item{foi}{A dataframe containing the force of infection (FOI) values +for different ages. It should have three columns: 'year', 'age' and 'foi'.} -\item{seroreversion_rate}{A non-negative numeric value representing the rate of seroreversion.} +\item{seroreversion_rate}{A non-negative numeric value representing +the rate of seroreversion.} } \value{ A dataframe with columns 'age' and 'seropositivity'. } \description{ -This function calculates the probabilities of seropositivity by age based on an age-and-time-varying FOI model. +This function calculates the probabilities of seropositivity by age based on +an age-and-time-varying FOI model. It takes into account the FOI and the rate of seroreversion. } diff --git a/man/probability_seropositive_age_model_by_age.Rd b/man/probability_seropositive_age_model_by_age.Rd index 24c93102..85f52bdb 100644 --- a/man/probability_seropositive_age_model_by_age.Rd +++ b/man/probability_seropositive_age_model_by_age.Rd @@ -2,20 +2,23 @@ % Please edit documentation in R/simulate_serosurvey.R \name{probability_seropositive_age_model_by_age} \alias{probability_seropositive_age_model_by_age} -\title{Generate probabilities of seropositivity by age based on an age-varying FOI model.} +\title{Generate probabilities of seropositivity by age based on an age-varying +FOI model.} \usage{ probability_seropositive_age_model_by_age(foi, seroreversion_rate) } \arguments{ -\item{foi}{A dataframe containing the force of infection (FOI) values for different ages. -It should have two columns: 'age' and 'foi'.} +\item{foi}{A dataframe containing the force of infection (FOI) values for +different ages. It should have two columns: 'age' and 'foi'.} -\item{seroreversion_rate}{A non-negative numeric value representing the rate of seroreversion.} +\item{seroreversion_rate}{A non-negative numeric value representing the rate +of seroreversion.} } \value{ A dataframe with columns 'age' and 'seropositivity'. } \description{ -This function calculates the probabilities of seropositivity by age based on an age-varying FOI model. +This function calculates the probabilities of seropositivity by age based on +an age-varying FOI model. It takes into account the FOI and the rate of seroreversion. } diff --git a/man/probability_seropositive_by_age.Rd b/man/probability_seropositive_by_age.Rd index e416a8b4..f2910a5b 100644 --- a/man/probability_seropositive_by_age.Rd +++ b/man/probability_seropositive_by_age.Rd @@ -7,15 +7,17 @@ probability_seropositive_by_age(model, foi, seroreversion_rate = 0) } \arguments{ -\item{model}{A string specifying the model type which can be one of \link{'age', 'time', 'age-time'}.} +\item{model}{A string specifying the model type which can be one of +\link{'age', 'time', 'age-time'}.} \item{foi}{A dataframe containing the force of infection (FOI) values. For time-varying models the columns should be \link{'year', 'foi'}. For age-varying models the columns should be \link{'age', 'foi'}. -For age-and-time-varying models the columns should be \link{'age', 'time', 'foi'}.} +For age-and-time-varying models the columns should be +\link{'age', 'time', 'foi'}.} -\item{seroreversion_rate}{A non-negative value determining the rate of seroreversion (per year). -Default is 0.} +\item{seroreversion_rate}{A non-negative value determining the rate of +seroreversion (per year). Default is 0.} } \value{ A dataframe with columns 'age' and 'seropositivity'. diff --git a/man/probability_seropositive_general_model_by_age.Rd b/man/probability_seropositive_general_model_by_age.Rd index d80e3d8a..db451eb8 100644 --- a/man/probability_seropositive_general_model_by_age.Rd +++ b/man/probability_seropositive_general_model_by_age.Rd @@ -5,7 +5,7 @@ \title{Generate probabilities of seropositivity by age based on a general FOI model.} \usage{ probability_seropositive_general_model_by_age( - construct_A_function, + construct_A_fn, calculate_seropositivity_function, initial_conditions, max_age, @@ -13,15 +13,16 @@ probability_seropositive_general_model_by_age( ) } \arguments{ -\item{calculate_seropositivity_function}{A function which takes the state vector -and returns the seropositive fraction.} +\item{construct_A_fn}{A function that constructs a matrix that defines the +multiplier term in the linear ODE system.} -\item{initial_conditions}{The initial state vector proportions for each birth cohort.} +\item{calculate_seropositivity_function}{A function which takes the state +vector and returns the seropositive fraction.} -\item{max_age}{The maximum age to simulate seropositivity for.} +\item{initial_conditions}{The initial state vector proportions for each +birth cohort.} -\item{construct_A_fn}{A function that constructs a matrix that defines the multiplier -term in the linear ODE system.} +\item{max_age}{The maximum age to simulate seropositivity for.} } \value{ A dataframe with columns 'age' and 'seropositivity'. diff --git a/man/probability_seropositive_time_model_by_age.Rd b/man/probability_seropositive_time_model_by_age.Rd index 080f6323..cba7321c 100644 --- a/man/probability_seropositive_time_model_by_age.Rd +++ b/man/probability_seropositive_time_model_by_age.Rd @@ -2,20 +2,23 @@ % Please edit documentation in R/simulate_serosurvey.R \name{probability_seropositive_time_model_by_age} \alias{probability_seropositive_time_model_by_age} -\title{Generate probabilities of seropositivity by age based on a time-varying FOI model.} +\title{Generate probabilities of seropositivity by age based on a time-varying FOI +model.} \usage{ probability_seropositive_time_model_by_age(foi, seroreversion_rate) } \arguments{ -\item{foi}{A dataframe containing the force of infection (FOI) values for different years. -It should have two columns: 'year' and 'foi'.} +\item{foi}{A dataframe containing the force of infection (FOI) values +for different years. It should have two columns: 'year' and 'foi'.} -\item{seroreversion_rate}{A non-negative numeric value representing the rate of seroreversion.} +\item{seroreversion_rate}{A non-negative numeric value representing the +rate of seroreversion.} } \value{ A dataframe with columns 'age' and 'seropositivity'. } \description{ -This function calculates the probabilities of seropositivity by age based on a time-varying FOI model. +This function calculates the probabilities of seropositivity by age based on +a time-varying FOI model. It takes into account the FOI and the rate of seroreversion. } diff --git a/man/run_seromodel.Rd b/man/run_seromodel.Rd deleted file mode 100644 index f564ca8e..00000000 --- a/man/run_seromodel.Rd +++ /dev/null @@ -1,108 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/modelling.R -\name{run_seromodel} -\alias{run_seromodel} -\title{Run specified stan model for the force-of-infection and -estimate the seroprevalence based on the result of the fit} -\usage{ -run_seromodel( - serodata, - foi_model = c("constant", "tv_normal_log", "tv_normal"), - foi_parameters = NULL, - chunks = NULL, - chunk_size = 1, - iter = 1000, - adapt_delta = 0.9, - max_treedepth = 10, - seed = 12345, - print_summary = TRUE, - ... -) -} -\arguments{ -\item{serodata}{A data frame containing the data from a seroprevalence -survey. This data frame must contain at least the following columns: -\describe{ -\item{\code{total}}{Number of samples for each age group} -\item{\code{counts}}{Number of positive samples for each age group} -\item{\code{tsur}}{Year in which the survey took place} -\item{\code{age_mean_f}}{Floor value of the average between age_min and age_max} -\item{\code{sample_size}}{The size of the sample} -\item{\code{birth_year}}{The year in which the individuals of each age group -were born} -} -The last six columns can be added to \code{serodata} by means of the function -\code{\link[=prepare_serodata]{prepare_serodata()}}.} - -\item{foi_model}{Name of the selected model. Current version provides three -options: -\describe{ -\item{\code{"constant"}}{Runs a constant model} -\item{\code{"tv_normal"}}{Runs a normal model} -\item{\code{"tv_normal_log"}}{Runs a normal logarithmic model} -}} - -\item{foi_parameters}{List specifying the initial prior parameters of the -model \code{foi_model} to be specified as (e.g.): -\describe{ -\item{\code{"constant"}}{\code{list(foi_a = 0, foi_b = 2)}} -\item{\code{"tv_normal"}}{\code{list(foi_location = 0, foi_scale = 1)}} -\item{\code{"tv_normal_log"}}{\code{list(foi_location = -6, foi_scale = 4)}} -}} - -\item{chunks}{Numeric list specifying the chunk structure of the time -interval from the birth year of the oldest age cohort -\code{min(serodata$age_mean_f)} to the time when the serosurvey was conducted -\code{t_sur}. If \code{NULL}, the time interval is divided in chunks of size -\code{chunk_size}.} - -\item{chunk_size}{Size of the chunks to be used in case that the chunk -structure \code{chunks} is not specified in \link{fit_seromodel}. -Default is 1, meaning that one force of infection value is to be estimated -for every year in the time interval spanned by the serosurvey. -If the length of the time interval is not exactly divisible by \code{chunk_size}, -the remainder years are included in the last chunk.} - -\item{iter}{Number of interactions for each chain including the warmup. -\code{iter} in \link[rstan:stanmodel-method-sampling]{sampling}.} - -\item{adapt_delta}{Real number between 0 and 1 that represents the target -average acceptance probability. Increasing the value of \code{adapt_delta} will -result in a smaller step size and fewer divergences. For further details -refer to the \code{control} parameter in \link[rstan:stanmodel-method-sampling]{sampling} or -\href{https://mc-stan.org/rstanarm/reference/adapt_delta.html}{here}.} - -\item{max_treedepth}{Maximum tree depth for the binary tree used in the NUTS -stan sampler. For further details refer to the \code{control} parameter in -\link[rstan:stanmodel-method-sampling]{sampling}.} - -\item{seed}{For further details refer to the \code{seed} parameter in -\link[rstan:stanmodel-method-sampling]{sampling}.} - -\item{print_summary}{Boolean. If \code{TRUE}, a table summarizing modelling -results is printed.} - -\item{...}{Additional parameters for \link[rstan:stanmodel-method-sampling]{sampling}.} -} -\value{ -\code{seromodel_object}. An object containing relevant information about -the implementation of the model. For further details refer to -\link{fit_seromodel}. -} -\description{ -Starting on v.0.1.0, this function will be DEPRECATED. Use \code{fit_seromodel} -instead. -This function runs the specified model for the force-of-infection \code{foi_model} -using the data from a seroprevalence survey \code{serodata} as the input data. See -\link{fit_seromodel} for further details. -} -\examples{ -\dontrun{ -data(chagas2012) -serodata <- prepare_serodata(chagas2012) -fit_seromodel( - serodata, - foi_model = "constant" -) -} -} diff --git a/man/sample_size_by_individual_age_random.Rd b/man/sample_size_by_individual_age_random.Rd index cb046c9e..117284c5 100644 --- a/man/sample_size_by_individual_age_random.Rd +++ b/man/sample_size_by_individual_age_random.Rd @@ -2,22 +2,24 @@ % Please edit documentation in R/simulate_serosurvey.R \name{sample_size_by_individual_age_random} \alias{sample_size_by_individual_age_random} -\title{Generate random sample sizes for each individual age based on survey features.} +\title{Generate random sample sizes for each individual age based on survey +features.} \usage{ sample_size_by_individual_age_random(survey_features) } \arguments{ -\item{survey_features}{A dataframe containing information about individuals' age ranges and -sample sizes.} +\item{survey_features}{A dataframe containing information about individuals' +age ranges and sample sizes.} } \value{ -A dataframe with random sample sizes generated for each individual age based on the -provided survey features. +A dataframe with random sample sizes generated for each individual +age based on the provided survey features. } \description{ -This function generates random sample sizes for each individual age based on the provided -survey features. It first creates age bins, assigns each individual in the survey features -to an age bin, calculates the overall sample size by group, and then generates random sample -sizes for each age group. Finally, it returns a dataframe with the random sample sizes for -each individual age. +This function generates random sample sizes for each individual age based on +the provided survey features. It first creates age bins, assigns each +individual in the survey features to an age bin, calculates the overall +sample size by group, and then generates random sample sizes for each age +group. Finally, it returns a dataframe with the random sample sizes for each +individual age. } diff --git a/man/serofoi-package.Rd b/man/serofoi-package.Rd index f2c8e932..8fcb4efa 100644 --- a/man/serofoi-package.Rd +++ b/man/serofoi-package.Rd @@ -11,4 +11,24 @@ A DESCRIPTION OF THE PACKAGE \references{ Stan Development Team (NA). RStan: the R interface to Stan. R package version 2.26.22. https://mc-stan.org + +#' @keywords internal +} +\seealso{ +Useful links: +\itemize{ + \item \url{https://trace-lac.github.io/serofoi/} +} + +} +\author{ +\strong{Maintainer}: Zulma M. Cucunubá \email{zulma.cucunuba@javeriana.edu.co} (\href{https://orcid.org/0000-0002-8165-3198}{ORCID}) + +Authors: +\itemize{ + \item Nicolás T. Domínguez \email{ex-ntorres@javeriana.edu.co} (\href{https://orcid.org/0009-0002-8484-1298}{ORCID}) + \item Ben Lambert + \item Pierre Nouvellet +} + } diff --git a/man/set_stan_data_defaults.Rd b/man/set_stan_data_defaults.Rd new file mode 100644 index 00000000..45d1681b --- /dev/null +++ b/man/set_stan_data_defaults.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_stan_data.R +\name{set_stan_data_defaults} +\alias{set_stan_data_defaults} +\title{Set stan data defaults for sampling} +\usage{ +set_stan_data_defaults(stan_data, is_seroreversion = FALSE) +} +\arguments{ +\item{stan_data}{List to be passed to \link[rstan:stanmodel-method-sampling]{rstan}} + +\item{is_seroreversion}{Boolean specifying whether to include +seroreversion rate estimation in the model} +} +\value{ +List with default values of stan data for sampling +} +\description{ +Set stan data defaults for sampling +} diff --git a/man/sf_none.Rd b/man/sf_none.Rd new file mode 100644 index 00000000..f101d850 --- /dev/null +++ b/man/sf_none.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_stan_data.R +\name{sf_none} +\alias{sf_none} +\title{Sets empty distribution} +\usage{ +sf_none() +} +\description{ +Sets empty distribution +} diff --git a/man/sf_normal.Rd b/man/sf_normal.Rd new file mode 100644 index 00000000..32977287 --- /dev/null +++ b/man/sf_normal.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_stan_data.R +\name{sf_normal} +\alias{sf_normal} +\title{Sets normal distribution parameters for sampling} +\usage{ +sf_normal(mean = 0, sd = 1) +} +\arguments{ +\item{mean}{Mean of the normal distribution} + +\item{sd}{Standard deviation of the normal distribution} +} +\value{ +List with specified statistics and name of the model +} +\description{ +Sets normal distribution parameters for sampling +} diff --git a/man/sf_uniform.Rd b/man/sf_uniform.Rd new file mode 100644 index 00000000..a2ef7279 --- /dev/null +++ b/man/sf_uniform.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_stan_data.R +\name{sf_uniform} +\alias{sf_uniform} +\title{Sets uniform distribution parameters for sampling} +\usage{ +sf_uniform(min = 0, max = 10) +} +\arguments{ +\item{min}{Minimum value of the random variable of the uniform distribution} + +\item{max}{Maximum value of the random variable of the uniform distribution} +} +\value{ +List with specified statistics and name of the model +} +\description{ +Sets uniform distribution parameters for sampling +} diff --git a/man/simulate_serosurvey.Rd b/man/simulate_serosurvey.Rd index 2e250102..3b9871f6 100644 --- a/man/simulate_serosurvey.Rd +++ b/man/simulate_serosurvey.Rd @@ -7,29 +7,33 @@ simulate_serosurvey(model, foi, survey_features, seroreversion_rate = 0) } \arguments{ -\item{model}{A string specifying the model type which can be one of \link{'age', 'time', 'age-time'}.} +\item{model}{A string specifying the model type which can be one of +\link{'age', 'time', 'age-time'}.} \item{foi}{A dataframe containing the force of infection (FOI) values. For time-varying models the columns should be \link{'year', 'foi'}. For age-varying models the columns should be \link{'age', 'foi'}. For age-and-time-varying models the columns should be \link{'age', 'time', 'foi'}.} -\item{survey_features}{A dataframe containing information about the binned age groups and sample -sizes for each. It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} +\item{survey_features}{A dataframe containing information about the binned +age groups and sample sizes for each. +It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} -\item{seroreversion_rate}{A non-negative value determining the rate of seroreversion (per year). -Default is 0.} +\item{seroreversion_rate}{A non-negative value determining the rate of +seroreversion (per year). Default is 0.} } \value{ -A dataframe with simulated serosurvey data, including age group information, overall -sample sizes, the number of seropositive individuals, and other survey features. +A dataframe with simulated serosurvey data, including age group +information, overall sample sizes, the number of seropositive individuals, +and other survey features. } \description{ -This function generates binned serosurvey data based on either a time-varying FOI model, -an age-varying FOI model, or an age-and-time-varying FOI model. In all cases, it is possible -to optionally include seroreversion. This function allows construction of serosurveys -with binned age groups, and it generates uncertainty in the distribution of a sample size -within an age bin through multinomial sampling. +This function generates binned serosurvey data based on either a time-varying +FOI model, an age-varying FOI model, or an age-and-time-varying FOI model. +In all cases, it is possible to optionally include seroreversion. This +function allows construction of serosurveys with binned age groups, and it +generates uncertainty in the distribution of a sample size within an age bin +through multinomial sampling. } \examples{ # time-varying model diff --git a/man/simulate_serosurvey_age_and_time_model.Rd b/man/simulate_serosurvey_age_and_time_model.Rd index 4a3f761a..02f66c39 100644 --- a/man/simulate_serosurvey_age_and_time_model.Rd +++ b/man/simulate_serosurvey_age_and_time_model.Rd @@ -11,24 +11,27 @@ simulate_serosurvey_age_and_time_model( ) } \arguments{ -\item{foi}{A dataframe containing the force of infection (FOI) values for different ages. -It should have two columns: 'year', 'age' and 'foi'.} +\item{foi}{A dataframe containing the force of infection (FOI) values for +different ages. It should have two columns: 'year', 'age' and 'foi'.} -\item{survey_features}{A dataframe containing information about the binned age groups and sample -sizes for each. It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} +\item{survey_features}{A dataframe containing information about the binned +age groups and sample sizes for each. It should contain columns: +\link{'age_min', 'age_max', 'sample_size'}.} -\item{seroreversion_rate}{A non-negative value determining the rate of seroreversion (per year). -Default is 0.} +\item{seroreversion_rate}{A non-negative value determining the rate of +seroreversion (per year). Default is 0.} } \value{ -A dataframe with simulated serosurvey data, including age group information, overall -sample sizes, the number of seropositive individuals, and other survey features. +A dataframe with simulated serosurvey data, including age group +information, overall sample sizes, the number of seropositive individuals, +and other survey features. } \description{ -This function generates binned serosurvey data based on an age-and-time-varying FOI model, -optionally including seroreversion. This function allows construction of serosurveys -with binned age groups, and it generates uncertainty in the distribution of a sample size -within an age bin through multinomial sampling. +This function generates binned serosurvey data based on an +age-and-time-varying FOI model, optionally including seroreversion. +This function allows construction of serosurveys with binned age groups, and +it generates uncertainty in the distribution of a sample size within an age +bin through multinomial sampling. } \examples{ # specify FOIs for each year diff --git a/man/simulate_serosurvey_age_model.Rd b/man/simulate_serosurvey_age_model.Rd index 2302aecc..d617cbf4 100644 --- a/man/simulate_serosurvey_age_model.Rd +++ b/man/simulate_serosurvey_age_model.Rd @@ -7,24 +7,26 @@ simulate_serosurvey_age_model(foi, survey_features, seroreversion_rate = 0) } \arguments{ -\item{foi}{A dataframe containing the force of infection (FOI) values for different ages. -It should have two columns: 'age' and 'foi'.} +\item{foi}{A dataframe containing the force of infection (FOI) values for +different ages. It should have two columns: 'age' and 'foi'.} -\item{survey_features}{A dataframe containing information about the binned age groups and sample -sizes for each. It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} +\item{survey_features}{A dataframe containing information about the binned +age groups and sample sizes for each. It should contain columns: +\link{'age_min', 'age_max', 'sample_size'}.} -\item{seroreversion_rate}{A non-negative value determining the rate of seroreversion (per year). -Default is 0.} +\item{seroreversion_rate}{A non-negative value determining the rate of +seroreversion (per year). Default is 0.} } \value{ -A dataframe with simulated serosurvey data, including age group information, overall -sample sizes, the number of seropositive individuals, and other survey features. +A dataframe with simulated serosurvey data, including age group +information, overall sample sizes, the number of seropositive individuals, +and other survey features. } \description{ -This function generates binned serosurvey data based on an age-varying FOI model, -optionally including seroreversion. This function allows construction of serosurveys -with binned age groups, and it generates uncertainty in the distribution of a sample size -within an age bin through multinomial sampling. +This function generates binned serosurvey data based on an age-varying FOI +model, optionally including seroreversion. This function allows construction +of serosurveys with binned age groups, and it generates uncertainty in the +distribution of a sample size within an age bin through multinomial sampling. } \examples{ # specify FOIs for each year diff --git a/man/simulate_serosurvey_general_model.Rd b/man/simulate_serosurvey_general_model.Rd index 0d7ca514..af61bf5b 100644 --- a/man/simulate_serosurvey_general_model.Rd +++ b/man/simulate_serosurvey_general_model.Rd @@ -13,19 +13,22 @@ simulate_serosurvey_general_model( ) } \arguments{ -\item{calculate_seropositivity_function}{A function which takes the state vector -and returns the seropositive fraction.} +\item{calculate_seropositivity_function}{A function which takes the state +vector and returns the seropositive fraction.} -\item{initial_conditions}{The initial state vector proportions for each birth cohort.} +\item{initial_conditions}{The initial state vector proportions for each +birth cohort.} -\item{survey_features}{A dataframe containing information about the binned age groups and sample -sizes for each. It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} +\item{survey_features}{A dataframe containing information about the binned +age groups and sample sizes for each. +It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} } \value{ -A dataframe with simulated serosurvey data, including age group information, overall -sample sizes, the number of seropositive individuals, and other survey features. +A dataframe with simulated serosurvey data, including age group +information, overall sample sizes, the number of seropositive individuals, +and other survey features. } \description{ -This simulation method assumes only that the model system can be written as a piecewise- -linear ordinary differential equation system. +This simulation method assumes only that the model system can be written as +a piecewise-linear ordinary differential equation system. } diff --git a/man/simulate_serosurvey_time_model.Rd b/man/simulate_serosurvey_time_model.Rd index 7b965831..d5baebbe 100644 --- a/man/simulate_serosurvey_time_model.Rd +++ b/man/simulate_serosurvey_time_model.Rd @@ -7,24 +7,26 @@ simulate_serosurvey_time_model(foi, survey_features, seroreversion_rate = 0) } \arguments{ -\item{foi}{A dataframe containing the force of infection (FOI) values for different years. -It should have two columns: 'year' and 'foi'.} +\item{foi}{A dataframe containing the force of infection (FOI) values for +different years. It should have two columns: 'year' and 'foi'.} -\item{survey_features}{A dataframe containing information about the binned age groups and sample -sizes for each. It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} +\item{survey_features}{A dataframe containing information about the binned +age groups and sample sizes for each. It should contain columns: +\link{'age_min', 'age_max', 'sample_size'}.} -\item{seroreversion_rate}{A non-negative value determining the rate of seroreversion (per year). -Default is 0.} +\item{seroreversion_rate}{A non-negative value determining the rate of +seroreversion (per year). Default is 0.} } \value{ -A dataframe with simulated serosurvey data, including age group information, overall -sample sizes, the number of seropositive individuals, and other survey features. +A dataframe with simulated serosurvey data, including age group +information, overall sample sizes, the number of seropositive individuals, +and other survey features. } \description{ -This function generates binned serosurvey data based on a time-varying FOI model, -optionally including seroreversion. This function allows construction of serosurveys -with binned age groups, and it generates uncertainty in the distribution of a sample size -within an age bin through multinomial sampling. +This function generates binned serosurvey data based on a time-varying FOI +model, optionally including seroreversion. This function allows construction +of serosurveys with binned age groups, and it generates uncertainty in the +distribution of a sample size within an age bin through multinomial sampling. } \examples{ # specify FOIs for each year diff --git a/man/summarise_central_estimate.Rd b/man/summarise_central_estimate.Rd new file mode 100644 index 00000000..a6071d96 --- /dev/null +++ b/man/summarise_central_estimate.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarise_seromodel.R +\name{summarise_central_estimate} +\alias{summarise_central_estimate} +\title{Summarise central estimate} +\usage{ +summarise_central_estimate( + seromodel, + serosurvey, + alpha, + par_name = "seroreversion_rate", + central_estimate_digits = 2 +) +} +\arguments{ +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} + +\item{serosurvey}{\describe{ +\item{\code{tsur}}{Year in which the survey took place} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{alpha}{1 - alpha indicates the credibility level to be used} + +\item{par_name}{String specifying the parameter to be extracted +from \code{seromodel}} + +\item{central_estimate_digits}{Number of central estimate digits} +} +\value{ +Text summarising specified central estimate +} +\description{ +Summarise central estimate +} diff --git a/man/summarise_loo_estimate.Rd b/man/summarise_loo_estimate.Rd new file mode 100644 index 00000000..055038e0 --- /dev/null +++ b/man/summarise_loo_estimate.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarise_seromodel.R +\name{summarise_loo_estimate} +\alias{summarise_loo_estimate} +\title{Extract specified loo estimate} +\usage{ +summarise_loo_estimate( + seromodel, + par_loo_estimate = "elpd_loo", + loo_estimate_digits = 2 +) +} +\arguments{ +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} + +\item{par_loo_estimate}{Name of the loo estimate to be extracted.} + +\item{loo_estimate_digits}{Number of loo estimate digits} +} +\value{ +Text summarising specified loo estimate +} +\description{ +Extract specified loo estimate +} diff --git a/man/summarise_seromodel.Rd b/man/summarise_seromodel.Rd new file mode 100644 index 00000000..488704ca --- /dev/null +++ b/man/summarise_seromodel.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/summarise_seromodel.R +\name{summarise_seromodel} +\alias{summarise_seromodel} +\title{Summarise specified model} +\usage{ +summarise_seromodel( + seromodel, + serosurvey, + alpha = 0.05, + par_loo_estimate = "elpd_loo", + loo_estimate_digits = 1, + central_estimate_digits = 2, + rhat_digits = 2 +) +} +\arguments{ +\item{seromodel}{stan_fit object obtained from sampling a model +with \link{fit_seromode}} + +\item{serosurvey}{\describe{ +\item{\code{tsur}}{Year in which the survey took place} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{alpha}{1 - alpha indicates the credibility level to be used} + +\item{par_loo_estimate}{Name of the loo estimate to be extracted.} + +\item{loo_estimate_digits}{Number of loo estimate digits} + +\item{central_estimate_digits}{Number of central estimate digits} +} +\value{ +A list summarising the specified model +\describe{ +\item{\code{model_name}}{Name of the model} +\item{\code{elpd}}{elpd and its standard deviation} +\item{\code{foi}}{Estimated foi with credible interval (for 'constant' model)} +\item{\code{foi_rhat}}{foi rhat value (for 'constant' model)} +\item{\code{seroreversion_rate}}{Estimated seroreversion rate} +\item{\code{seroreversion_rate_rhat}}{Seroreversion rate rhat value} +} +} +\description{ +Summarise specified model +} diff --git a/man/survey_by_individual_age.Rd b/man/survey_by_individual_age.Rd index 6b2a0af7..a9b4b47e 100644 --- a/man/survey_by_individual_age.Rd +++ b/man/survey_by_individual_age.Rd @@ -7,12 +7,14 @@ survey_by_individual_age(survey_features, age_df) } \arguments{ -\item{survey_features}{A dataframe containing information about age groups and sample sizes.} +\item{survey_features}{A dataframe containing information about age groups +and sample sizes.} \item{age_df}{A dataframe containing 'age' and 'group'.} } \value{ -A dataframe with overall sample sizes calculated by joining survey_features and age_df. +A dataframe with overall sample sizes calculated by joining +survey_features and age_df. This dataframe has columns including 'age' and 'overall_sample_size'. } \description{ diff --git a/man/veev2012.Rd b/man/veev2012.Rd deleted file mode 100644 index 65899a42..00000000 --- a/man/veev2012.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/veev2012.R -\docType{data} -\name{veev2012} -\alias{veev2012} -\title{Seroprevalence data on serofoi} -\format{ -An object of class \code{"cross"}; see \code{\link[qtl:read.cross]{qtl::read.cross()}}. -} -\usage{ -veev2012 -} -\description{ -Data from a serological surveys -} -\examples{ -veev2012 -} -\keyword{datasets} From db19c9a5db856af3e1ffcede796378d51a20fa50 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 6 Aug 2024 18:09:01 -0500 Subject: [PATCH 41/78] update DESCRIPTION --- DESCRIPTION | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b06d8618..3dfb3665 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,7 +36,7 @@ License: MIT + file LICENSE Encoding: UTF-8 Language: en-GB LazyData: true -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 NeedsCompilation: yes Depends: R (>= 3.5.0) @@ -55,7 +55,8 @@ Imports: purrr, tidyr, tibble, - Matrix + Matrix, + glue LinkingTo: BH (>= 1.66.0), Rcpp (>= 0.12.0), From 5bd65608aceafa2e1eaeb2ed4ac7612b30312a78 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 6 Aug 2024 18:09:18 -0500 Subject: [PATCH 42/78] update WORDLIST --- inst/WORDLIST | 104 ++++++++++++++++++++++---------------------------- 1 file changed, 46 insertions(+), 58 deletions(-) diff --git a/inst/WORDLIST b/inst/WORDLIST index aa4b41e8..80e6482c 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,101 +1,89 @@ Aedes Aki -al Alphavirus Alphaviruses -alphaviruses -arboviruses Angulo -Basáñez +BMJ Bahia Bajaj -bayesian -binconf -binom -BMJ +Basáñez CMD -Carrera -Chagas -Chikungunya -chikungunya Codecov Conteh -cruzi -Cucunubá -Darien -de Dib -disaggregated Domínguez -each -elpd Epiverse -et Everlyn +FOI +FOIs FoI Gamez Garzón -ggplot -Gómez Gruson -https HMC Henao IgG IgM -Javeriana Lesong -lifecycle Liscano Mariscal María -mc Neira -Nicolás -org -packagetemplate +ORCID Panamá Parra Pavlich Pittí -Pontificia -Programmes Quevedo -ORCID -rbinom -refactorization +RStan +Refactorization +Seroreversion +Serosurveys +Sumali +Triatomine +Trypanosoma +VEEV +Vehtari +Yaneth +Zulma +alphaviruses +bayesian +binom +bmatrix +boldsymbol +cdots +cruzi +dD +dS +dX +ddots +df +dt +elpd +eq +foi +forall +frac +ggplot +hiv +lifecycle +mc +numerating +org +packagetemplate rhat -rhats rstan -RStan -Réunion se -serodata -serological +serocatalytic seromodel -Seroprevalence -seropositive -seropositivity +seropositivities +seroprev seroreversion serosurvey serosurveys -Serosurveys -seroprev -seroprevalence -seroreversion sim stan -Stanfit -Sumali sur -TBD triatomine -Triatomine -Trypanosoma -Torres -Universidad -VEEV -Vehtari -warmup -Yaneth -Zulma +u +vdots From 76cd72e4907a93826987ec2fea4cc4b4197f4f76 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 15 Aug 2024 00:31:41 -0500 Subject: [PATCH 43/78] fix: change `append()` by `c()` to append when possible --- R/build_stan_data.R | 10 +++++----- R/plot_seromodel.R | 2 +- R/summarise_seromodel.R | 10 +++++----- R/validation.R | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/build_stan_data.R b/R/build_stan_data.R index 735d869a..3c93c579 100644 --- a/R/build_stan_data.R +++ b/R/build_stan_data.R @@ -76,7 +76,7 @@ get_foi_index <- function( ) ) - foi_index <- append( + foi_index <- c( foi_index, rep( max(foi_index), @@ -106,7 +106,7 @@ set_stan_data_defaults <- function( foi_mean = prior_default$mean, foi_sd = prior_default$sd ) - stan_data <- append( + stan_data <- c( stan_data, foi_defaults ) @@ -119,7 +119,7 @@ set_stan_data_defaults <- function( seroreversion_mean = prior_default$mean, seroreversion_sd = prior_default$sd ) - stan_data <- append( + stan_data <- c( stan_data, seroreversion_defaults ) @@ -154,13 +154,13 @@ build_stan_data <- function( if (is.null(foi_index)) { foi_index_default <- get_foi_index(serosurvey = serosurvey, group_size = 1) - stan_data <- append( + stan_data <- c( stan_data, list(foi_index = foi_index_default) ) } else { # TODO: check that foi_index is the right size - stan_data <- append( + stan_data <- c( stan_data, list(foi_index = foi_index) ) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index e79e470a..e4159d63 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -443,7 +443,7 @@ plot_seromodel <- function( size_text = size_text ) - plot_list <- append( + plot_list <- c( plot_list, list(foi_plot, rhats_plot) ) diff --git a/R/summarise_seromodel.R b/R/summarise_seromodel.R index bf83de2f..79439d93 100644 --- a/R/summarise_seromodel.R +++ b/R/summarise_seromodel.R @@ -109,12 +109,12 @@ summarise_seromodel <- function( foi_rhat <- bayesplot::rhat(seromodel, "foi") %>% signif(rhat_digits) - check_convergence <- append( + check_convergence <- c( check_convergence, foi_rhat < 1.01 ) - summary_list <- append( + summary_list <- c( summary_list, list( foi = foi_summary, @@ -124,7 +124,7 @@ summarise_seromodel <- function( } else { rhats <- bayesplot::rhat(seromodel, "foi_vector") - check_convergence <- append( + check_convergence <- c( check_convergence, all(rhats < 1.01) ) @@ -145,12 +145,12 @@ summarise_seromodel <- function( ) %>% signif(rhat_digits) - check_convergence <- append( + check_convergence <- c( check_convergence, seroreversion_rate_rhat < 1.01 ) - summary_list <- append( + summary_list <- c( summary_list, list( seroreversion_rate = seroreversion_rate_summary, diff --git a/R/validation.R b/R/validation.R index 22050979..1fe005e0 100644 --- a/R/validation.R +++ b/R/validation.R @@ -24,7 +24,7 @@ stop_if_wrong_type <- function(serosurvey, col_types) { !any(vapply(valid_col_types, function(type) { do.call(sprintf("is.%s", type), list(serosurvey[[col]])) }, logical(1)))) { - error_messages <- append( + error_messages <- c( error_messages, sprintf( "`%s` must be of any of these types: `%s`", From a6cf80cd71ee429dd457d73041cb88e6446ed4ee Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 15 Aug 2024 00:34:39 -0500 Subject: [PATCH 44/78] fix: improve error messages readability --- R/build_stan_data.R | 22 ++++++++-------------- R/validation.R | 3 ++- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/R/build_stan_data.R b/R/build_stan_data.R index 3c93c579..40de7d5d 100644 --- a/R/build_stan_data.R +++ b/R/build_stan_data.R @@ -7,12 +7,9 @@ sf_normal <- function(mean = 0, sd = 1) { # Restricting normal inputs to be non-negative if (mean < 0 || sd <= 0) { - msg <- paste0( - "Normal distribution here only accepts", - " non-negative values for mean and standard deviation" - ) - message(msg) - stop() + stop( + "Normal distribution only accepts", + " `mean>=0` and `sd>0` for mean and standard deviation") } return(list(mean = mean, sd = sd, name = "normal")) @@ -26,13 +23,11 @@ sf_normal <- function(mean = 0, sd = 1) { #' @export sf_uniform <- function(min = 0, max = 10) { # Restricting uniform inputs to be non-negative - if (min < 0 || max < 0) { - msg <- paste0( - "Uniform distribution here only accepts", - " non-negative values for min and max" + if (min < 0 || (min >= max)) { + stop( + "Uniform distribution only accepts", + " 0<=min= max) { message("Uniform distribution only accepts min < max") @@ -180,8 +175,7 @@ build_stan_data <- function( if (is_seroreversion) { if (seroreversion_prior$name == "none") { - message("seroreversion_prior not specified") - stop() + stop("seroreversion_prior not specified") } else if (seroreversion_prior$name == "uniform") { stan_data$seroreversion_prior_index <- prior_index[["uniform"]] stan_data$seroreversion_min <- seroreversion_prior$min diff --git a/R/validation.R b/R/validation.R index 1fe005e0..c7475429 100644 --- a/R/validation.R +++ b/R/validation.R @@ -75,7 +75,8 @@ validate_survey <- function(survey_features) { if (!is_age_ok) stop( "Age bins in a survey are inclusive of both bounds, ", - "so the age_max of one bin cannot equal the age_min of another.") + "so the age_max of one bin cannot equal the age_min of another." + ) } validate_foi_df <- function(foi_df, cnames_additional) { From dc1166c8a0890506b96f80e69297c149296769a1 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 15 Aug 2024 00:36:58 -0500 Subject: [PATCH 45/78] fix: replace validation function by `checkmate::assert_names()` - Remove two validation functions --- R/simulate_serosurvey.R | 8 ++++---- R/validation.R | 44 ++++++++++++----------------------------- 2 files changed, 17 insertions(+), 35 deletions(-) diff --git a/R/simulate_serosurvey.R b/R/simulate_serosurvey.R index 7fd0561e..2b2c3127 100644 --- a/R/simulate_serosurvey.R +++ b/R/simulate_serosurvey.R @@ -508,7 +508,7 @@ simulate_serosurvey_time_model <- function( # Input validation validate_foi_df(foi, "year") - validate_survey(survey_features) + validate_survey_features(survey_features) validate_seroreversion_rate(seroreversion_rate) validate_survey_and_foi_consistency( survey_features, @@ -573,7 +573,7 @@ simulate_serosurvey_age_model <- function( # Input validation validate_foi_df(foi, "age") - validate_survey(survey_features) + validate_survey_features(survey_features) validate_seroreversion_rate(seroreversion_rate) validate_survey_and_foi_consistency( survey_features, @@ -639,7 +639,7 @@ simulate_serosurvey_age_and_time_model <- function( #nolint # Input validation validate_foi_df(foi, c("age", "year")) - validate_survey(survey_features) + validate_survey_features(survey_features) validate_seroreversion_rate(seroreversion_rate) validate_survey_and_foi_consistency_age_time( survey_features, @@ -790,7 +790,7 @@ simulate_serosurvey_general_model <- function( #nolint ) { # Input validation - validate_survey(survey_features) + validate_survey_features(survey_features) probability_serop_by_age <- probability_seropositive_general_model_by_age( construct_A_function, diff --git a/R/validation.R b/R/validation.R index c7475429..58797958 100644 --- a/R/validation.R +++ b/R/validation.R @@ -1,20 +1,16 @@ -stop_if_missing <- function(serosurvey, must_have_cols) { - if ( - !all( - must_have_cols - %in% colnames(serosurvey) - ) - ) { - missing <- - must_have_cols[which(!(must_have_cols %in% colnames(serosurvey)))] - stop( - "The following mandatory columns in `serosurvey` are missing.\n", - toString(missing) - ) - } -} +validate_serosurvey <- function(serosurvey) { + # Check that necessary columns are present + col_types <- list( + age_min = "numeric", + age_max = "numeric", + sample_size = "numeric", + n_seropositive = "numeric", + tsur = "numeric" + ) -stop_if_wrong_type <- function(serosurvey, col_types) { + checkmate::assert_names(names(serosurvey), must.include = names(col_types)) + + # Validates column types error_messages <- list() for (col in names(col_types)) { valid_col_types <- as.list(col_types[[col]]) @@ -39,25 +35,11 @@ stop_if_wrong_type <- function(serosurvey, col_types) { toString(error_messages) ) } -} - -validate_serosurvey <- function(serosurvey) { - col_types <- list( - age_min = "numeric", - age_max = "numeric", - sample_size = "numeric", - n_seropositive = "numeric", - tsur = "numeric" - ) - - stop_if_missing(serosurvey, must_have_cols = names(col_types)) - - stop_if_wrong_type(serosurvey, col_types) return(serosurvey) } -validate_survey <- function(survey_features) { +validate_survey_features <- function(survey_features) { if (!is.data.frame(survey_features) || !all( From 59a3abcbdc44d3286bf4151b788ddd777069ae5c Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 15 Aug 2024 00:56:36 -0500 Subject: [PATCH 46/78] feat: add stan time-varying models in log-scale --- inst/stan/data/foi_prior_data.stan | 3 + inst/stan/time_log_no_seroreversion.stan | 69 +++++++++++++++++++++ inst/stan/time_log_seroreversion.stan | 77 ++++++++++++++++++++++++ 3 files changed, 149 insertions(+) create mode 100644 inst/stan/time_log_no_seroreversion.stan create mode 100644 inst/stan/time_log_seroreversion.stan diff --git a/inst/stan/data/foi_prior_data.stan b/inst/stan/data/foi_prior_data.stan index c731f3f9..2df33061 100644 --- a/inst/stan/data/foi_prior_data.stan +++ b/inst/stan/data/foi_prior_data.stan @@ -8,3 +8,6 @@ // normal real foi_mean; real foi_sd; + // cauchy + real foi_sigma_rw_loc; + real foi_sigma_rw_sc; diff --git a/inst/stan/time_log_no_seroreversion.stan b/inst/stan/time_log_no_seroreversion.stan new file mode 100644 index 00000000..ee5b86a9 --- /dev/null +++ b/inst/stan/time_log_no_seroreversion.stan @@ -0,0 +1,69 @@ +functions { + #include functions/prob_infected_time.stan +} + +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan +} + +transformed data { + int n_foi = max(foi_index); +} + +parameters { + vector[n_foi] log_foi_vector; + real sigma; +} + +transformed parameters { + vector[n_foi] foi_vector; + vector[n_observations] prob_infected; + + foi_vector = exp(log_foi_vector); + + prob_infected = prob_infected_time_model( + age_groups, + n_observations, + age_max, + foi_vector, + foi_index, + 0.0 + ); +} + +model { + n_seropositive ~ binomial(sample_size, prob_infected); + sigma ~ normal(foi_sigma_rw_loc, foi_sigma_rw_sc); + + // force of infection prior + if (foi_prior_index == 0) + foi_vector[1] ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi_vector[1] ~ normal(foi_mean, foi_sd); + target += log(exp(log_foi_vector[1])); // Jacobian term + + for(i in 2:n_foi) + log_foi_vector[i] ~ normal(log_foi_vector[i - 1], sigma); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_expanded; + + for(i in 1:age_max) { + int time_index = age_max - i + 1; + foi_expanded[i] = foi_vector[foi_index[i]]; + } + + prob_infected_expanded = prob_infected_time_model( + ages, + age_max, // number of ages + age_max, // number of years + foi_vector, + foi_index, + 0.0 + ); +} diff --git a/inst/stan/time_log_seroreversion.stan b/inst/stan/time_log_seroreversion.stan new file mode 100644 index 00000000..7410d1c9 --- /dev/null +++ b/inst/stan/time_log_seroreversion.stan @@ -0,0 +1,77 @@ +functions { + #include functions/prob_infected_time.stan +} + +data { + #include data/basic_data.stan + #include data/foi_prior_data.stan + #include data/seroreversion_prior_data.stan +} + +transformed data { + int n_foi = max(foi_index); +} + +parameters { + vector[n_foi] log_foi_vector; + real seroreversion_rate; + real sigma; +} + +transformed parameters { + vector[n_foi] foi_vector; + vector[n_observations] prob_infected; + + foi_vector = exp(log_foi_vector); + + prob_infected = prob_infected_time_model( + age_groups, + n_observations, + age_max, + foi_vector, + foi_index, + seroreversion_rate + ); +} + +model { + n_seropositive ~ binomial(sample_size, prob_infected); + sigma ~ normal(foi_sigma_rw_loc, foi_sigma_rw_sc); + + // force of infection prior + if (foi_prior_index == 0) + foi_vector[1] ~ uniform(foi_min, foi_max); + if (foi_prior_index == 1) + foi_vector[1] ~ normal(foi_mean, foi_sd); + target += log(exp(log_foi_vector[1])); // Jacobian term + + for(i in 2:n_foi) + log_foi_vector[i] ~ normal(log_foi_vector[i - 1], sigma); + + // seroreversion prior + if (seroreversion_prior_index == 0) + seroreversion_rate ~ uniform(seroreversion_min, seroreversion_max); + if (seroreversion_prior_index == 1) + seroreversion_rate ~ normal(seroreversion_mean, seroreversion_sd); +} + +generated quantities{ + #include generated_quantities/log_likelihood.stan + + vector[age_max] prob_infected_expanded; + vector[age_max] foi_expanded; + + for(i in 1:age_max) { + int time_index = age_max - i + 1; + foi_expanded[time_index] = foi_vector[foi_index[i]]; + } + + prob_infected_expanded = prob_infected_time_model( + ages, + age_max, // number of ages + age_max, // number of years + foi_vector, + foi_index, + seroreversion_rate + ); +} From b7b66a30bb5c8dff4f7bedf5f116131658eab655 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 15 Aug 2024 02:55:33 -0500 Subject: [PATCH 47/78] feat: modify R modules to allow log-scale model implementation - Add `sf_cauchy()` to set sigma prior in tv log modules - Add `is_log_foi` parameter to `fit_seromodel` - Add corresponding defaults --- R/build_stan_data.R | 29 ++++++++++++++++++++++++++++- R/fit_seromodel.R | 21 +++++++++++++++++++-- inst/extdata/config.yml | 3 +++ 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/R/build_stan_data.R b/R/build_stan_data.R index 40de7d5d..e7fc8891 100644 --- a/R/build_stan_data.R +++ b/R/build_stan_data.R @@ -36,6 +36,24 @@ sf_uniform <- function(min = 0, max = 10) { return(list(min = min, max = max, name = "uniform")) } +#' Sets cauchy distribution parameters for sampling +#' +#' @param scale Scale +#' of the normal distribution +#' @param sd Standard deviation of the normal distribution +#' @return List with specified statistics and name of the model +#' @export +sf_cauchy <- function(location = 0, scale = 1) { + # Restricting normal inputs to be non-negative + if (location < 0 || scale < 0) { + stop( + "Normal distribution only accepts", + " `location>=0` and `scale=>0` for mean and standard deviation") + } + + return(list(location = location, scale = scale, name = "cauchy")) +} + #' Sets empty distribution sf_none <- function() { return(list(name = "none")) @@ -99,7 +117,9 @@ set_stan_data_defaults <- function( foi_min = prior_default$min, foi_max = prior_default$max, foi_mean = prior_default$mean, - foi_sd = prior_default$sd + foi_sd = prior_default$sd, + foi_sigma_rw_loc = prior_default$location, + foi_sigma_rw_sc = prior_default$scale ) stan_data <- c( stan_data, @@ -133,6 +153,8 @@ build_stan_data <- function( model_type = "constant", foi_prior = sf_uniform(), foi_index = NULL, + is_log_foi = FALSE, + foi_sigma_rw = sf_none(), is_seroreversion = FALSE, seroreversion_prior = sf_none() ) { @@ -173,6 +195,11 @@ build_stan_data <- function( stan_data$foi_sd <- foi_prior$sd } + if(is_log_foi) { + stan_data$foi_sigma_rw_loc <- foi_sigma_rw$location + stan_data$foi_sigma_rw_sc <- foi_sigma_rw$scale + } + if (is_seroreversion) { if (seroreversion_prior$name == "none") { stop("seroreversion_prior not specified") diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index 0fc243e7..95d2fe11 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -24,6 +24,7 @@ add_age_group_to_serosurvey <- function(serosurvey) { #' \item{`n_seropositive`}{Number of positive samples for each age group} #' } #' @param model_type Type of the model. Either "constant", "age" or "time" +#' @param is_log_foi Boolean to set logarithmic scale in the FOI #' @param foi_prior Force-of-infection distribution specified by means of #' the helper functions. Currently available options are: #' \describe{ @@ -31,6 +32,12 @@ add_age_group_to_serosurvey <- function(serosurvey) { #' \item{`sf_uniform`} #' \item{`sf_none`} #' } +#' @param foi_sigma_rw Prior distribution for the standard deviation of the +#' force-of-infection. Currently available options are: +#' \describe{ +#' \item{`sf_cauchy`} +#' \iten{`sf_none`} +#' } #' @param foi_index Integer vector specifying the age-groups for which #' force-of-infection values will be estimated #' @param is_seroreversion Boolean specifying whether to include @@ -49,7 +56,9 @@ add_age_group_to_serosurvey <- function(serosurvey) { fit_seromodel <- function( serosurvey, model_type = "constant", + is_log_foi = FALSE, foi_prior = sf_normal(), + foi_sigma_rw = sf_none(), foi_index = NULL, is_seroreversion = FALSE, seroreversion_prior = sf_uniform(), @@ -64,15 +73,23 @@ fit_seromodel <- function( model_type = model_type, foi_prior = foi_prior, foi_index = foi_index, + is_log_foi = is_log_foi, + foi_sigma_rw = foi_sigma_rw, is_seroreversion = is_seroreversion, seroreversion_prior = seroreversion_prior ) + # Assigning right name to the model based on user specifications + model_name <- model_type + if (is_log_foi) { + model_name <- paste0(model_name, "_log") + } if (is_seroreversion) - model_name <- paste0(model_type, "_seroreversion") + model_name <- paste0(model_name, "_seroreversion") else - model_name <- paste0(model_type, "_no_seroreversion") + model_name <- paste0(model_name, "_no_seroreversion") + # Compile or load Stan model model <- stanmodels[[model_name]] seromodel <- rstan::sampling( model, diff --git a/inst/extdata/config.yml b/inst/extdata/config.yml index 70b9e928..9e0f63d5 100644 --- a/inst/extdata/config.yml +++ b/inst/extdata/config.yml @@ -11,3 +11,6 @@ default: # normal distribution mean: 0 sd: 1 + # cauchy + location: 0 + scale: 1 From 34619301712b2064911a01ee4864f7927bcbd439 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 15 Aug 2024 02:58:21 -0500 Subject: [PATCH 48/78] feat: update stanmodels.R --- R/stanmodels.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/stanmodels.R b/R/stanmodels.R index 0d07aea0..7952d2b2 100644 --- a/R/stanmodels.R +++ b/R/stanmodels.R @@ -1,13 +1,15 @@ # Generated by rstantools. Do not edit by hand. # names of stan models -stanmodels <- c("age_no_seroreversion", "age_seroreversion", "constant_no_seroreversion", "constant_seroreversion", "time_no_seroreversion", "time_seroreversion") +stanmodels <- c("age_no_seroreversion", "age_seroreversion", "constant_no_seroreversion", "constant_seroreversion", "time_log_no_seroreversion", "time_log_seroreversion", "time_no_seroreversion", "time_seroreversion") # load each stan module Rcpp::loadModule("stan_fit4age_no_seroreversion_mod", what = TRUE) Rcpp::loadModule("stan_fit4age_seroreversion_mod", what = TRUE) Rcpp::loadModule("stan_fit4constant_no_seroreversion_mod", what = TRUE) Rcpp::loadModule("stan_fit4constant_seroreversion_mod", what = TRUE) +Rcpp::loadModule("stan_fit4time_log_no_seroreversion_mod", what = TRUE) +Rcpp::loadModule("stan_fit4time_log_seroreversion_mod", what = TRUE) Rcpp::loadModule("stan_fit4time_no_seroreversion_mod", what = TRUE) Rcpp::loadModule("stan_fit4time_seroreversion_mod", what = TRUE) From 9d64eae1a9f8b736efbd73d7a6e9d4c5c68edb38 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 15 Aug 2024 10:29:01 -0500 Subject: [PATCH 49/78] fix: correct foi_sigma_rw parameters set up --- R/build_stan_data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/build_stan_data.R b/R/build_stan_data.R index e7fc8891..c776532a 100644 --- a/R/build_stan_data.R +++ b/R/build_stan_data.R @@ -195,7 +195,7 @@ build_stan_data <- function( stan_data$foi_sd <- foi_prior$sd } - if(is_log_foi) { + if (foi_sigma_rw$name == "cauchy") { stan_data$foi_sigma_rw_loc <- foi_sigma_rw$location stan_data$foi_sigma_rw_sc <- foi_sigma_rw$scale } From a569288ab2755743ac9859f7906cd9896577a510 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 15 Aug 2024 10:29:49 -0500 Subject: [PATCH 50/78] feat: set default behavior for sampling initialization - Set different init functions for log and no-log models --- R/fit_seromodel.R | 41 +++++++++++++++++++++++++++++++++++++++++ inst/extdata/config.yml | 2 ++ 2 files changed, 43 insertions(+) diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index 95d2fe11..f5cdad57 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -13,6 +13,38 @@ add_age_group_to_serosurvey <- function(serosurvey) { return(serosurvey) } +#' Sets initialization function for sampling +#' +#' @inheritParams fit_seromodel +#' @export +set_foi_init <- function( + foi_init, + is_log_foi, + foi_index +) { + + # Set default behavior for initialization + if (is.null(foi_init)) { + config_file <- system.file("extdata", "config.yml", package = "serofoi") + init_default <- config::get(file = config_file, "priors")$defaults$init + + if (is_log_foi) { + foi_init <- function() { + list(log_foi_vector = rep(log(init_default), max(foi_index))) + } + } else { + foi_init <- function() { + list(foi_vector = rep(init_default, max(foi_index))) + } + } + } + + checkmate::assert_class(foi_init, "function") + checkmate::assert_double(unlist(foi_init()[[1]]), len = max(foi_index)) + + return(foi_init) +} + #' Runs specified stan model for the force-of-infection #' #' @param serosurvey @@ -60,6 +92,7 @@ fit_seromodel <- function( foi_prior = sf_normal(), foi_sigma_rw = sf_none(), foi_index = NULL, + foi_init = NULL, is_seroreversion = FALSE, seroreversion_prior = sf_uniform(), ... @@ -79,6 +112,12 @@ fit_seromodel <- function( seroreversion_prior = seroreversion_prior ) + foi_init <- set_foi_init( + foi_init = foi_init, + is_log_foi = is_log_foi, + foi_index = stan_data$foi_index + ) + # Assigning right name to the model based on user specifications model_name <- model_type if (is_log_foi) { @@ -91,9 +130,11 @@ fit_seromodel <- function( # Compile or load Stan model model <- stanmodels[[model_name]] + seromodel <- rstan::sampling( model, data = stan_data, + init = foi_init, ... ) seromodel@model_name <- model_name diff --git a/inst/extdata/config.yml b/inst/extdata/config.yml index 9e0f63d5..3826fbd2 100644 --- a/inst/extdata/config.yml +++ b/inst/extdata/config.yml @@ -14,3 +14,5 @@ default: # cauchy location: 0 scale: 1 + # init + init: 0.1 From 367724c066da90add0d234b6438a76bc34b5be73 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 15 Aug 2024 12:06:59 -0500 Subject: [PATCH 51/78] fix: simplify loaded datasets structure and update column NAMESPACE - Change `tsur` to `survey_year` --- data/chagas2012.RData | Bin 637 -> 549 bytes data/chik2015.RData | Bin 1100 -> 257 bytes data/veev2012.RData | Bin 331 -> 268 bytes inst/extdata/chagas2012.RDS | Bin 618 -> 516 bytes inst/extdata/chik2015.RDS | Bin 1083 -> 236 bytes inst/extdata/serodata.RDS | Bin 618 -> 0 bytes inst/extdata/simdata_constant.RDS | Bin 310 -> 0 bytes inst/extdata/simdata_large_epi.RDS | Bin 306 -> 0 bytes inst/extdata/simdata_sw_dec.RDS | Bin 305 -> 0 bytes inst/extdata/veev2012.RDS | Bin 313 -> 247 bytes 10 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 inst/extdata/serodata.RDS delete mode 100644 inst/extdata/simdata_constant.RDS delete mode 100644 inst/extdata/simdata_large_epi.RDS delete mode 100644 inst/extdata/simdata_sw_dec.RDS diff --git a/data/chagas2012.RData b/data/chagas2012.RData index 02353b011673df8b1a64a1f27cd31e0cbfdc9de4..c9dd0144ce52329a9620a2371f28ca93cd1ef917 100644 GIT binary patch literal 549 zcmV+=0^0o_iwFP!000002JM&IP8v}dg|`+W(A1FF3om*>Ok$2;$sS!tf3>D|7 z;0x;$Sm^uw2E7U{G7sj#58x8`7W@ik#r>Jjfp5h9X21-X2Is~7-hm6?tmvB&d@VQ| z(I@NJpB9vH_9uL<+=O*Nb$>jlpBDE`gyTV-$OmzLo|hu7!f~FL`N+D&{dJ_m94T@A zm4DRr)c&XL8?}E_K1xS@f4nTgVf)DXs6OsHEH39#)<>O3_Wkd9ht1JHbI5(tAm@_L z$ty4e%IBTW1AT`6L;s+k7;y$-z&U4kw_e}^uUh;Kk|KyN~CLT^EDL06zF(A&`4&{gOvbPc)& zy#u{tfs9}KtRr7XzK(ny`Ca7q{Ja{?5!m-Rnj@bNo}-B9gRVW(F0ncoSd4OCQN65I3EFO`K&X7<$vhHOcVp|6gk#hU~o{a8NlZrccsJ zshEl{#8fO{nksR8RLia^;}~&6#Z}5M-Tt7v+AZXl3nu%nai*?i?k&nFHyhb}aV5J} z6h|C!#Q9GhrS44YqFquKm%Rp-#y(#|Z-CD!4i>-_a20$Ez5;V>e2VA6M{K-BFaug( zk&X8Rd;^!Cw;E}4C{dEazDtw&&ExL{UDFU1D~Gf9`jY$ zr}a`iGB0^~9#LT*DR%u4pX<76|Eta$HGfn*N=LnablHrv=HdBK{b=4aNwuD$`+L7O=CNp+U$G1cJ>=rVK}x&mE+u0mI#YtS|5P3SEm{5o_UdK-Eh zdIx$3x&hsQ-i6+U-hqONzL&2NK_m3!;%OQBCe+hpI58i-OT8|*UKK|+RpH6 zPkiyk7hinwueBfk=Qz>V=N%UxZYh{I-J$cTGolo4;OR38wFkq#=R)1m-Ok(dUOy!D z7s$A0#-Y$fKW7w#Itcm53RGwsqd1b>O2 XYES3hJEp%;zoz$Z=gvBmQ6B&Rcj!Br diff --git a/data/chik2015.RData b/data/chik2015.RData index f09c9dbb1e9e2aa68a8f62df43abc66ae9e76d85..74ed65066779e6939c32bb4f280670478f0f2d91 100644 GIT binary patch literal 257 zcmV+c0sj6UiwFP!000001`BeDFy@NjVqjokW?*3flB_@`18ZoAo2~@|0}B(7!^ptG zzzL)|k~1>1jSLJ;fvSW-ih!68h*=!w*MKxJ;G%(m7o-k|CjjvjAl3!qT)2M!28jAV z2MBEl(d6LW0Midq=K>Y?gV6RLpnMYsRCT^k^MVgRXk(~4XP9{?{^BgkFW1XU%uOwZ zI+O{d?EnA&f6x@NCg&s;gA@rM338<*mL%$>6#@05D~FjW01hmOAa`+TQCVtbd}V54 z5nL!Sw;(4qzBscg6)MD+7hjxOlwXivoLQ1t1`}mZOizu^&CEmLCRPBM|3DZ3^vsR# H%K-oYGkIzx literal 1100 zcmV-S1he}eiwFP!000001Fe_MZrer_g_+2169X**^ab2qyRZ>AEs(Bia`-pm3`ui_ z5~WSSDs*C0mHubcpPvMzQcl;>~0s(~}ngHo! zU}9oUMAQQdgHR%TA>9cTdTb2kVGEh5CyP}=wuW_ERyU-A0h>RBTa+! z4qgBYLG+2pW@PDP84~Zb!m~I*X~Y_0G1q)tpdMn2K*c3omp)MwsUR~XK+;LcB_a#V zO&S^)1XXe`8M>IoFu*K^8El6+bQ2egH{q_4L~M;AmPjZhDMm}En~X6F}ZT5fRzc0fx}rR&;{AX(fr6yygz0 z(}2h!Qk1gJ5Ncgefr-@?nb$lH^q2!r*gAni$|6VwTu%e&rXkjG8X*p;6hwqNRhV(g zbf-CW3))W6nz7b%HzBfB5b0UYQ0EeJ%N6$2oRQ4ALFe4%@`9r=sCj{s%MH?!Pf#Tn zE^G^K#YIMB0t%i^#)X05LN{DaCd!FWFg%^i5Xmz9sxT@bq9zTC%kT^^`3aG&JZ_Ca z3XP>ZCB;Swk)Dm|!x)unDh@FbM2J}~X)7?WJc8Z{mSTG|&WadW^ZMl1nQp|0{&$_WfA&z;3 zRxZLSMpv4PG_tP0vrdc>c6}b%WV8m(8kK S%$h#^AN~bRgc8!c4gdf&I})t` diff --git a/data/veev2012.RData b/data/veev2012.RData index 0db4d9ee87644711d700de73795d9f79214ed94b..ad072d5cd45a8656db35d87de3adbcf29e10b855 100644 GIT binary patch delta 252 zcmVOT+b6J<|KPmRyb%tPTOR-jqSS(IO{ z2eXCsdHFxU{G& zwGyVJB)=pv2PT!AUz%4^4CS#Wrl-c|X6B)A6Dy!xmJ*;jFe$LUqDr__YHg6Muq&PgnW`3WY-m6BMJsFzj*G!Mx8 O2f_fcKFRUhyWrhD#sGClD{`|2J~9L1jym_Um`fogk;@yi+OFu7W}R4Cz-DIy+5_FKFwn ziN1UEy=T0XYf05&i~pfw9c{Q`EP@*qm#gqiaGrT^3Y-NOz)^4ojEnV|p8!Y1dV^pL zjDpi*y)keG92WBi1&0KOedft=oF5gGan29BTz{{3kLu^RH@;u28?KLg<3!$z^Y`?L zxT=ryyv+NK3;#ThRH!2*t}pq2UCqvKc3;2!{qlb9_y6%S1RL!m$NSCWzK!CtF6DT? z^T>I>J#V8r`c{YBCknDI`JD`aF;IT*{2th6*nikR*iRW-9^PK{)vCXOMjBTK%BDgyK#@*VCYQ~XOrcb_dRoFX35N1!rPouDydS5 zq)MiBQ>EV=)eEcIyRAi5-L_*Vye`@mHNEUL zurdz#5_$t%VH|t{E`h7yL+}My6yq~q0w0L+7Qj4cfn_n?Bk(D>DDoBr?+Y%* z&A;rtar4K;<8<8j$IBL+HIM8cm&bW$`Ep;%{&DM(dH=fJS^Ma(eaLw%ko%J7u0n4@Z$j6gYtVJ*I`kIwHVMB0-GJVK-htkQ-i2;LH=*~S_n`No_n}+RE$9R2 z0~@6OBVZfxHsWo>+laqL{4j_svyZ@WK(mhm&bg25Zyy8S&~Of_=>HE%{R`YQP17{l zjGm8lt=TxN7-57FS5rjKtJOH!$~%3(S2!wm+~L=r@r`eM;~U@jSK3#pg!ZrT<<@u3 zx*lF`S(w+oq5G*jVi$klJEshF2E)GZLEUgpy6?_<{fKnFK&E^>4nxyCcNB)Yo{x6s ze0lE&$0M<{tULHz9!hz*quh4pcr+KfnN( zg}^c*QmiyUOLP=-F_GOWOLV{)h6q9hj5IMLLLrDTS|CGW2}~X!yQu`I4Ux8b1~e5g zr#mGFE(0Q+bi^9d494gt{Fn$^AyQxx5jByvssOe%Jx4^qQlX=so=zzbk0h#?W#l#WhTG4Vkw zJWEoPMq(hAaLp$r>LGCmR8qlp6%q}R3Mxke6rEIDBC5pPl%;`1P!;!*k&ju7BFtix z!**0aH}$c2Q|=n6WNQMkR6!xlFj`98bc|VOA34@RMl6Z&Obdul6wZiy#wlJ&T~5#aPFgL>w|DhzND2G2@Ks&I;<5w4I?fXRQ~0N@S}b z(zAl0E)?cgXzZy4BUx~RF1X7TB}ZdWixMSQSfr(xqDn4&*p}Ri%bdsrlsuiR4+G1E zZn>PSEQnCBJe}+a$+G;ausR~5CM}E0@(i%W36ZTlZmmTMt)n{?#p;wu&&Kp&j7qf= z4lxx(h&e84Cor(%Cza!F??xEdrJ9H#oJP!C4$qvU7^LLoTKf z6lUB8XFNA&U1E2?jt?*A&1(2K<|p%2bGg1-(y3^Ud`p|EAhga=tJ@6-eJb$`W-9^PK{)vCXOMjBTK%BDgyK#@*VCYQ~XOrcb_dRoFX35N1!rPouDydS5 zq)MiBQ>EV=)eEcIyRAi5-L_*Vye`@mHNEUL zurdz#5_$t%VH|t{E`h7yL+}My6yq~q0w0L+7Qj4cfn_n?Bk(D>DDoBr?+Y%* z&A;rtar4K;<8<8j$IBL+HIM8cm&bW$`Ep;%{&DM(dH=fJS^Ma(eaLw%ko%J7u0n4@Z$j6gYtVJ*I`kIwHVMB0-GJVK-htkQ-i2;LH=*~S_n`No_n}+RE$9R2 z0~@6OBVZfxHsWo>+laqL{4j_svyZ@WK(mhm&bg25Zyy8S&~Of_=>HE%{R`YQP17{l zjGm8lt=TxN7-57FS5rjKtJOH!$~%3(S2!wm+~L=r@r`eM;~U@jSK3#pg!ZrT<<@u3 zx*lF`S(w+oq5G*jVi$klJEshF2E)GZLEUgpy6?_<{fKnFK&E^>4nxyCcNB)Yo{x6s ze0lE&$0M<{tULHz@#Fqra4Uz)M3!%wMNa8?O<*TS&7o!Z%T`5tOG@7y@pNVOa$zICe-G4DSk1Ms~ z%6eH6+Fk8Uyv^COO6yf?NzS#i=ETT80;87_U9WTxOZ7H#yRlc;vj@Iung!D57hGcm IQ$_^<0LGD%8dU|?WoU}0irU}gm}8CXL@+;lB~V!}WUClCt&F&6_1 zm@QbGnVXVWk{F+pSd^X`pIVSP{3RF|_&_1xV8Q?f4we+sb__7U3l3RS019mAH!}}~n^*yhmlB{kFe$LUqDr__YHk diff --git a/inst/extdata/simdata_sw_dec.RDS b/inst/extdata/simdata_sw_dec.RDS deleted file mode 100644 index 8827a28049572e44fe0dfe8ca3afe016e5bad679..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 305 zcmV-10nYv(iwFP!000001B>8dU|?WoU}0irU}gm}8CXL@+;lB~V!}WUClCt&F&6_1 zn9Wz5nVXVWk{DlH9-oq$JWND^A|O9Hm@t5WgC&Ku9Rm#Tg2N6KumdqS5Q_k@I220) zu^ikTP=yXMP+9{@8$xL_C~Xa;p&A^V8X&YQl<(dEvy-;wE?|L(Q;B9kO>-=M`FW{y zWuw~92}~`eIf*5yDT7x4)y2%7>F)Gk0!s}blMyP!nwOZHS`2jxCsdHFxU{G&wGyVJ zB)=pv2PT!AUz%4^4CS#Wrl-c|X6B)A6DwfxQUWvwCI!}4R0)?#EiQqwITG_qGL!OC zD$$*moRe4#a~e#L3seN?r4<2Pji#KlD8F0}<{MBJW&$Pn|NsAg15*D0$HVa8G6ett DDTjAV diff --git a/inst/extdata/veev2012.RDS b/inst/extdata/veev2012.RDS index 53f7de17ce5e9d0929c8190919f73c90951050e8..b51b140e78a05f88b8b34615237c79992f0752dd 100644 GIT binary patch delta 212 zcmV;_04x8w0`~z#ABzY80000000xWTVqjokW?*3flB_@`18ZoAo2~_rCk)~MF&_}K zIm|BrNiq;i!}Xa!I1Xk|S_?{ZKxq*ut-^q+ky$N&m^KzBAi>DM!oUfnS@RNeQ;UHN z0T2f&$X#4oRF+y9UzwU%1Q$xoEyziYFV3t=g$nWI#TTa*{;(^KPf zGxJcmi4|zpau(&6>%pw%1^EMrd7x~Nzxbg}5=3`za!z6~%)Kx{u9U=*M7^{kpm{*v OKLF_mmx4(e0ssIfTULz# delta 278 zcmV+x0qOqt0l5NMABzY80000000xWTVqjokW?*4rWMF0mG8tGyL)>&NfMUWx4kr)` z05KZ_3z*Ft;OM7oWMF8d8|La7Mn6FY20oB0984I%z`+bkYe8uaC@lh|RTy9ZqLD-` ze>8W_FMx;>Ni(3v4l^*&$Yr9M#Omtf8SF?o2QBq@rn@U7=9Ofk#5C;%fvFoP$Ov;< zUSe))G1O_CP(ilh(xS4|N|=(8{F1~Rm{f9pX-$D&bP8#U)TS2gq$n`6-oXPU9@fG%weKIgb|{atsVSP&O!q@I##~i0-H4oWx?7 cpJ0MqDTyVCdTB*K^MJg60F4fab?O5E09&GUO#lD@ From 1f806bf1c59100c1bb0d32d5957643ceeeea8f6b Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 15 Aug 2024 12:09:09 -0500 Subject: [PATCH 52/78] fix: change `tsur` to `survey_year` in R modules --- R/fit_seromodel.R | 2 +- R/plot_seromodel.R | 4 ++-- R/validation.R | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index f5cdad57..aa6d0030 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -49,7 +49,7 @@ set_foi_init <- function( #' #' @param serosurvey #' \describe{ -#' \item{`tsur`}{Year in which the survey took place} +#' \item{`survey_year`}{Year in which the survey took place} #' \item{`age_min`}{Floor value of the average between age_min and age_max} #' \item{`age_max`}{The size of the sample} #' \item{`sample_size`}{Number of samples for each age group} diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index e4159d63..bfb2b55b 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -226,7 +226,7 @@ plot_foi_estimates <- function( } else if (startsWith(model_name, "time")) { xlab <- "Year" ages <- rev(1:max(serosurvey$age_max)) - years <- unique(serosurvey$tsur) - ages + years <- unique(serosurvey$survey_year) - ages foi_central_estimates <- mutate( foi_central_estimates, year = years @@ -305,7 +305,7 @@ plot_rhats <- function( } else if (startsWith(model_name, "time")) { xlab <- "Year" ages <- rev(1:max(serosurvey$age_max)) - years <- unique(serosurvey$tsur) - ages + years <- unique(serosurvey$survey_year) - ages rhats_df <- data.frame( year = years, rhat = rhats diff --git a/R/validation.R b/R/validation.R index 58797958..405094b8 100644 --- a/R/validation.R +++ b/R/validation.R @@ -5,7 +5,7 @@ validate_serosurvey <- function(serosurvey) { age_max = "numeric", sample_size = "numeric", n_seropositive = "numeric", - tsur = "numeric" + survey_year = "numeric" ) checkmate::assert_names(names(serosurvey), must.include = names(col_types)) From a6e34a0bae2ea8fd65d36090f98ae77ce5d8f02a Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 15 Aug 2024 12:09:44 -0500 Subject: [PATCH 53/78] update NAMESPACE --- NAMESPACE | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 53361878..87e703e5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,8 @@ export(plot_summary) export(prepare_serosurvey_for_plotting) export(probability_seropositive_by_age) export(probability_seropositive_general_model_by_age) +export(set_foi_init) +export(sf_cauchy) export(sf_normal) export(sf_uniform) export(simulate_serosurvey) From fdb3d3a84df6eb138782b149b250f8bcd8ba24d5 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 02:49:45 -0500 Subject: [PATCH 54/78] update DESCRIPTION --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3dfb3665..6010d0bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -56,7 +56,8 @@ Imports: tidyr, tibble, Matrix, - glue + glue, + config LinkingTo: BH (>= 1.66.0), Rcpp (>= 0.12.0), From 209dc30752608386352a3fbb8d3e136c411a8923 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 02:54:41 -0500 Subject: [PATCH 55/78] feat: enable sigma prior specification for all foi_models - Set defaults in the log and regular scale for sigma statistics - Use Cauchy distribution for sigma prior in regular scale - Use normal distribution for sigma prior in log scale --- R/build_stan_data.R | 30 ++++++++++++++++++++++++---- inst/stan/age_no_seroreversion.stan | 2 +- inst/stan/age_seroreversion.stan | 2 +- inst/stan/time_no_seroreversion.stan | 2 +- inst/stan/time_seroreversion.stan | 2 +- 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/R/build_stan_data.R b/R/build_stan_data.R index c776532a..5065a471 100644 --- a/R/build_stan_data.R +++ b/R/build_stan_data.R @@ -107,6 +107,7 @@ get_foi_index <- function( #' @return List with default values of stan data for sampling set_stan_data_defaults <- function( stan_data, + is_log_foi = FALSE, is_seroreversion = FALSE ) { config_file <- system.file("extdata", "config.yml", package = "serofoi") @@ -117,10 +118,28 @@ set_stan_data_defaults <- function( foi_min = prior_default$min, foi_max = prior_default$max, foi_mean = prior_default$mean, - foi_sd = prior_default$sd, - foi_sigma_rw_loc = prior_default$location, - foi_sigma_rw_sc = prior_default$scale + foi_sd = prior_default$sd ) + # Add sigma defaults depending on scale + if (is_log_foi) { + # Normal distribution defaults + foi_defaults <- c( + foi_defaults, + list( + foi_sigma_rw_loc = prior_default$mean, + foi_sigma_rw_sc = prior_default$sd + ) + ) + } else { + # Cauchy distribution defaults + foi_defaults <- c( + foi_defaults, + list( + foi_sigma_rw_loc = prior_default$location, + foi_sigma_rw_sc = prior_default$scale + ) + ) + } stan_data <- c( stan_data, foi_defaults @@ -167,7 +186,10 @@ build_stan_data <- function( sample_size = serosurvey$sample_size, age_groups = serosurvey$age_group ) %>% - set_stan_data_defaults(is_seroreversion = is_seroreversion) + set_stan_data_defaults( + is_log_foi = is_log_foi, + is_seroreversion = is_seroreversion + ) if (is.null(foi_index)) { foi_index_default <- get_foi_index(serosurvey = serosurvey, group_size = 1) diff --git a/inst/stan/age_no_seroreversion.stan b/inst/stan/age_no_seroreversion.stan index 09b83c11..cb832b85 100644 --- a/inst/stan/age_no_seroreversion.stan +++ b/inst/stan/age_no_seroreversion.stan @@ -30,7 +30,7 @@ transformed parameters { model { n_seropositive ~ binomial(sample_size, prob_infected); - sigma ~ cauchy(0, 1); + sigma ~ cauchy(foi_sigma_rw_sc, foi_sigma_rw_sc); // force of infection prior if (foi_prior_index == 0) diff --git a/inst/stan/age_seroreversion.stan b/inst/stan/age_seroreversion.stan index b9326285..a3f298ff 100644 --- a/inst/stan/age_seroreversion.stan +++ b/inst/stan/age_seroreversion.stan @@ -32,7 +32,7 @@ transformed parameters { model { n_seropositive ~ binomial(sample_size, prob_infected); - sigma ~ cauchy(0, 1); + sigma ~ cauchy(foi_sigma_rw_loc, foi_sigma_rw_sc); // force of infection prior if (foi_prior_index == 0) diff --git a/inst/stan/time_no_seroreversion.stan b/inst/stan/time_no_seroreversion.stan index 1540f359..d7af6dad 100644 --- a/inst/stan/time_no_seroreversion.stan +++ b/inst/stan/time_no_seroreversion.stan @@ -31,7 +31,7 @@ transformed parameters { model { n_seropositive ~ binomial(sample_size, prob_infected); - sigma ~ cauchy(0, 1); + sigma ~ cauchy(foi_sigma_rw_loc, foi_sigma_rw_sc); // force of infection prior if (foi_prior_index == 0) diff --git a/inst/stan/time_seroreversion.stan b/inst/stan/time_seroreversion.stan index e1291a08..f6bb0305 100644 --- a/inst/stan/time_seroreversion.stan +++ b/inst/stan/time_seroreversion.stan @@ -33,7 +33,7 @@ transformed parameters { model { n_seropositive ~ binomial(sample_size, prob_infected); - sigma ~ cauchy(0, 1); + sigma ~ cauchy(foi_sigma_rw_sc, foi_sigma_rw_sc); // force of infection prior if (foi_prior_index == 0) From 3170390e886a2e0edab3981cb55d650e7890ad93 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 02:57:47 -0500 Subject: [PATCH 56/78] fix: change default seroreversion prior to `sf_normal` --- R/fit_seromodel.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index aa6d0030..49d233db 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -94,7 +94,7 @@ fit_seromodel <- function( foi_index = NULL, foi_init = NULL, is_seroreversion = FALSE, - seroreversion_prior = sf_uniform(), + seroreversion_prior = sf_normal(), ... ) { serosurvey <- serosurvey %>% From 60866f769a2d7cd12d2cd60aa741317e8e805ffe Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 02:58:42 -0500 Subject: [PATCH 57/78] fix: add error whenever a not supported `model_type` is specified Current available options are: - `'constant'` - `'time'` - `'age'` --- R/fit_seromodel.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index 49d233db..77103186 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -101,6 +101,11 @@ fit_seromodel <- function( validate_serosurvey() %>% add_age_group_to_serosurvey() + stopifnot( + "model_type must be either 'constant', 'time' or 'age'" = + model_type %in% c("constant", "time", "age") + ) + stan_data <- build_stan_data( serosurvey = serosurvey, model_type = model_type, From 671a41683ac1be6d19107fba9ef61e6ed2c9dafa Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 03:00:34 -0500 Subject: [PATCH 58/78] doc: improve `fit_seromodel` prior parameters documentation --- R/fit_seromodel.R | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index 77103186..90d07869 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -60,15 +60,16 @@ set_foi_init <- function( #' @param foi_prior Force-of-infection distribution specified by means of #' the helper functions. Currently available options are: #' \describe{ -#' \item{`sf_normal`} -#' \item{`sf_uniform`} -#' \item{`sf_none`} +#' \item{[sf_normal]}{Function to set normal distribution priors} +#' \item{[sf_uniform]}{Function to set uniform distribution priors} #' } #' @param foi_sigma_rw Prior distribution for the standard deviation of the #' force-of-infection. Currently available options are: #' \describe{ -#' \item{`sf_cauchy`} -#' \iten{`sf_none`} +#' \item{[sf_normal]}{Function to set normal distribution prior. +#' Available for time models in the log-scale.} +#' \item{[sf_cauchy]}{Function to set Cauchy distribution prior. +#' Available for time models in regular scale.} #' } #' @param foi_index Integer vector specifying the age-groups for which #' force-of-infection values will be estimated @@ -77,9 +78,9 @@ set_foi_init <- function( #' @param seroreversion_prior seroreversion distribution specified by means of #' the helper functions. Currently available options are: #' \describe{ -#' \item{`sf_normal`} -#' \item{`sf_uniform`} -#' \item{`sf_none`} +#' \item{[sf_normal]}{Function to set normal distribution priors} +#' \item{[sf_uniform]}{Function to set uniform distribution priors} +#' \item{[sf_none]}{Function to set no prior distribution} #' } #' @param ... Additional parameters for [rstan][rstan::sampling] #' @returns stan_fit object with force-of-infection and seroreversion From c5e50bc906d9fb13154d5c97857a33a8787d86c2 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 03:09:38 -0500 Subject: [PATCH 59/78] fix: remove unecessary samples from output stanfit object --- inst/stan/generated_quantities/log_likelihood.stan | 4 ---- 1 file changed, 4 deletions(-) diff --git a/inst/stan/generated_quantities/log_likelihood.stan b/inst/stan/generated_quantities/log_likelihood.stan index 789d0db5..28ad8d3e 100644 --- a/inst/stan/generated_quantities/log_likelihood.stan +++ b/inst/stan/generated_quantities/log_likelihood.stan @@ -1,9 +1,5 @@ -vector[n_observations] n_seropositive_sim; -vector[n_observations] prob_infected_sim; vector[n_observations] log_likelihood; for(i in 1:n_observations){ - n_seropositive_sim[i] = binomial_rng(sample_size[i], prob_infected[i]); - prob_infected_sim[i] = n_seropositive_sim[i] / sample_size[i]; log_likelihood[i] = binomial_lpmf(n_seropositive[i] | sample_size[i], prob_infected[i]); } From c675100a293605224aeccd4783baffef3248237b Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 03:12:40 -0500 Subject: [PATCH 60/78] feat: add `get_age_intervals` function to plot_seromodel module --- R/plot_seromodel.R | 53 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index bfb2b55b..2b2785ca 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -39,6 +39,59 @@ prepare_serosurvey_for_plotting <- function( #nolint dplyr::relocate(.data$age_group) } +#' Construct age-group variable from age column +#' +#' Generates age intervals of length step in the interval spanned by +#' `age_min` and `age_max` in a serosurvey. +#' In cases where `max(age_max)%%(step+1)!=0`, the last age interval is +#' truncated and will have a different length than the others. +#' @inheritParams plot_serosurvey +#' @param step step used to split the age interval +#' @return Serosurvey with addition factor variable grouping `age_intervals`. +#' The interval is taken as closed to the right and to the left. +get_age_intervals <- function(serosurvey, step) { + age_min <- min(serosurvey$age_min) + age_max <- max(serosurvey$age_max) + + checkmate::assert_int(age_min, lower = 0) + checkmate::assert_int(age_max, lower = age_min) + checkmate::assert_int(step, lower = 2, upper = age_max) + + limits_low <- as.integer( + seq( + age_min, age_max, + by = step + ) + ) + + if ((age_max - age_min) %% step != 0) { + warn_msg <- "(age_min - age_max) is not an integer multiple of step. + The last age interval will be truncated to " + warn_msg <- paste0( + warn_msg, "[", limits_low[length(limits_low)], ",", age_max, "]" + ) + warning(warn_msg) + } + + # prepare breaks + lim_breaks <- c(limits_low, age_max) + + # define age groups closed to the left and closed to the right + survey_features <- data.frame( + age_min = limits_low, + age_max = limits_low + step - 1 + ) %>% add_age_bins() + + serosurvey$age_interval <- cut( + x = serosurvey$age_group, + breaks = lim_breaks, + include.lowest = TRUE, right = FALSE, + labels = survey_features$group + ) + + return(serosurvey) +} + #' Plots seroprevalence from the given serosurvey #' #' @inheritParams fit_seromodel From 33f8968ada6f7a58b3f0d191577f4415fd22bbfd Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 03:13:38 -0500 Subject: [PATCH 61/78] feat: enable data binning for plotting in `plot_serosurvey` --- R/plot_seromodel.R | 51 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index 2b2785ca..7a19d966 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -20,7 +20,6 @@ prepare_serosurvey_for_plotting <- function( #nolint ) { serosurvey <- serosurvey %>% - add_age_group_to_serosurvey() %>% cbind( Hmisc::binconf( serosurvey$n_seropositive, @@ -97,14 +96,46 @@ get_age_intervals <- function(serosurvey, step) { #' @inheritParams fit_seromodel #' @param size_text Size of text for plotting (`base_size` in #' [ggplot2][ggplot2::theme_bw]) +#' @param bin_serosurvey If `TRUE`, `serodata` is binned by means of +#' `prepare_bin_serosurvey`. Otherwise, age groups are kept as originally input. +#' @param bin_step Integer specifying the age groups bin size to be used when +#' `bin_serosurvey` is set to `TRUE`. #' @return ggplot object with seroprevalence plot #' @export plot_serosurvey <- function( serosurvey, - size_text = 11 + size_text = 11, + bin_serosurvey = FALSE, + bin_step = 5 ) { serosurvey <- validate_serosurvey(serosurvey = serosurvey) %>% - prepare_serosurvey_for_plotting() + add_age_group_to_serosurvey() + + if (bin_serosurvey) { + age_min <- min(serosurvey$age_min) + age_max <- max(serosurvey$age_max) + checkmate::assert_int(bin_step, lower = 2, upper = age_max) + + serosurvey <- get_age_intervals( + serosurvey = serosurvey, + step = bin_step + ) + + serosurvey <- serosurvey %>% + dplyr::group_by(.data$age_interval) %>% + dplyr::summarise( + sample_size = sum(.data$sample_size), + n_seropositive = sum(.data$n_seropositive) + ) %>% + dplyr::mutate( + survey_year = unique(serosurvey$survey_year), + age_min = as.integer(gsub("[[]|\\,.*", "\\1", .data$age_interval)) + 1, + age_max = as.integer(gsub(".*\\,|[]]", "\\1", .data$age_interval)) + ) %>% + add_age_group_to_serosurvey() + } + + serosurvey <- prepare_serosurvey_for_plotting(serosurvey) min_prev <- min(serosurvey$seroprev_lower) max_prev <- max(serosurvey$seroprev_upper) @@ -181,7 +212,9 @@ plot_seroprevalence_estimates <- function( seromodel, serosurvey, alpha = 0.05, - size_text = 11 + size_text = 11, + bin_serosurvey = FALSE, + bin_step = 5 ) { checkmate::assert_class(seromodel, "stanfit", null.ok = TRUE) @@ -203,7 +236,9 @@ plot_seroprevalence_estimates <- function( seroprevalence_plot <- plot_serosurvey( serosurvey = serosurvey, - size_text = size_text + size_text = size_text, + bin_serosurvey = bin_serosurvey, + bin_step = bin_step ) + ggplot2::geom_line( data = seroprevalence_central_estimates, @@ -448,6 +483,8 @@ plot_seromodel <- function( seromodel, serosurvey, alpha = 0.05, + bin_serosurvey = FALSE, + bin_step = 5, foi_df = NULL, foi_max = NULL, loo_estimate_digits = 1, @@ -471,7 +508,9 @@ plot_seromodel <- function( seromodel = seromodel, serosurvey = serosurvey, alpha = alpha, - size_text = size_text + size_text = size_text, + bin_serosurvey = bin_serosurvey, + bin_step = bin_step ) plot_list <- list( From e3cfd30f7804cbbce5773c1f66a19c395704dc38 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 04:05:49 -0500 Subject: [PATCH 62/78] fix: remove unecessary validation of `survey_year` in `validate_serosurvey` --- R/plot_seromodel.R | 2 ++ R/validation.R | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index 7a19d966..21282bcd 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -312,6 +312,7 @@ plot_foi_estimates <- function( data = foi_central_estimates, ggplot2::aes(x = .data$age) ) } else if (startsWith(model_name, "time")) { + checkmate::assert_names(names(serosurvey), must.include = "survey_year") xlab <- "Year" ages <- rev(1:max(serosurvey$age_max)) years <- unique(serosurvey$survey_year) - ages @@ -391,6 +392,7 @@ plot_rhats <- function( data = rhats_df, ggplot2::aes(x = .data$age) ) } else if (startsWith(model_name, "time")) { + checkmate::assert_names(names(serosurvey), must.include = "survey_year") xlab <- "Year" ages <- rev(1:max(serosurvey$age_max)) years <- unique(serosurvey$survey_year) - ages diff --git a/R/validation.R b/R/validation.R index 405094b8..fe023e63 100644 --- a/R/validation.R +++ b/R/validation.R @@ -4,8 +4,7 @@ validate_serosurvey <- function(serosurvey) { age_min = "numeric", age_max = "numeric", sample_size = "numeric", - n_seropositive = "numeric", - survey_year = "numeric" + n_seropositive = "numeric" ) checkmate::assert_names(names(serosurvey), must.include = names(col_types)) From ac7d073abfb77b4ca64cc2209f3833b19ab9984c Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 09:39:11 -0500 Subject: [PATCH 63/78] fix: minor syntax corrections --- R/build_stan_data.R | 2 +- R/plot_seromodel.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/build_stan_data.R b/R/build_stan_data.R index 5065a471..918ffd0b 100644 --- a/R/build_stan_data.R +++ b/R/build_stan_data.R @@ -36,7 +36,7 @@ sf_uniform <- function(min = 0, max = 10) { return(list(min = min, max = max, name = "uniform")) } -#' Sets cauchy distribution parameters for sampling +#' Sets Cauchy distribution parameters for sampling #' #' @param scale Scale #' of the normal distribution diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index 21282bcd..a6a4d0c7 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -97,7 +97,8 @@ get_age_intervals <- function(serosurvey, step) { #' @param size_text Size of text for plotting (`base_size` in #' [ggplot2][ggplot2::theme_bw]) #' @param bin_serosurvey If `TRUE`, `serodata` is binned by means of -#' `prepare_bin_serosurvey`. Otherwise, age groups are kept as originally input. +#' `prepare_bin_serosurvey`. +#' Otherwise, age groups are kept as originally input. #' @param bin_step Integer specifying the age groups bin size to be used when #' `bin_serosurvey` is set to `TRUE`. #' @return ggplot object with seroprevalence plot @@ -112,7 +113,6 @@ plot_serosurvey <- function( add_age_group_to_serosurvey() if (bin_serosurvey) { - age_min <- min(serosurvey$age_min) age_max <- max(serosurvey$age_max) checkmate::assert_int(bin_step, lower = 2, upper = age_max) From 83c2c6a7165a37ef72a22ff3f52c7db51b3c6ce0 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 09:39:29 -0500 Subject: [PATCH 64/78] doc: update documentation --- man/add_age_group_to_serosurvey.Rd | 2 +- man/build_stan_data.Rd | 26 ++++++++++++++++------- man/extract_central_estimates.Rd | 2 +- man/fit_seromodel.Rd | 29 +++++++++++++++++++------- man/get_age_intervals.Rd | 29 ++++++++++++++++++++++++++ man/get_foi_index.Rd | 2 +- man/plot_foi_estimates.Rd | 2 +- man/plot_rhats.Rd | 2 +- man/plot_seromodel.Rd | 11 +++++++++- man/plot_seroprevalence_estimates.Rd | 13 ++++++++++-- man/plot_serosurvey.Rd | 16 ++++++++++++-- man/plot_summary.Rd | 2 +- man/prepare_serosurvey_for_plotting.Rd | 2 +- man/serofoi-package.Rd | 4 ++-- man/set_foi_init.Rd | 17 +++++++++++++++ man/set_stan_data_defaults.Rd | 4 +++- man/sf_cauchy.Rd | 20 ++++++++++++++++++ man/summarise_central_estimate.Rd | 2 +- man/summarise_seromodel.Rd | 2 +- 19 files changed, 155 insertions(+), 32 deletions(-) create mode 100644 man/get_age_intervals.Rd create mode 100644 man/set_foi_init.Rd create mode 100644 man/sf_cauchy.Rd diff --git a/man/add_age_group_to_serosurvey.Rd b/man/add_age_group_to_serosurvey.Rd index 7f519c0e..b96b7d3a 100644 --- a/man/add_age_group_to_serosurvey.Rd +++ b/man/add_age_group_to_serosurvey.Rd @@ -8,7 +8,7 @@ add_age_group_to_serosurvey(serosurvey) } \arguments{ \item{serosurvey}{\describe{ -\item{\code{tsur}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/build_stan_data.Rd b/man/build_stan_data.Rd index 37d99f16..eb6b2d3e 100644 --- a/man/build_stan_data.Rd +++ b/man/build_stan_data.Rd @@ -9,13 +9,15 @@ build_stan_data( model_type = "constant", foi_prior = sf_uniform(), foi_index = NULL, + is_log_foi = FALSE, + foi_sigma_rw = sf_none(), is_seroreversion = FALSE, seroreversion_prior = sf_none() ) } \arguments{ \item{serosurvey}{\describe{ -\item{\code{tsur}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} @@ -27,23 +29,33 @@ build_stan_data( \item{foi_prior}{Force-of-infection distribution specified by means of the helper functions. Currently available options are: \describe{ -\item{\code{sf_normal}} -\item{\code{sf_uniform}} -\item{\code{sf_none}} +\item{\link{sf_normal}}{Function to set normal distribution priors} +\item{\link{sf_uniform}}{Function to set uniform distribution priors} }} \item{foi_index}{Integer vector specifying the age-groups for which force-of-infection values will be estimated} +\item{is_log_foi}{Boolean to set logarithmic scale in the FOI} + +\item{foi_sigma_rw}{Prior distribution for the standard deviation of the +force-of-infection. Currently available options are: +\describe{ +\item{\link{sf_normal}}{Function to set normal distribution prior. +Available for time models in the log-scale.} +\item{\link{sf_cauchy}}{Function to set Cauchy distribution prior. +Available for time models in regular scale.} +}} + \item{is_seroreversion}{Boolean specifying whether to include seroreversion rate estimation in the model} \item{seroreversion_prior}{seroreversion distribution specified by means of the helper functions. Currently available options are: \describe{ -\item{\code{sf_normal}} -\item{\code{sf_uniform}} -\item{\code{sf_none}} +\item{\link{sf_normal}}{Function to set normal distribution priors} +\item{\link{sf_uniform}}{Function to set uniform distribution priors} +\item{\link{sf_none}}{Function to set no prior distribution} }} } \value{ diff --git a/man/extract_central_estimates.Rd b/man/extract_central_estimates.Rd index 73f6664c..163a92fe 100644 --- a/man/extract_central_estimates.Rd +++ b/man/extract_central_estimates.Rd @@ -16,7 +16,7 @@ extract_central_estimates( with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{tsur}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/fit_seromodel.Rd b/man/fit_seromodel.Rd index 34851ad8..ab4cf1d0 100644 --- a/man/fit_seromodel.Rd +++ b/man/fit_seromodel.Rd @@ -7,16 +7,19 @@ fit_seromodel( serosurvey, model_type = "constant", + is_log_foi = FALSE, foi_prior = sf_normal(), + foi_sigma_rw = sf_none(), foi_index = NULL, + foi_init = NULL, is_seroreversion = FALSE, - seroreversion_prior = sf_uniform(), + seroreversion_prior = sf_normal(), ... ) } \arguments{ \item{serosurvey}{\describe{ -\item{\code{tsur}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} @@ -25,12 +28,22 @@ fit_seromodel( \item{model_type}{Type of the model. Either "constant", "age" or "time"} +\item{is_log_foi}{Boolean to set logarithmic scale in the FOI} + \item{foi_prior}{Force-of-infection distribution specified by means of the helper functions. Currently available options are: \describe{ -\item{\code{sf_normal}} -\item{\code{sf_uniform}} -\item{\code{sf_none}} +\item{\link{sf_normal}}{Function to set normal distribution priors} +\item{\link{sf_uniform}}{Function to set uniform distribution priors} +}} + +\item{foi_sigma_rw}{Prior distribution for the standard deviation of the +force-of-infection. Currently available options are: +\describe{ +\item{\link{sf_normal}}{Function to set normal distribution prior. +Available for time models in the log-scale.} +\item{\link{sf_cauchy}}{Function to set Cauchy distribution prior. +Available for time models in regular scale.} }} \item{foi_index}{Integer vector specifying the age-groups for which @@ -42,9 +55,9 @@ seroreversion rate estimation in the model} \item{seroreversion_prior}{seroreversion distribution specified by means of the helper functions. Currently available options are: \describe{ -\item{\code{sf_normal}} -\item{\code{sf_uniform}} -\item{\code{sf_none}} +\item{\link{sf_normal}}{Function to set normal distribution priors} +\item{\link{sf_uniform}}{Function to set uniform distribution priors} +\item{\link{sf_none}}{Function to set no prior distribution} }} \item{...}{Additional parameters for \link[rstan:stanmodel-method-sampling]{rstan}} diff --git a/man/get_age_intervals.Rd b/man/get_age_intervals.Rd new file mode 100644 index 00000000..3cb6abb6 --- /dev/null +++ b/man/get_age_intervals.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_seromodel.R +\name{get_age_intervals} +\alias{get_age_intervals} +\title{Construct age-group variable from age column} +\usage{ +get_age_intervals(serosurvey, step) +} +\arguments{ +\item{serosurvey}{\describe{ +\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{age_min}}{Floor value of the average between age_min and age_max} +\item{\code{age_max}}{The size of the sample} +\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_seropositive}}{Number of positive samples for each age group} +}} + +\item{step}{step used to split the age interval} +} +\value{ +Serosurvey with addition factor variable grouping \code{age_intervals}. +The interval is taken as closed to the right and to the left. +} +\description{ +Generates age intervals of length step in the interval spanned by +\code{age_min} and \code{age_max} in a serosurvey. +In cases where \code{max(age_max)\%\%(step+1)!=0}, the last age interval is +truncated and will have a different length than the others. +} diff --git a/man/get_foi_index.Rd b/man/get_foi_index.Rd index 4f7f08db..dd06e9fb 100644 --- a/man/get_foi_index.Rd +++ b/man/get_foi_index.Rd @@ -8,7 +8,7 @@ get_foi_index(serosurvey, group_size) } \arguments{ \item{serosurvey}{\describe{ -\item{\code{tsur}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/plot_foi_estimates.Rd b/man/plot_foi_estimates.Rd index 456b41d6..b9e3753d 100644 --- a/man/plot_foi_estimates.Rd +++ b/man/plot_foi_estimates.Rd @@ -18,7 +18,7 @@ plot_foi_estimates( with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{tsur}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/plot_rhats.Rd b/man/plot_rhats.Rd index 0018adcd..d3225181 100644 --- a/man/plot_rhats.Rd +++ b/man/plot_rhats.Rd @@ -11,7 +11,7 @@ plot_rhats(seromodel, serosurvey, par_name = "foi_expanded", size_text = 11) with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{tsur}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/plot_seromodel.Rd b/man/plot_seromodel.Rd index 1d483b96..475a94c9 100644 --- a/man/plot_seromodel.Rd +++ b/man/plot_seromodel.Rd @@ -8,6 +8,8 @@ plot_seromodel( seromodel, serosurvey, alpha = 0.05, + bin_serosurvey = FALSE, + bin_step = 5, foi_df = NULL, foi_max = NULL, loo_estimate_digits = 1, @@ -22,7 +24,7 @@ plot_seromodel( with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{tsur}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} @@ -31,6 +33,13 @@ with \link{fit_seromode}} \item{alpha}{1 - alpha indicates the credibility level to be used} +\item{bin_serosurvey}{If \code{TRUE}, \code{serodata} is binned by means of +\code{prepare_bin_serosurvey}. +Otherwise, age groups are kept as originally input.} + +\item{bin_step}{Integer specifying the age groups bin size to be used when +\code{bin_serosurvey} is set to \code{TRUE}.} + \item{foi_df}{Dataframe with columns \describe{ \item{\code{year}/\code{age}}{Year/Age (depending on the model)} diff --git a/man/plot_seroprevalence_estimates.Rd b/man/plot_seroprevalence_estimates.Rd index d83d727f..6ef46ade 100644 --- a/man/plot_seroprevalence_estimates.Rd +++ b/man/plot_seroprevalence_estimates.Rd @@ -8,7 +8,9 @@ plot_seroprevalence_estimates( seromodel, serosurvey, alpha = 0.05, - size_text = 11 + size_text = 11, + bin_serosurvey = FALSE, + bin_step = 5 ) } \arguments{ @@ -16,7 +18,7 @@ plot_seroprevalence_estimates( with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{tsur}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} @@ -27,6 +29,13 @@ with \link{fit_seromode}} \item{size_text}{Size of text for plotting (\code{base_size} in \link[ggplot2:ggtheme]{ggplot2})} + +\item{bin_serosurvey}{If \code{TRUE}, \code{serodata} is binned by means of +\code{prepare_bin_serosurvey}. +Otherwise, age groups are kept as originally input.} + +\item{bin_step}{Integer specifying the age groups bin size to be used when +\code{bin_serosurvey} is set to \code{TRUE}.} } \value{ ggplot object with seroprevalence estimates and serosurveys plots diff --git a/man/plot_serosurvey.Rd b/man/plot_serosurvey.Rd index 38e6014a..ff40b2ea 100644 --- a/man/plot_serosurvey.Rd +++ b/man/plot_serosurvey.Rd @@ -4,11 +4,16 @@ \alias{plot_serosurvey} \title{Plots seroprevalence from the given serosurvey} \usage{ -plot_serosurvey(serosurvey, size_text = 11) +plot_serosurvey( + serosurvey, + size_text = 11, + bin_serosurvey = FALSE, + bin_step = 5 +) } \arguments{ \item{serosurvey}{\describe{ -\item{\code{tsur}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} @@ -17,6 +22,13 @@ plot_serosurvey(serosurvey, size_text = 11) \item{size_text}{Size of text for plotting (\code{base_size} in \link[ggplot2:ggtheme]{ggplot2})} + +\item{bin_serosurvey}{If \code{TRUE}, \code{serodata} is binned by means of +\code{prepare_bin_serosurvey}. +Otherwise, age groups are kept as originally input.} + +\item{bin_step}{Integer specifying the age groups bin size to be used when +\code{bin_serosurvey} is set to \code{TRUE}.} } \value{ ggplot object with seroprevalence plot diff --git a/man/plot_summary.Rd b/man/plot_summary.Rd index 41d05737..b660bfb4 100644 --- a/man/plot_summary.Rd +++ b/man/plot_summary.Rd @@ -18,7 +18,7 @@ plot_summary( with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{tsur}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/prepare_serosurvey_for_plotting.Rd b/man/prepare_serosurvey_for_plotting.Rd index 7296cda6..c0c8f637 100644 --- a/man/prepare_serosurvey_for_plotting.Rd +++ b/man/prepare_serosurvey_for_plotting.Rd @@ -8,7 +8,7 @@ prepare_serosurvey_for_plotting(serosurvey, alpha = 0.05) } \arguments{ \item{serosurvey}{\describe{ -\item{\code{tsur}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/serofoi-package.Rd b/man/serofoi-package.Rd index 8fcb4efa..0e9d187e 100644 --- a/man/serofoi-package.Rd +++ b/man/serofoi-package.Rd @@ -22,11 +22,11 @@ Useful links: } \author{ -\strong{Maintainer}: Zulma M. Cucunubá \email{zulma.cucunuba@javeriana.edu.co} (\href{https://orcid.org/0000-0002-8165-3198}{ORCID}) +\strong{Maintainer}: Zulma M. CucunubC! \email{zulma.cucunuba@javeriana.edu.co} (\href{https://orcid.org/0000-0002-8165-3198}{ORCID}) Authors: \itemize{ - \item Nicolás T. Domínguez \email{ex-ntorres@javeriana.edu.co} (\href{https://orcid.org/0009-0002-8484-1298}{ORCID}) + \item NicolC!s T. DomC-nguez \email{ex-ntorres@javeriana.edu.co} (\href{https://orcid.org/0009-0002-8484-1298}{ORCID}) \item Ben Lambert \item Pierre Nouvellet } diff --git a/man/set_foi_init.Rd b/man/set_foi_init.Rd new file mode 100644 index 00000000..16201036 --- /dev/null +++ b/man/set_foi_init.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fit_seromodel.R +\name{set_foi_init} +\alias{set_foi_init} +\title{Sets initialization function for sampling} +\usage{ +set_foi_init(foi_init, is_log_foi, foi_index) +} +\arguments{ +\item{is_log_foi}{Boolean to set logarithmic scale in the FOI} + +\item{foi_index}{Integer vector specifying the age-groups for which +force-of-infection values will be estimated} +} +\description{ +Sets initialization function for sampling +} diff --git a/man/set_stan_data_defaults.Rd b/man/set_stan_data_defaults.Rd index 45d1681b..c909c154 100644 --- a/man/set_stan_data_defaults.Rd +++ b/man/set_stan_data_defaults.Rd @@ -4,11 +4,13 @@ \alias{set_stan_data_defaults} \title{Set stan data defaults for sampling} \usage{ -set_stan_data_defaults(stan_data, is_seroreversion = FALSE) +set_stan_data_defaults(stan_data, is_log_foi = FALSE, is_seroreversion = FALSE) } \arguments{ \item{stan_data}{List to be passed to \link[rstan:stanmodel-method-sampling]{rstan}} +\item{is_log_foi}{Boolean to set logarithmic scale in the FOI} + \item{is_seroreversion}{Boolean specifying whether to include seroreversion rate estimation in the model} } diff --git a/man/sf_cauchy.Rd b/man/sf_cauchy.Rd new file mode 100644 index 00000000..4d89439a --- /dev/null +++ b/man/sf_cauchy.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build_stan_data.R +\name{sf_cauchy} +\alias{sf_cauchy} +\title{Sets Cauchy distribution parameters for sampling} +\usage{ +sf_cauchy(location = 0, scale = 1) +} +\arguments{ +\item{scale}{Scale +of the normal distribution} + +\item{sd}{Standard deviation of the normal distribution} +} +\value{ +List with specified statistics and name of the model +} +\description{ +Sets Cauchy distribution parameters for sampling +} diff --git a/man/summarise_central_estimate.Rd b/man/summarise_central_estimate.Rd index a6071d96..32b0020a 100644 --- a/man/summarise_central_estimate.Rd +++ b/man/summarise_central_estimate.Rd @@ -17,7 +17,7 @@ summarise_central_estimate( with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{tsur}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/summarise_seromodel.Rd b/man/summarise_seromodel.Rd index 488704ca..0b7ce2fe 100644 --- a/man/summarise_seromodel.Rd +++ b/man/summarise_seromodel.Rd @@ -19,7 +19,7 @@ summarise_seromodel( with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{tsur}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} From 0e0b13a50a66d37b5c28bdbe0360bac039ea3798 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 09:39:56 -0500 Subject: [PATCH 65/78] doc: upgrade vignettes accordingly to refactorization --- vignettes/foi_models.Rmd | 240 +++++++++++++++------------ vignettes/simulating_serosurveys.Rmd | 147 +++++++--------- vignettes/use_cases.Rmd | 168 ++++++++++--------- 3 files changed, 276 insertions(+), 279 deletions(-) diff --git a/vignettes/foi_models.Rmd b/vignettes/foi_models.Rmd index 90f481b9..88f6e725 100644 --- a/vignettes/foi_models.Rmd +++ b/vignettes/foi_models.Rmd @@ -21,37 +21,98 @@ library(serofoi) library(dplyr) ``` -The current version of ***serofoi*** supports three different models for estimating the *Force-of-Infection (FoI)*, including constant and time-varying trajectories. For fitting the model to the seroprevalence data we use a suit of bayesian models that include prior and upper prior distributions +The current version of ***serofoi*** supports three different serocatalytic models for estimating the *Force-of-Infection (FoI)*: constant, age-varying, and time-varying FoI. To estimate relevant parameters we employ a suit of Bayesian models using age-disaggregated population-based cross-sectional serological surveys as input data. ## What is the Force-of-Infection The force of infection, also known as the hazard rate or the infection pressure, is a key concept in mathematical modelling of infectious diseases. It represents the rate at which susceptible individuals become infected, given their exposure to a pathogen. In simple terms, the force of infection quantifies the risk of a susceptible individual becoming infected over a period of time. It is usually expressed as a rate per unit of time (e.g., per day or per year). +To illustrate this, consider a simple SIR model: +$$ +\begin{aligned} + \frac{dS}{dt} &= -\beta S I,\\ + \frac{dI}{dt} &= \beta S I - \gamma I,\\ + \frac{dR}{dt} &= \gamma I, +\end{aligned} +$$ + +The FoI in this case is defined as $\lambda(t) = \beta I(t)$ (Note that the FoI is time-dependent by definition). To solve this system of equations, we need to know the transmission rate, $\beta>0$, and the rate of recovery from infection $\gamma>0$, as well as some initial conditions. + +**Serocatalytic models** offer a simpler perspective: to estimate how the risk of historical exposure to a pathogen varies depending on the time of birth of the individuals in the sample. For this, we put together the $I$ and $R$ compartments into a single **seropositives** compartment $P(t)$ and consider the the dynamics of each birth cohort separately: +$$ +\begin{aligned} + &\frac{dN^{\tau}(t)}{dt} = -\lambda(t) N^{\tau}(t),\\ + &\frac{dP^{\tau}(t)}{dt} = \lambda(t) N^{\tau}(t), \\ + &N^{\tau}(t)+P^{\tau}(t)=1; \quad t<\tau +\end{aligned} +$$ +where $0 \leq N^\tau(t)\leq 1$ is the proportion of individuals born in year $\tau$ who are seronegative at time $t$. Assuming lack of maternal antibodies, the initial condition for each cohort can be specified as $N^{\tau}(\tau) = 1$. Depending on whether the FoI is constant, time dependent, or age dependent, this system of equations yields the different solutions on which *serofoi* is based on. + ## Constant vs Time-varying FoI -The *FoI* is one of the most important parameters in epidemiology, but it is often incorrectly assumed to be constant over time. Identifying whether the *FoI* follows a constant or a time-varying trend can be important in the identification and characterization of the spread of disease. In Table 1 there is a summary of the models currently supported by serofoi. +The *FoI* is often incorrectly assumed to be constant over time. Identifying whether the *FoI* follows a constant, an age-varying or a time-varying trend can be important in the identification and characterization of the spread of disease. **serofoi** offers tools to implement and compare a wide variety of Bayesian models to estimate the FoI according to the aforementioned serocatalytic models. + +In the idealised situation where FoI is constant $\lambda(t) = \lambda$: + +$$ +\frac{dP^{\tau}(t)}{dt} = \lambda (1 - P^{\tau}(t)). +$$ +Applying the integrating factor method and using the initial conditions $X^\tau(\tau) = 0$ yields to the solution: + +$$ +P^{\tau}(t) = 1 - \exp(-\bar \lambda t), +$$ + +meaning that, in the long rung, the seropositivity converges to 1$P^{\tau}(t)\rightarrow 1$. + +In more realistic applications, the FoI is assumed to be piecewise-constant on discrete intervals of 1-year length. The solution for each year is then: +$$ +P^{\tau}(t) = 1 + (P_0 - 1) \exp(-\lambda_t t) +$$ +where $P_0$ corresponds to the seropositivity at the end of the previous year and $\lambda_t$ is the constant value assumed for the FoI at time $t$. From the resulting recursive equation, it is possible to obtain analytical solutions for the seropositivity $P^\tau(t)$ along each 1-year length chunk. + +## Considering seroreversion + +Since it is possible for seropositive individuals to lose immunity over time, we consider the rate at which infected individuals become seronegative $\mu$. The system of equations is now: + +$$ +\begin{aligned} + \frac{dP^{\tau}(t)}{dt} &= \lambda(t) (1 - P^{\tau}(t)) - \mu P^{\tau}(t),\\ + P^{\tau}(\tau) &= 0, +\end{aligned} +$$ + +Similarly, this yields to a iterative solution for the seropositivity $X^\tau (t)$ when piece-wise constant FoI is considered: + + +$$ + P^{\tau}(t) = \frac{\lambda_t}{\lambda_t + \mu} + \left(P_0 - \frac{\lambda_t}{\lambda_t + \mu}\right) \exp(-(\lambda_t + \mu)), +$$ + +where, again, $P_0$ corresponds to the proportion of seropositive individuals of cohort $\tau$ by the end of the previous year. Correct indexation and simplification of this equation allows the description of age-dependent FOIs. + +## Estimating the FoI - Bayesian modelling + +Now that we can describe the proportion of seropositive individuals $P(t)$ by means of the serocatalytic models for constant, time-varying, and age-varying FOIs, what we want is to obtain estimates of the FoI $\lambda$ and the seroreversion rate $\mu$ by sampling from a Bayesian model taking a cross-sectional serological survey as data. For this, we can use **serofoi**'s `fit_seromodel()`, which relies on the statistical programming language *Stan* to perform this task. The current available models are summarised in table 1: + ::: l-body-outset -| Model Option | Description and usage | -|-----------------|:-----------------------------------------------------------:| -| `constant` | Constant FoI | -| `tv_normal` | Time-varying normal FoI: slow change in FoI | -| `tv_normal_log` | Time-varying normal-log FoI: fast epidemic change in FoI | +| Serocatalytic model | scale | n_seropositive | FoI_1 prior | FoI_i, i>1 | sigma | mu | +| ------------------- | ------- | ------------------------------------- | ------------------------------------- | ------------------------ | ----------------------- | ------------------------------------- | +| constant | regular | binomial(sample_size, prob_infected) | normal(mean, sd)
uniform(min, max) | \- | cauchy(mean, sd) | normal(mean, sd)
uniform(min, max) | +| age | regular | binomial(sample_size, prob_infected) | normal(mean, sd)
uniform(min, max) | normal(FoI_{i-1}, sigma) | cauchy(mean, sd) | normal(mean, sd)
uniform(min, max) | +| time | regular | binomial(sample_size, prob_infected) | normal(mean, sd)
uniform(min, max) | normal(FoI_{i-1}, sigma) | cauchy(mean, sd) | normal(mean, sd)
uniform(min, max) | +| time | log | binomial(sample_size, prob_infected) | normal(mean, sd)
uniform(min, max) | normal(FoI_{i-1}, sigma) | normal(location, scale) | normal(mean, sd)
uniform(min, max) | ::: -Table 1. Model options and descriptions. +Table 1. Available options. ## Model 1. Constant Force-of-Infection (endemic model) -The *endemic constant model* is a simple mathematical model used in epidemiology to describe the seroprevalence of an infectious disease within a population, as a product of a long-term transmission. +The *endemic constant model* is a simple mathematical model used in epidemiology to describe the seroprevalence of an infectious disease within a population, as a product of a long-term transmission. In this case, the rate of infection acquisition $\lambda$ is constant over time, and the seroprevalence $P(A)$ behaves as a cumulative process increasing monotonically with age. -For a constant FoI endemic model, the rate of infection acquisition $\lambda$ is constant over time for each trajectory, and the seroprevalence $P$ behaves as a cumulative process increasing monotonically with age. For the seroprevalence at age $a$ and time $t$, we have: -$$ -P(a,t) = 1-\exp\left(-\lambda a\right) -$$ -The number of positive cases follows a binomial distribution, where $n$ is the number of trials (size of the age group) and $P$ is the probability of successes (seroprevalence) for a certain age group: +We always model the number of positive cases according to a binomial distribution, where $n(A)$ and $X(A)$ are the sample size and number of seropositive of age group $A$ respectively: $$ -p(a,t) \sim binom(n(a,t), P(a,t)) +X(A) \sim \textit{Binom}(n(A), P(A)) $$ -In ***serofoi***, for the constant model, the *FoI* ($\lambda$) is modelled within a Bayesian framework using a uniform prior distribution $\sim U(0,2)$. To test the `constant` model we simulate a serosurvey following a stepwise decreasing *FoI* (red line in Fig. 1) using the data simulation functions of ***serofoi***: @@ -69,34 +130,26 @@ survey_features <- data.frame( sample_size = rep(25, 10) ) -serodata_constant <- simulate_serosurvey( +serosurvey_constant <- simulate_serosurvey( "time", foi_df, survey_features ) %>% -rename( - total = sample_size, - counts = n_seropositive -) %>% -mutate( - tsur = 2050, - survey = "simdata_constant" -) %>% -prepare_serodata() +mutate(survey_year = 2050) ``` -The simulated dataset `serodata_constant` contains information about 250 samples of individuals between 1 and 50 years old (5 samples per age) with age groups of 5 years length. The following code shows how to implement the constant model to this simulated serosurvey: +The simulated dataset `serosurvey_constant` contains information about 250 samples of individuals between 1 and 50 years old (5 samples per age) with age groups of 5 years length. The following code shows how to implement the constant model to this simulated serosurvey: ```{r constant model, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} seromodel_constant <- fit_seromodel( - serodata = serodata_constant, - foi_model = "constant", + serosurvey = serosurvey_constant, + model_type = "constant", iter = 800 ) plot_seromodel( seromodel_constant, - serodata = serodata_constant, - foi = foi_df, + serosurvey = serosurvey_constant, + foi_df = foi_df, size_text = 6 ) ``` @@ -106,23 +159,20 @@ Figure 1. Constant serofoi model plot. Simulated (red) vs modelled (blue) *FoI*. rm(list = ls(pattern = "_constant")) ``` -In this case, 800 iterations are enough to ensure convergence. The `plot_seromodel` method provides a visualisation of the results, including a summary where the expected log pointwise predictive density (`elpd`) and its standard error (`se`) are shown. We say that a model converges if all the R-hat estimates are below 1.01. +In this case, 800 iterations are enough to ensure convergence. `plot_seromodel()` returns a visualisation of the results including a summary where the expected log pointwise predictive density (`elpd`) and its standard error (`se`) are shown. We say that a model converges if all the R-hat estimates are below 1.01. ## Time-varying FoI models For the time-varying *FoI* models, the probability for a case to be positive at age a at time $t$ also follows a binomial distribution, as described above. However, the seroprevalence is obtained from a cumulative of the yearly-varying values of the *FoI* over time: + $$ -P(a,t) = 1 - \exp\left(-\sum_{i=t-a+1}^{t}\lambda_i\right) + P^{\tau}(t) = \frac{\lambda_t}{\lambda_t + \mu} + \left(P_0 - \frac{\lambda_t}{\lambda_t + \mu}\right) \exp(-(\lambda_t + \mu)), $$ -The corresponding serosurvey completed at time $t_{sur}$ is informative for the interval $[t_{sur}-a_{max}, t_{sur}]$. ## Model 2. Time-varying FoI - Slow Time-Varying FoI -The *time-varying slow normal model* relies on the following prior distributions for the *FoI* to describe the spread of a given infectious disease within a population over time: -$$ -\lambda(t)\sim normal(\lambda(t-1), \sigma) \\ -\lambda(t=1) \sim normal(0, 1) -$$ +The *time-varying model* (`model_type = "time"`) uses a forward random walk algorithm where the prior of the first chronological FoI value in the time-span of the serological survey can be either a normal distribution (`foi_prior = sf_normal()`) or a uniform distribution (`foi_prior = sf_unif()`). Subsequent values of the FoI are sampled from the normal distribution as $\lambda_i \sim \mathcal{N}(\lambda_{i-1}, \sigma)$, where $\sigma \sim \textit{Cauchy}(0, 1)$. The prior distribution for the seroreversion rate can also be either normal or uniform, and is specified by means of the parameter `seroreversio_prior`. + To test the `tv_normal` model we simulate a serosurvey following a stepwise decreasing *FoI* (red line in Fig. 2) using the data simulation functions of ***serofoi***: ```{r tv_normal simdata, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE} @@ -142,20 +192,12 @@ survey_features <- data.frame( sample_size = rep(25, 10) ) -serodata_sw_dec <- simulate_serosurvey( +serosurvey_sw_dec <- simulate_serosurvey( "time", foi_df, survey_features ) %>% -rename( - total = sample_size, - counts = n_seropositive -) %>% -mutate( - tsur = 2050, - survey = "simdata_step_wise" -) %>% -prepare_serodata() +mutate(survey_year = 2050) ``` @@ -163,16 +205,16 @@ The simulated dataset `foi_sim_sw_dec` contains information about 250 samples o ```{r tv_normal model, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} -seromodel_tv_normal <- fit_seromodel( - serodata = serodata_sw_dec, - foi_model = "tv_normal", - chunks = rep(c(1, 2, 3), c(23, 10, 15)), +seromodel_time_normal <- fit_seromodel( + serosurvey = serosurvey_sw_dec, + model_type = "time", + foi_index = rep(c(1, 2, 3), c(25, 10, 15)), iter = 1500 ) plot_seromodel( - seromodel_tv_normal, - serodata = serodata_sw_dec, - foi = foi_df, + seromodel_time_normal, + serosurvey = serosurvey_sw_dec, + foi_df = foi_df, size_text = 6 ) ``` @@ -185,14 +227,14 @@ rm(list = ls(pattern = "_sw_dec|_normal")) The number of iterations required may depend on the number of years, reflected by the difference between the year of the serosurvey and the maximum age-class sampled. ## Model 3. Time-varying FoI - Fast Epidemic Model -The *time-varying fast epidemic model*, relies on normal prior distributions for the *FoI* in the logarithmic scale, i.e: +The *time-varying fast epidemic model* is parametrized in such a way that the initial FoI value in the forward random walk is sampled in the logarithmic scale. In this case, the priors for subsequent FOIs are: $$ -\lambda(t)\sim normal(\log(\lambda(t-1)), \sigma) \\ -\lambda(t=1) \sim normal(-6, 4) +\log\left(\lambda(t)\right) \sim \mathcal{N}(\log(\lambda(t-1)), \sigma) \\ +\sigma \sim \mathcal{N} (0, 1) $$ -This is done in order to capture fast changes in the *FoI* trend. Importantly, the standard deviation parameter of this normal distribution of the *FoI* $\lambda(t)$ is set using an upper prior that follows a Cauchy distribution. +This is done in order to capture fast changes in the *FoI* trend. -To test the `tv_normal_log` model we simulate a serosurvey conducted in 2050 emulating a hypothetical situation where a three-year epidemic occurred between 2030 and 2035: +To test the `log_time` model we simulate a serosurvey conducted in 2050 emulating a hypothetical situation where a three-year epidemic occurred between 2030 and 2035: ```{r tv_normal_log simdata, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE} foi_df <- data.frame( @@ -210,40 +252,33 @@ survey_features <- data.frame( sample_size = rep(25, 10) ) -serodata_large_epi <- simulate_serosurvey( +serosurvey_large_epi <- simulate_serosurvey( survey_features = survey_features, - foi = foi_df, + foi_df, model = "time" ) %>% -rename( - total = sample_size, - counts = n_seropositive -) %>% -mutate( - tsur = 2050, - survey = "simdata_big_outbreak" -) %>% -prepare_serodata() +mutate(survey_year = 2050) ``` The simulated serosurvey tests 250 individuals between 1 and 50 years old by the year 2050. The implementation of the fast epidemic model can be obtained running the following lines of code: ```{r tv_normal_log model, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=4, fig.asp=1.5, fig.align="center", out.width ="50%", fig.keep="all"} -seromodel_tv_normal_log <- fit_seromodel( - serodata = serodata_large_epi, - foi_model = "tv_normal_log", - chunks = rep(c(1, 2, 3), c(28, 5, 15)), +seromodel_log_time_normal <- fit_seromodel( + serosurvey = serosurvey_large_epi, + model_type = "time", + is_log_foi = TRUE, + foi_index = rep(c(1, 2, 3), c(30, 5, 15)), iter = 2000 ) -plot_tv_normal_log <- plot_seromodel( - seromodel_tv_normal_log, - serodata = serodata_large_epi, - foi = foi_df, +plot_log_time_normal <- plot_seromodel( + seromodel_log_time_normal, + serosurvey = serosurvey_large_epi, + foi_df = foi_df, size_text = 6 ) -plot(plot_tv_normal_log) +plot(plot_log_time_normal) ``` Figure 3. *Time-varying fast epidemic serofoi model* plot. Simulated (red) vs modelled (blue) *FoI*. @@ -251,46 +286,33 @@ In Fig 3 we can see that the *fast epidemic serofoi model* is able to identify t ## Models Comparison -The statistical details of the three models are described in Table 2. - -::: l-body-outset -| Model Option | Probability of positive case at age $a$ | Prior distribution | Upper priors | -|:----------------|:----------------------------------------|:----------------------------------------------------------|:------------------------| -| `constant` | $\sim binom(n(a,t), P(a,t))$ | $\lambda\sim uniform(0,2)$ | | -| `tv_normal` | $\sim binom(n(a,t), P(a,t))$ | $\lambda\sim normal(\lambda(t-1),\sigma)\\ - \lambda(t=1)\sim normal(0,1)$ | $\sigma\sim Cauchy(0,1)$| -| `tv_normal_log` | $\sim binom(n(a,t), P(a,t))$ | $\lambda\sim normal(log(\lambda(t-1)),\sigma)\\ - \lambda(t=1)\sim normal(-6,4)$ | $\sigma\sim Cauchy(0,1)$| -::: -Table 2. Statistical characteristics of ***serofoi***'s currently supported models for the FoI ($\lambda$). Here $n$ is the size of an age group $a$ at time-step $t$ and $P$ is its corresponding seroprevalence. - -Above we showed that the fast epidemic model (`tv_normal_log`) is able to identify the large epidemic outbreak described by the `simdata_large_epi` dataset, which was simulated according to a step-wise decreasing *FoI* (red line in Fig 3). +Above we showed that the fast epidemic model (specified by `model_type = "time"` and `is_log = TRUE` in `fit_seromodel()`) is able to identify the large epidemic outbreak described by the dataset simulated according to a step-wise decreasing *FoI* (red line in Fig 3). -Now, we would like to know whether this model actually fits this dataset better than the other available models in ***serofoi***. For this, we also implement both the endemic model (`constant`) and the slow time-varying normal model (`tv_normal`): +Now, we would like to know whether this model actually fits this dataset better than the other available models in ***serofoi***. For this, we also implement both the endemic model (`model_type = "constant"`) and the slow time-varying normal model (`model_type="time"`, `is_log = FALSE`): ```{r model_comparison, include = FALSE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE } seromodel_constant <- fit_seromodel( - serodata = serodata_large_epi, - foi_model = "constant", + serosurvey = serosurvey_large_epi, + model_type = "constant", iter = 800 ) plot_constant <- plot_seromodel( seromodel_constant, - serodata = serodata_large_epi, - foi = foi_df, + serosurvey = serosurvey_large_epi, + foi_df = foi_df, size_text = 6 ) -seromodel_tv_normal <- fit_seromodel( - serodata = serodata_large_epi, - foi_model = "tv_normal", - chunks = rep(c(1, 2, 3), c(28, 5, 15)), +seromodel_time_normal <- fit_seromodel( + serosurvey = serosurvey_large_epi, + model_type = "time", + foi_index = rep(c(1, 2, 3), c(30, 5, 15)), iter = 2000 ) -plot_tv_normal <- plot_seromodel( - seromodel_tv_normal, - serodata = serodata_large_epi, - foi = foi_df, +plot_time_normal <- plot_seromodel( + seromodel_time_normal, + serosurvey = serosurvey_large_epi, + foi_df = foi_df, size_text = 6 ) ``` @@ -298,7 +320,7 @@ Using the function `cowplot::plot_grid` we can visualise the results of the thre ```{r model_comparison_plot, include = TRUE, echo = TRUE, errors = FALSE, warning = FALSE, message = FALSE, fig.width=5, fig.asp=1, fig.align="center"} cowplot::plot_grid( - plot_constant, plot_tv_normal, plot_tv_normal_log, + plot_constant, plot_time_normal, plot_log_time_normal, nrow = 1, ncol = 3, labels = "AUTO" ) ``` diff --git a/vignettes/simulating_serosurveys.Rmd b/vignettes/simulating_serosurveys.Rmd index 4511f75a..185e6870 100644 --- a/vignettes/simulating_serosurveys.Rmd +++ b/vignettes/simulating_serosurveys.Rmd @@ -66,25 +66,8 @@ glimpse(serosurvey_constant) Now, we can plot the proportions of those seropositive, including the posterior 95% percentiles (when assuming a flat prior), and plot it along the true seropositivities (line blue): -```{r, echo=FALSE, fig.align="center"} -add_posterior_quantiles <- function(df) { - df %>% - mutate( - alpha = 1 + .data$n_seropositive, - beta = 1 + .data$sample_size - .data$n_seropositive - ) %>% - mutate( - lower = qbeta(0.025, alpha, beta), - middle = qbeta(0.5, alpha, beta), - upper = qbeta(0.975, alpha, beta) - ) -} - -ggplot() + - geom_pointrange( - data = serosurvey_constant %>% add_posterior_quantiles(), - aes(x = age_min, y = middle, ymin = lower, ymax = upper) - ) + +```{r, fig.align="center"} +plot_serosurvey(serosurvey_constant) + geom_line( data = probability_seropositive_by_age( model = "age", @@ -95,9 +78,7 @@ ggplot() + color = "blue", linewidth = 1 ) + - scale_y_continuous(labels = scales::percent) + - ylab("Seropositivity") + - xlab("Age") + scale_y_continuous(labels = scales::percent) ``` ## Age-varying FOI @@ -127,48 +108,45 @@ serosurvey_age_dep <- simulate_serosurvey( Below, we see that the FOI for the age-dependent FOI pathogen increases rapidly during the earliest years then plateaus in later adulthood as the FOI effectively becomes negligible. -```{r, fig.align="center", echo=FALSE} +```{r, fig.align="center"} # combine with constant FOI survey -serosurvey_combined <- serosurvey_constant %>% - add_posterior_quantiles() %>% - rename(age = age_min) %>% - mutate(type = "constant FOI") %>% +serosurvey_combined <- rbind( + serosurvey_constant %>% + mutate(model_type = "constant FOI") %>% left_join( probability_seropositive_by_age( model = "age", foi = foi_constant, seroreversion_rate = 0 - ), - by = "age") %>% - bind_rows( - serosurvey_age_dep %>% - add_posterior_quantiles() %>% - rename(age = age_min) %>% - mutate(type = "age-dependent FOI") %>% + ) %>% + rename(age_min = age), + by = "age_min" + ), + serosurvey_age_dep %>% + mutate(model_type = "age-dependent FOI") %>% left_join( probability_seropositive_by_age( model = "age", foi = foi_age_varying, seroreversion_rate = 0 - ), - by = "age") - ) %>% - mutate(type = as.factor(type)) - -# plot both -ggplot(data = serosurvey_combined) + - geom_pointrange( - aes(x = age, y = middle, ymin = lower, ymax = upper) - ) + + ) %>% + rename(age_min = age), + by = "age_min" + ) + ) %>% + mutate(model_type = as.factor(model_type)) + +plot_serosurvey(serosurvey = serosurvey_combined) + geom_line( - aes(x = age, y = seropositivity), + data = serosurvey_combined, + aes(x = age_min, y = seropositivity), color = "blue", linewidth = 1 ) + scale_y_continuous(labels = scales::percent) + ylab("Seropositivity") + xlab("Age") + - facet_wrap(~type) + facet_wrap(~model_type) ``` ## Time-dependent FOI @@ -205,13 +183,9 @@ serosurvey_spiky <- simulate_serosurvey( Again, we can plot the true seropositivities, which highlights the jumps in seropositivity corresponding to cohorts born either side of epidemics: -```{r, echo=FALSE, fig.align="center"} +```{r, fig.align="center"} # plot shows jumps in seropositivity -ggplot() + - geom_pointrange( - data = serosurvey_spiky %>% add_posterior_quantiles(), - aes(x = age_min, y = middle, ymin = lower, ymax = upper) - ) + +plot_serosurvey(serosurvey_spiky) + geom_line( data = probability_seropositive_by_age( model = "time", @@ -304,13 +278,9 @@ serosurvey <- simulate_serosurvey( ) ``` -```{r, echo=FALSE, fig.align="center"} +```{r, fig.align="center"} # plot -ggplot() + - geom_pointrange( - data = serosurvey %>% add_posterior_quantiles(), - aes(x = age_min, y = middle, ymin = lower, ymax = upper) - ) + +plot_serosurvey(serosurvey) + geom_line( data = probability_seropositive_by_age( model = "age-time", @@ -337,48 +307,46 @@ serosurvey_serorevert <- simulate_serosurvey( ) ``` -```{r, fig.align="center", echo=FALSE} +```{r, fig.align="center"} # combine with constant FOI survey -serosurvey_combined <- serosurvey %>% - add_posterior_quantiles() %>% - rename(age = age_min) %>% - mutate(type = "non-seroreverting") %>% +serosurvey_combined <- rbind( + serosurvey %>% + mutate(model_type = "non-seroreverting") %>% left_join( probability_seropositive_by_age( model = "age-time", foi = foi_age_time, seroreversion_rate = 0 - ), - by = "age") %>% - bind_rows( - serosurvey_serorevert %>% - add_posterior_quantiles() %>% - rename(age = age_min) %>% - mutate(type = "seroreverting") %>% + ) %>% + rename(age_min = age), + by = "age_min" + ), + serosurvey_serorevert %>% + mutate(model_type = "seroreverting") %>% left_join( - probability_seropositive_by_age( + probability_seropositive_by_age( model = "age-time", foi = foi_age_time, seroreversion_rate = 0.01 - ), - by = "age") - ) %>% - mutate(type = as.factor(type)) + ) %>% + rename(age_min = age), + by = "age_min" + ) + ) %>% + mutate(model_type = as.factor(model_type)) # plot both -ggplot(data = serosurvey_combined) + - geom_pointrange( - aes(x = age, y = middle, ymin = lower, ymax = upper) - ) + +plot_serosurvey(serosurvey = serosurvey_combined) + geom_line( - aes(x = age, y = seropositivity), + data = serosurvey_combined, + aes(x = age_min, y = seropositivity), color = "blue", linewidth = 1 ) + scale_y_continuous(labels = scales::percent) + ylab("Seropositivity") + xlab("Age") + - facet_wrap(~type) + facet_wrap(~model_type) ``` # Simulating from a general serological model @@ -483,13 +451,16 @@ serosurvey <- simulate_serosurvey_general_model( ) ``` -```{r, echo=FALSE, fig.align="center"} +```{r, fig.align="center"} # plot -serosurvey %>% - add_posterior_quantiles() %>% - ggplot(aes(x = age_min, y = middle)) + - geom_pointrange(aes(ymin = lower, ymax = upper)) + - geom_smooth(se = FALSE) + +plot_serosurvey(serosurvey) + + geom_line( + data = seropositive_hiv, + aes(x = age, y = seropositivity), + color = "blue", + linewidth = 1 + ) + scale_y_continuous(labels = scales::percent) + - ylab("Seropositivity") + ylab("Seropositivity") + + xlab("Age") ``` diff --git a/vignettes/use_cases.Rmd b/vignettes/use_cases.Rmd index 57db4b39..302eb600 100644 --- a/vignettes/use_cases.Rmd +++ b/vignettes/use_cases.Rmd @@ -48,50 +48,53 @@ serofoi was used to compare three potential scenarios of Chikungunya transmissio ```{r chik_fast, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=5, fig.asp=1, fig.align="center", fig.keep="all"} # Load and prepare data data("chik2015") -chik2015p <- prepare_serodata(chik2015) # Implementation of the models -m1_chik <- fit_seromodel( - serodata = chik2015p, - foi_model = "constant", - iter = 1000, - thin = 2 +seromodel_constant <- fit_seromodel( + serosurvey = chik2015, + model_type = "constant", + iter = 1000 ) -m2_chik <- fit_seromodel( - serodata = chik2015p, - foi_model = "tv_normal", - chunk_size = 5, - iter = 2000, - thin = 2 + +seromodel_time <- fit_seromodel( + serosurvey = chik2015, + model_type = "time", + foi_prior = sf_normal(0, 0.01), + foi_index = get_foi_index(chik2015, group_size = 5), + iter = 2500 ) -m3_chik <- fit_seromodel( - serodata = chik2015p, - foi_model = "tv_normal_log", - chunk_size = 5, - iter = 2000, - thin = 2 +seromodel_log_time <- fit_seromodel( + serosurvey = chik2015, + model_type = "time", + foi_prior = sf_normal(0, 0.01), + is_log_foi = TRUE, + foi_index = get_foi_index(chik2015, group_size = 5), + iter = 2000 ) # Visualisation of the results -p1_chik <- plot_seromodel(m1_chik, - serodata = chik2015p, - max_lambda = 0.07, +plot_constant <- plot_seromodel( + seromodel = seromodel_constant, + serosurvey = chik2015, + foi_max = 0.07, size_text = 6 ) -p2_chik <- plot_seromodel(m2_chik, - serodata = chik2015p, - max_lambda = 0.07, +plot_time <- plot_seromodel( + seromodel = seromodel_time, + serosurvey = chik2015, + foi_max = 0.07, size_text = 6 ) -p3_chik <- plot_seromodel(m3_chik, - serodata = chik2015p, - max_lambda = 0.07, +plot_log_time <- plot_seromodel( + seromodel = seromodel_log_time, + serosurvey = chik2015, + foi_max = 0.07, size_text = 6 ) -cowplot::plot_grid(p1_chik, p2_chik, p3_chik, ncol = 3) +cowplot::plot_grid(plot_constant, plot_time, plot_log_time, ncol = 3) ``` Figure 1. ***serofoi*** models for FoI estimates of Chikungunya virus transmission in an urban remote area of Brazil. @@ -113,58 +116,57 @@ From [@carrera2020], we use a dataset measuring IgG antibodies against *VEEV* i ### The result: -***serofoi*** was used to compare three potential scenarios of *VEEV* transmission: *constant endemic*, *epidemic slow*, and *epidemic fast*. The results showed a significant increase in the estimated *Force-of-Infection (FoI)* in the region, indicating a rise in *VEEV* transmission. The study found that there was much higher statistical support for a time-varying rather than a constant scenario based on higher elpd and lower se values of the two time-varying models compared to the constant one (Figure 2). The results also suggest slightly (yet relevant) better support for model 3 (`tv-nomal-log`), compared to model 2 (`tv-normal`), suggesting a recent increase in transmission in the study area. +***serofoi*** was used to compare three potential scenarios of *VEEV* transmission: *constant endemic*, *epidemic slow*, and *epidemic fast*. The results showed a significant increase in the estimated *Force-of-Infection (FoI)* in the region, indicating a rise in *VEEV* transmission. The study found that there was much higher statistical support for a time-varying rather than a constant scenario based on higher elpd and lower se values of the two time-varying models compared to the constant one (Figure 2). The results also suggest slightly (yet relevant) better support for model 3 (`log-time`), compared to model 2 (`time`), suggesting a recent increase in transmission in the study area. ```{r veev_hidden, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=5, fig.asp=1, fig.align="center", fig.keep="all"} # Load and prepare data data("veev2012") -veev2012p <- prepare_serodata(veev2012) # Implementation of the models -m1_veev <- fit_seromodel( - serodata = veev2012p, - foi_model = "constant", - iter = 500, - thin = 2 +seromodel_constant <- fit_seromodel( + serosurvey = veev2012, + model_type = "constant", + iter = 1000 ) -m2_veev <- fit_seromodel( - serodata = veev2012p, - foi_model = "tv_normal", - chunk_size = 5, - iter = 2000, - thin = 2 +seromodel_time <- fit_seromodel( + serosurvey = veev2012, + model_type = "time", + foi_prior = sf_normal(0, 0.1), + foi_index = get_foi_index(veev2012, group_size = 5), + iter = 2000 ) -m3_veev <- fit_seromodel( - serodata = veev2012p, - foi_model = "tv_normal_log", - chunk_size = 5, - iter = 2000, - thin = 2 +seromodel_log_time <- fit_seromodel( + serosurvey = veev2012, + model_type = "time", + foi_prior = sf_normal(0, 0.1), + is_log_foi = TRUE, + foi_index = get_foi_index(veev2012, group_size = 5), + iter = 2000 ) # Visualisation of the results -p1_veev <- plot_seromodel( - m1_veev, - serodata = veev2012p, - max_lambda = 0.35, +plot_constant <- plot_seromodel( + seromodel = seromodel_constant, + serosurvey = veev2012, + foi_max = 0.35, size_text = 6 ) -p2_veev <- plot_seromodel( - m2_veev, - serodata = veev2012p, - max_lambda = 0.35, +plot_time <- plot_seromodel( + seromodel = seromodel_time, + serosurvey = veev2012, + foi_max = 0.35, size_text = 6 ) -p3_veev <- plot_seromodel( - m3_veev, - serodata = veev2012p, - max_lambda = 0.35, +plot_log_time <- plot_seromodel( + seromodel = seromodel_log_time, + serosurvey = veev2012, + foi_max = 0.35, size_text = 6 ) -cowplot::plot_grid(p1_veev, p2_veev, p3_veev, ncol = 3) +cowplot::plot_grid(plot_constant, plot_time, plot_log_time, ncol = 3) ``` Figure 2. ***serofoi*** models for *FoI* estimates of *Venezuelan Equine Encephalitis Virus (VEEV)* transmission in a rural remote area of Panama. @@ -190,39 +192,41 @@ Because Chagas is an endemic disease, we should use only the ***serofoi*** endem ```{r chagas_endemic, include = TRUE, echo = TRUE, results="hide", errors = FALSE, warning = FALSE, message = FALSE, fig.width=5, fig.asp=1, fig.align="center", fig.keep="all"} # Load and prepare data data("chagas2012") -chagas2012p <- prepare_serodata(chagas2012) # Implementation of the models -m1_cha <- fit_seromodel( - serodata = chagas2012p, - foi_model = "constant", +seromodel_constant <- fit_seromodel( + serosurvey = chagas2012, + model_type = "constant", iter = 800 ) -m2_cha <- fit_seromodel( - serodata = chagas2012p, - foi_model = "tv_normal", - chunk_size = 5, - iter = 1000 +seromodel_time <- fit_seromodel( + serosurvey = chagas2012, + model_type = "time", + foi_index = get_foi_index(chagas2012, group_size = 10), + iter = 1500 ) # Visualisation of the results -p1_cha <- plot_seromodel( - m1_cha, - serodata = chagas2012p, - size_text = 6, - bin_data = FALSE -) -p2_cha <- plot_seromodel( - m2_cha, - serodata = chagas2012p, - size_text = 6, - bin_data = FALSE -) -cowplot::plot_grid(p1_cha, p2_cha, ncol = 2) +plot_constant <- plot_seromodel( + seromodel = seromodel_constant, + serosurvey = chagas2012, + bin_serosurvey = TRUE, + bin_step = 10, + size_text = 6 +) +plot_time <- plot_seromodel( + seromodel = seromodel_time, + serosurvey = chagas2012, + bin_serosurvey = TRUE, + bin_step = 10, + size_text = 6 +) +cowplot::plot_grid(plot_constant, plot_time, ncol = 2) ``` Figure 3. ***serofoi*** endemic models for *FoI* estimates of *Trypanosoma cruzi* in a rural area of Colombia. ```{r clean env cha, include = FALSE, echo = TRUE, message=FALSE} rm(list = ls(pattern = "cha")) ``` + ## References From 97f5f3f2832dbe0ed402d48ffb1b6aa5cc953b74 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 09:40:42 -0500 Subject: [PATCH 66/78] update WORDLIST --- inst/WORDLIST | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/inst/WORDLIST b/inst/WORDLIST index 80e6482c..7710fcfd 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -7,11 +7,14 @@ BMJ Bahia Bajaj Basáñez +Binom CMD Codecov Conteh +CucunubC +Cucunubá Dib -Domínguez +DomC Epiverse Everlyn FOI @@ -29,6 +32,8 @@ Liscano Mariscal María Neira +NicolC +Nicolás ORCID Panamá Parra @@ -37,7 +42,9 @@ Pittí Quevedo RStan Refactorization +Serocatalytic Seroreversion +Serosurvey Serosurveys Sumali Triatomine @@ -48,12 +55,16 @@ Yaneth Zulma alphaviruses bayesian -binom bmatrix boldsymbol +cauchy cdots cruzi dD +dI +dN +dP +dR dS dX ddots @@ -66,16 +77,23 @@ forall frac ggplot hiv +leq lifecycle +mathcal mc +nguez numerating org packagetemplate +prob rhat +rightarrow rstan +sd se serocatalytic seromodel +seropositives seropositivities seroprev seroreversion @@ -83,7 +101,7 @@ serosurvey serosurveys sim stan -sur +textit triatomine u vdots From 7ce260006ce0c8d858b057284328d99181c59ef9 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 15:01:42 -0500 Subject: [PATCH 67/78] doc: update serofoi.Rmd --- vignettes/serofoi.Rmd | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/vignettes/serofoi.Rmd b/vignettes/serofoi.Rmd index 11a882c1..34d34de1 100644 --- a/vignettes/serofoi.Rmd +++ b/vignettes/serofoi.Rmd @@ -61,15 +61,17 @@ The integrated dataset `serodata_test` provides a minimal example of the input o library(serofoi) # Loading and preparing data for modelling data(chagas2012) -serodata_test <- prepare_serodata(chagas2012) + # Model implementation model_constant <- fit_seromodel( - serodata = serodata_test, - foi_model = "constant" + serosurvey = chagas2012, + model_type = "constant" ) # Visualisation -plot_seromodel(model_constant, - serodata = serodata_test, +plot_seromodel( + model_constant, + serosurvey = chagas2012, + bin_serosurvey = TRUE, size_text = 6 ) ``` From 9571e250eb10d809c2f69bbfb4817efd4d5aa9b2 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 15:24:10 -0500 Subject: [PATCH 68/78] doc: remove unecessary dependency from simulating_serosurveys vignette --- vignettes/simulating_serosurveys.Rmd | 1 - 1 file changed, 1 deletion(-) diff --git a/vignettes/simulating_serosurveys.Rmd b/vignettes/simulating_serosurveys.Rmd index 185e6870..062af337 100644 --- a/vignettes/simulating_serosurveys.Rmd +++ b/vignettes/simulating_serosurveys.Rmd @@ -19,7 +19,6 @@ library(serofoi) library(ggplot2) library(dplyr) library(purrr) -library(forcats) ``` ```{r ggplot theme, include=FALSE} theme_set(theme_bw()) From 4dfbd351648ffbc79c2d0b8c4558ffc2c4bfe7cb Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 15:55:06 -0500 Subject: [PATCH 69/78] fix: correct serofoi-package.Rd encoding --- inst/WORDLIST | 7 +------ man/serofoi-package.Rd | 4 ++-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/inst/WORDLIST b/inst/WORDLIST index 7710fcfd..462eaa93 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -11,10 +11,8 @@ Binom CMD Codecov Conteh -CucunubC -Cucunubá Dib -DomC +Domínguez Epiverse Everlyn FOI @@ -32,8 +30,6 @@ Liscano Mariscal María Neira -NicolC -Nicolás ORCID Panamá Parra @@ -81,7 +77,6 @@ leq lifecycle mathcal mc -nguez numerating org packagetemplate diff --git a/man/serofoi-package.Rd b/man/serofoi-package.Rd index 0e9d187e..8fcb4efa 100644 --- a/man/serofoi-package.Rd +++ b/man/serofoi-package.Rd @@ -22,11 +22,11 @@ Useful links: } \author{ -\strong{Maintainer}: Zulma M. CucunubC! \email{zulma.cucunuba@javeriana.edu.co} (\href{https://orcid.org/0000-0002-8165-3198}{ORCID}) +\strong{Maintainer}: Zulma M. Cucunubá \email{zulma.cucunuba@javeriana.edu.co} (\href{https://orcid.org/0000-0002-8165-3198}{ORCID}) Authors: \itemize{ - \item NicolC!s T. DomC-nguez \email{ex-ntorres@javeriana.edu.co} (\href{https://orcid.org/0009-0002-8484-1298}{ORCID}) + \item Nicolás T. Domínguez \email{ex-ntorres@javeriana.edu.co} (\href{https://orcid.org/0009-0002-8484-1298}{ORCID}) \item Ben Lambert \item Pierre Nouvellet } From 4f236be54d62b28d3158b3f04db18a26716b4b51 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Tue, 20 Aug 2024 15:59:51 -0500 Subject: [PATCH 70/78] doc: move vignettes to articles --- .Rbuildignore | 1 + vignettes/{ => articles}/foi_models.Rmd | 9 +-------- vignettes/{ => articles}/simulating_serosurveys.Rmd | 5 ----- vignettes/{ => articles}/use_cases.Rmd | 11 ++--------- 4 files changed, 4 insertions(+), 22 deletions(-) rename vignettes/{ => articles}/foi_models.Rmd (98%) rename vignettes/{ => articles}/simulating_serosurveys.Rmd (98%) rename vignettes/{ => articles}/use_cases.Rmd (98%) diff --git a/.Rbuildignore b/.Rbuildignore index 158350c7..128b8aec 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,3 +14,4 @@ ^Meta$ ^.lintr$ ^.*/tests/docker/.*$ +^vignettes/articles$ diff --git a/vignettes/foi_models.Rmd b/vignettes/articles/foi_models.Rmd similarity index 98% rename from vignettes/foi_models.Rmd rename to vignettes/articles/foi_models.Rmd index 88f6e725..b6b752b0 100644 --- a/vignettes/foi_models.Rmd +++ b/vignettes/articles/foi_models.Rmd @@ -1,12 +1,5 @@ --- -title: "An introduction to Force-of-Infection (FoI) Models" -output: rmarkdown::html_vignette -bibliography: references.bib -link-citations: true -vignette: > - %\VignetteIndexEntry{An introduction to Force-of-Infection (FoI) Models} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} +title: "An Introduction To Force Of Infection Models" --- ```{r, include = FALSE} diff --git a/vignettes/simulating_serosurveys.Rmd b/vignettes/articles/simulating_serosurveys.Rmd similarity index 98% rename from vignettes/simulating_serosurveys.Rmd rename to vignettes/articles/simulating_serosurveys.Rmd index 062af337..64e79df5 100644 --- a/vignettes/simulating_serosurveys.Rmd +++ b/vignettes/articles/simulating_serosurveys.Rmd @@ -1,10 +1,5 @@ --- title: "Simulating Serosurveys" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{simulating_serosurveys} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} diff --git a/vignettes/use_cases.Rmd b/vignettes/articles/use_cases.Rmd similarity index 98% rename from vignettes/use_cases.Rmd rename to vignettes/articles/use_cases.Rmd index 302eb600..0afbce3b 100644 --- a/vignettes/use_cases.Rmd +++ b/vignettes/articles/use_cases.Rmd @@ -1,15 +1,7 @@ --- -title: "Real-life use cases for serofoi" -output: rmarkdown::html_vignette -bibliography: references.bib -link-citations: true -vignette: > - %\VignetteIndexEntry{Real-life use cases for serofoi} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} +title: "Real-Life Use Cases For Serofoi" --- - ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, @@ -230,3 +222,4 @@ rm(list = ls(pattern = "cha")) ``` ## References + From 04b0dc36818333e215e2f515e31495f4b1840c9c Mon Sep 17 00:00:00 2001 From: ntorresd Date: Wed, 21 Aug 2024 11:49:18 -0500 Subject: [PATCH 71/78] change version tag to 1.0.0 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6010d0bd..0d3510c8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: serofoi Type: Package Title: Estimates the Force-of-Infection of a given pathogen from population based seroprevalence studies -Version: 0.1.0 +Version: 1.0.0 Authors@R: c( person( From 5a4a554dd9718a89e3d49f9814385b3e1deeb436 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Wed, 21 Aug 2024 13:18:57 -0500 Subject: [PATCH 72/78] fix: remove `survey_year` from `plot_serosurvey()` --- R/plot_seromodel.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index a6a4d0c7..d5790ab7 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -128,7 +128,6 @@ plot_serosurvey <- function( n_seropositive = sum(.data$n_seropositive) ) %>% dplyr::mutate( - survey_year = unique(serosurvey$survey_year), age_min = as.integer(gsub("[[]|\\,.*", "\\1", .data$age_interval)) + 1, age_max = as.integer(gsub(".*\\,|[]]", "\\1", .data$age_interval)) ) %>% From 8d3ed76587cd558b7d2ba07d677fdadb24712dd3 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 22 Aug 2024 11:49:30 -0500 Subject: [PATCH 73/78] doc: add some examples in functions' documentation Add examples for: - `fit_seromodel` - `get_foi_index` - `plot_serosurvey` --- R/build_stan_data.R | 3 +++ R/fit_seromodel.R | 7 +++++++ R/plot_seromodel.R | 12 ++++++++++++ 3 files changed, 22 insertions(+) diff --git a/R/build_stan_data.R b/R/build_stan_data.R index 918ffd0b..86487905 100644 --- a/R/build_stan_data.R +++ b/R/build_stan_data.R @@ -67,6 +67,9 @@ sf_none <- function() { #' @param group_size Age groups size #' @return Integer vector with the indexes numerating each year/age #' (depending on the model). +#' @examples +#' data(chagas2012) +#' foi_index <- get_foi_index(chagas2012, group_size = 25) #' @export get_foi_index <- function( serosurvey, diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index 90d07869..affcee66 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -85,6 +85,13 @@ set_foi_init <- function( #' @param ... Additional parameters for [rstan][rstan::sampling] #' @returns stan_fit object with force-of-infection and seroreversion #' (when applicable) samples +#' @examples +#' data(veev2012) +#' seromodel <- fit_seromodel( +#' serosurvey = veev2012, +#' model_type = "time", +#' foi_index = get_foi_index(veev2012, group_size = 30) +#' ) #' @export fit_seromodel <- function( serosurvey, diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index d5790ab7..f1ef90cc 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -102,6 +102,18 @@ get_age_intervals <- function(serosurvey, step) { #' @param bin_step Integer specifying the age groups bin size to be used when #' `bin_serosurvey` is set to `TRUE`. #' @return ggplot object with seroprevalence plot +#' @examples +#' # Chikungunya example serosurvey +#' data(chik2015) +#' plot_serosurvey(chik2015) +#' +#' # VEEV example serosurvey +#' data(veev2012) +#' plot_serosurvey(veev2012) +#' +#' # Chagas disease example serosurvey +#' data(chagas2012) +#' plot_serosurvey(chagas2012, bin_serosurvey = TRUE) #' @export plot_serosurvey <- function( serosurvey, From c7d8ac8c95f830d3f209a6b492b72b050f4d364f Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 22 Aug 2024 11:51:51 -0500 Subject: [PATCH 74/78] doc: add complementary information to functions' documentation --- R/build_stan_data.R | 6 ++++-- R/fit_seromodel.R | 5 +++-- R/summarise_seromodel.R | 7 +++++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/R/build_stan_data.R b/R/build_stan_data.R index 86487905..358ffe1c 100644 --- a/R/build_stan_data.R +++ b/R/build_stan_data.R @@ -61,8 +61,10 @@ sf_none <- function() { #' Generates force-of-infection indexes for heterogeneous age groups #' -#' The max value of the force-of-infection indexes correspond to -#' the number of foi values to be estimated when sampling. +#' Generates a list of integers indexing together the time/age intervals +#' for which FOI values will be estimated in [fit_seromodel]. +#' The max value in `foi_index` correspond to the number of FOI values to +#' be estimated when sampling. #' @inheritParams fit_seromodel #' @param group_size Age groups size #' @return Integer vector with the indexes numerating each year/age diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index affcee66..e0e0c2b7 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -67,12 +67,13 @@ set_foi_init <- function( #' force-of-infection. Currently available options are: #' \describe{ #' \item{[sf_normal]}{Function to set normal distribution prior. -#' Available for time models in the log-scale.} +#' Available for time models in the log-scale} #' \item{[sf_cauchy]}{Function to set Cauchy distribution prior. #' Available for time models in regular scale.} #' } #' @param foi_index Integer vector specifying the age-groups for which -#' force-of-infection values will be estimated +#' force-of-infection values will be estimated. It can be specified by +#' means of [get_foi_index] #' @param is_seroreversion Boolean specifying whether to include #' seroreversion rate estimation in the model #' @param seroreversion_prior seroreversion distribution specified by means of diff --git a/R/summarise_seromodel.R b/R/summarise_seromodel.R index 79439d93..08529cc2 100644 --- a/R/summarise_seromodel.R +++ b/R/summarise_seromodel.R @@ -2,6 +2,13 @@ #' #' @inheritParams extract_central_estimates #' @param par_loo_estimate Name of the loo estimate to be extracted. +#' Available options are: +#' \describe{ +#' \item{`"elpd_loo"`}{Expected log pointwise predictive density} +#' \item{`"p_loo"`}{Effective number of parameters} +#' \item{`"looic"`}{Leave-one-out cross-validation information criteria} +#' } +#' For additional information refer to [loo][loo::loo]. #' @param loo_estimate_digits Number of loo estimate digits #' @return Text summarising specified loo estimate #' @export From b56a8a9c37b7cd76976301438fa7771a6b2668fb Mon Sep 17 00:00:00 2001 From: ntorresd Date: Thu, 22 Aug 2024 11:52:16 -0500 Subject: [PATCH 75/78] doc: update documentation --- R/fit_seromodel.R | 3 ++- man/add_age_group_to_serosurvey.Rd | 3 ++- man/build_stan_data.Rd | 8 +++++--- man/extract_central_estimates.Rd | 3 ++- man/fit_seromodel.Rd | 16 +++++++++++++--- man/get_age_intervals.Rd | 3 ++- man/get_foi_index.Rd | 13 ++++++++++--- man/plot_foi_estimates.Rd | 3 ++- man/plot_rhats.Rd | 3 ++- man/plot_seromodel.Rd | 3 ++- man/plot_seroprevalence_estimates.Rd | 3 ++- man/plot_serosurvey.Rd | 16 +++++++++++++++- man/plot_summary.Rd | 3 ++- man/prepare_serosurvey_for_plotting.Rd | 3 ++- man/set_foi_init.Rd | 3 ++- man/summarise_central_estimate.Rd | 3 ++- man/summarise_loo_estimate.Rd | 9 ++++++++- man/summarise_seromodel.Rd | 12 ++++++++++-- 18 files changed, 85 insertions(+), 25 deletions(-) diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index e0e0c2b7..4bf91a29 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -49,7 +49,8 @@ set_foi_init <- function( #' #' @param serosurvey #' \describe{ -#' \item{`survey_year`}{Year in which the survey took place} +#' \item{`survey_year`}{Year in which the survey took place +#' (only needed to plot time models)} #' \item{`age_min`}{Floor value of the average between age_min and age_max} #' \item{`age_max`}{The size of the sample} #' \item{`sample_size`}{Number of samples for each age group} diff --git a/man/add_age_group_to_serosurvey.Rd b/man/add_age_group_to_serosurvey.Rd index b96b7d3a..a68efa78 100644 --- a/man/add_age_group_to_serosurvey.Rd +++ b/man/add_age_group_to_serosurvey.Rd @@ -8,7 +8,8 @@ add_age_group_to_serosurvey(serosurvey) } \arguments{ \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/build_stan_data.Rd b/man/build_stan_data.Rd index eb6b2d3e..e8627565 100644 --- a/man/build_stan_data.Rd +++ b/man/build_stan_data.Rd @@ -17,7 +17,8 @@ build_stan_data( } \arguments{ \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} @@ -34,7 +35,8 @@ the helper functions. Currently available options are: }} \item{foi_index}{Integer vector specifying the age-groups for which -force-of-infection values will be estimated} +force-of-infection values will be estimated. It can be specified by +means of \link{get_foi_index}} \item{is_log_foi}{Boolean to set logarithmic scale in the FOI} @@ -42,7 +44,7 @@ force-of-infection values will be estimated} force-of-infection. Currently available options are: \describe{ \item{\link{sf_normal}}{Function to set normal distribution prior. -Available for time models in the log-scale.} +Available for time models in the log-scale} \item{\link{sf_cauchy}}{Function to set Cauchy distribution prior. Available for time models in regular scale.} }} diff --git a/man/extract_central_estimates.Rd b/man/extract_central_estimates.Rd index 163a92fe..d9701da4 100644 --- a/man/extract_central_estimates.Rd +++ b/man/extract_central_estimates.Rd @@ -16,7 +16,8 @@ extract_central_estimates( with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/fit_seromodel.Rd b/man/fit_seromodel.Rd index ab4cf1d0..e6c93057 100644 --- a/man/fit_seromodel.Rd +++ b/man/fit_seromodel.Rd @@ -19,7 +19,8 @@ fit_seromodel( } \arguments{ \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} @@ -41,13 +42,14 @@ the helper functions. Currently available options are: force-of-infection. Currently available options are: \describe{ \item{\link{sf_normal}}{Function to set normal distribution prior. -Available for time models in the log-scale.} +Available for time models in the log-scale} \item{\link{sf_cauchy}}{Function to set Cauchy distribution prior. Available for time models in regular scale.} }} \item{foi_index}{Integer vector specifying the age-groups for which -force-of-infection values will be estimated} +force-of-infection values will be estimated. It can be specified by +means of \link{get_foi_index}} \item{is_seroreversion}{Boolean specifying whether to include seroreversion rate estimation in the model} @@ -69,3 +71,11 @@ stan_fit object with force-of-infection and seroreversion \description{ Runs specified stan model for the force-of-infection } +\examples{ +data(veev2012) +seromodel <- fit_seromodel( +serosurvey = veev2012, + model_type = "time", + foi_index = get_foi_index(veev2012, group_size = 30) +) +} diff --git a/man/get_age_intervals.Rd b/man/get_age_intervals.Rd index 3cb6abb6..02c63a78 100644 --- a/man/get_age_intervals.Rd +++ b/man/get_age_intervals.Rd @@ -8,7 +8,8 @@ get_age_intervals(serosurvey, step) } \arguments{ \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/get_foi_index.Rd b/man/get_foi_index.Rd index dd06e9fb..73cc6011 100644 --- a/man/get_foi_index.Rd +++ b/man/get_foi_index.Rd @@ -8,7 +8,8 @@ get_foi_index(serosurvey, group_size) } \arguments{ \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} @@ -22,6 +23,12 @@ Integer vector with the indexes numerating each year/age (depending on the model). } \description{ -The max value of the force-of-infection indexes correspond to -the number of foi values to be estimated when sampling. +Generates a list of integers indexing together the time/age intervals +for which FOI values will be estimated in \link{fit_seromodel}. +The max value in \code{foi_index} correspond to the number of FOI values to +be estimated when sampling. +} +\examples{ +data(chagas2012) +foi_index <- get_foi_index(chagas2012, group_size = 25) } diff --git a/man/plot_foi_estimates.Rd b/man/plot_foi_estimates.Rd index b9e3753d..cf2ce023 100644 --- a/man/plot_foi_estimates.Rd +++ b/man/plot_foi_estimates.Rd @@ -18,7 +18,8 @@ plot_foi_estimates( with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/plot_rhats.Rd b/man/plot_rhats.Rd index d3225181..9ea8b3d9 100644 --- a/man/plot_rhats.Rd +++ b/man/plot_rhats.Rd @@ -11,7 +11,8 @@ plot_rhats(seromodel, serosurvey, par_name = "foi_expanded", size_text = 11) with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/plot_seromodel.Rd b/man/plot_seromodel.Rd index 475a94c9..2de8b85e 100644 --- a/man/plot_seromodel.Rd +++ b/man/plot_seromodel.Rd @@ -24,7 +24,8 @@ plot_seromodel( with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/plot_seroprevalence_estimates.Rd b/man/plot_seroprevalence_estimates.Rd index 6ef46ade..29cf73ed 100644 --- a/man/plot_seroprevalence_estimates.Rd +++ b/man/plot_seroprevalence_estimates.Rd @@ -18,7 +18,8 @@ plot_seroprevalence_estimates( with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/plot_serosurvey.Rd b/man/plot_serosurvey.Rd index ff40b2ea..60865a2b 100644 --- a/man/plot_serosurvey.Rd +++ b/man/plot_serosurvey.Rd @@ -13,7 +13,8 @@ plot_serosurvey( } \arguments{ \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} @@ -36,3 +37,16 @@ ggplot object with seroprevalence plot \description{ Plots seroprevalence from the given serosurvey } +\examples{ +# Chikungunya example serosurvey +data(chik2015) +plot_serosurvey(chik2015) + +# VEEV example serosurvey +data(veev2012) +plot_serosurvey(veev2012) + +# Chagas disease example serosurvey +data(chagas2012) +plot_serosurvey(chagas2012, bin_serosurvey = TRUE) +} diff --git a/man/plot_summary.Rd b/man/plot_summary.Rd index b660bfb4..274322cc 100644 --- a/man/plot_summary.Rd +++ b/man/plot_summary.Rd @@ -18,7 +18,8 @@ plot_summary( with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/prepare_serosurvey_for_plotting.Rd b/man/prepare_serosurvey_for_plotting.Rd index c0c8f637..bd6c654d 100644 --- a/man/prepare_serosurvey_for_plotting.Rd +++ b/man/prepare_serosurvey_for_plotting.Rd @@ -8,7 +8,8 @@ prepare_serosurvey_for_plotting(serosurvey, alpha = 0.05) } \arguments{ \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/set_foi_init.Rd b/man/set_foi_init.Rd index 16201036..d62452db 100644 --- a/man/set_foi_init.Rd +++ b/man/set_foi_init.Rd @@ -10,7 +10,8 @@ set_foi_init(foi_init, is_log_foi, foi_index) \item{is_log_foi}{Boolean to set logarithmic scale in the FOI} \item{foi_index}{Integer vector specifying the age-groups for which -force-of-infection values will be estimated} +force-of-infection values will be estimated. It can be specified by +means of \link{get_foi_index}} } \description{ Sets initialization function for sampling diff --git a/man/summarise_central_estimate.Rd b/man/summarise_central_estimate.Rd index 32b0020a..868577da 100644 --- a/man/summarise_central_estimate.Rd +++ b/man/summarise_central_estimate.Rd @@ -17,7 +17,8 @@ summarise_central_estimate( with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} diff --git a/man/summarise_loo_estimate.Rd b/man/summarise_loo_estimate.Rd index 055038e0..456da0ac 100644 --- a/man/summarise_loo_estimate.Rd +++ b/man/summarise_loo_estimate.Rd @@ -14,7 +14,14 @@ summarise_loo_estimate( \item{seromodel}{stan_fit object obtained from sampling a model with \link{fit_seromode}} -\item{par_loo_estimate}{Name of the loo estimate to be extracted.} +\item{par_loo_estimate}{Name of the loo estimate to be extracted. +Available options are: +\describe{ +\item{\code{"elpd_loo"}}{Expected log pointwise predictive density} +\item{\code{"p_loo"}}{Effective number of parameters} +\item{\code{"looic"}}{Leave-one-out cross-validation information criteria} +} +For additional information refer to \link[loo:loo]{loo}.} \item{loo_estimate_digits}{Number of loo estimate digits} } diff --git a/man/summarise_seromodel.Rd b/man/summarise_seromodel.Rd index 0b7ce2fe..0b06e310 100644 --- a/man/summarise_seromodel.Rd +++ b/man/summarise_seromodel.Rd @@ -19,7 +19,8 @@ summarise_seromodel( with \link{fit_seromode}} \item{serosurvey}{\describe{ -\item{\code{survey_year}}{Year in which the survey took place} +\item{\code{survey_year}}{Year in which the survey took place +(only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} \item{\code{sample_size}}{Number of samples for each age group} @@ -28,7 +29,14 @@ with \link{fit_seromode}} \item{alpha}{1 - alpha indicates the credibility level to be used} -\item{par_loo_estimate}{Name of the loo estimate to be extracted.} +\item{par_loo_estimate}{Name of the loo estimate to be extracted. +Available options are: +\describe{ +\item{\code{"elpd_loo"}}{Expected log pointwise predictive density} +\item{\code{"p_loo"}}{Effective number of parameters} +\item{\code{"looic"}}{Leave-one-out cross-validation information criteria} +} +For additional information refer to \link[loo:loo]{loo}.} \item{loo_estimate_digits}{Number of loo estimate digits} From a671e4d2d8b53a99b4aba3e322960cbf7c4f1c4a Mon Sep 17 00:00:00 2001 From: ntorresd Date: Mon, 9 Sep 2024 17:28:33 -0500 Subject: [PATCH 76/78] refac: change `sample_size` for `n_sample` across the package --- R/build_stan_data.R | 2 +- R/fit_seromodel.R | 2 +- R/plot_seromodel.R | 8 +- R/simulate_serosurvey.R | 40 ++++----- R/validation.R | 6 +- data/chagas2012.RData | Bin 549 -> 256 bytes data/chik2015.RData | Bin 257 -> 254 bytes data/veev2012.RData | Bin 268 -> 265 bytes inst/extdata/chagas2012.RDS | Bin 516 -> 233 bytes inst/extdata/chik2015.RDS | Bin 236 -> 233 bytes inst/extdata/veev2012.RDS | Bin 247 -> 245 bytes inst/stan/age_no_seroreversion.stan | 2 +- inst/stan/age_seroreversion.stan | 2 +- inst/stan/constant_no_seroreversion.stan | 2 +- inst/stan/constant_seroreversion.stan | 2 +- inst/stan/data/basic_data.stan | 2 +- .../generated_quantities/log_likelihood.stan | 2 +- inst/stan/time_log_no_seroreversion.stan | 2 +- inst/stan/time_log_seroreversion.stan | 2 +- inst/stan/time_no_seroreversion.stan | 2 +- inst/stan/time_seroreversion.stan | 2 +- tests/testthat/test-simulate_serosurvey.R | 78 +++++++++--------- vignettes/articles/foi_models.Rmd | 14 ++-- vignettes/articles/simulating_serosurveys.Rmd | 8 +- 24 files changed, 89 insertions(+), 89 deletions(-) diff --git a/R/build_stan_data.R b/R/build_stan_data.R index 358ffe1c..534d02b6 100644 --- a/R/build_stan_data.R +++ b/R/build_stan_data.R @@ -188,7 +188,7 @@ build_stan_data <- function( age_max = max(serosurvey$age_max), ages = seq(1, max(serosurvey$age_max), 1), n_seropositive = serosurvey$n_seropositive, - sample_size = serosurvey$sample_size, + n_sample = serosurvey$n_sample, age_groups = serosurvey$age_group ) %>% set_stan_data_defaults( diff --git a/R/fit_seromodel.R b/R/fit_seromodel.R index 4bf91a29..c7121081 100644 --- a/R/fit_seromodel.R +++ b/R/fit_seromodel.R @@ -53,7 +53,7 @@ set_foi_init <- function( #' (only needed to plot time models)} #' \item{`age_min`}{Floor value of the average between age_min and age_max} #' \item{`age_max`}{The size of the sample} -#' \item{`sample_size`}{Number of samples for each age group} +#' \item{`n_sample`}{Number of samples for each age group} #' \item{`n_seropositive`}{Number of positive samples for each age group} #' } #' @param model_type Type of the model. Either "constant", "age" or "time" diff --git a/R/plot_seromodel.R b/R/plot_seromodel.R index f1ef90cc..d0b1eb20 100644 --- a/R/plot_seromodel.R +++ b/R/plot_seromodel.R @@ -7,7 +7,7 @@ #' \describe{ #' \item{seroprev}{Seroprevalence computed as the proportion of positive #' cases `n_seropositive` in the number of samples -#' `sample_size` for each age group} +#' `n_sample` for each age group} #' \item{seroprev_lower}{Lower limit of the binomial confidence interval #' of `seroprev`} #' \item{seroprev_upper}{Upper limit of the binomial confidence interval @@ -23,7 +23,7 @@ prepare_serosurvey_for_plotting <- function( #nolint cbind( Hmisc::binconf( serosurvey$n_seropositive, - serosurvey$sample_size, + serosurvey$n_sample, alpha = alpha, method = "exact", return.df = TRUE @@ -136,7 +136,7 @@ plot_serosurvey <- function( serosurvey <- serosurvey %>% dplyr::group_by(.data$age_interval) %>% dplyr::summarise( - sample_size = sum(.data$sample_size), + n_sample = sum(.data$n_sample), n_seropositive = sum(.data$n_seropositive) ) %>% dplyr::mutate( @@ -165,7 +165,7 @@ plot_serosurvey <- function( ggplot2::geom_point( ggplot2::aes( y = .data$seroprev, - size = .data$sample_size + size = .data$n_sample ), fill = "#7a0177", colour = "black", shape = 21 ) + diff --git a/R/simulate_serosurvey.R b/R/simulate_serosurvey.R index 2b2c3127..56428ab8 100644 --- a/R/simulate_serosurvey.R +++ b/R/simulate_serosurvey.R @@ -326,7 +326,7 @@ add_age_bins <- function(survey_features) { survey_by_individual_age <- function(survey_features, age_df) { age_df %>% left_join(survey_features, by = "group") %>% - rename(overall_sample_size = .data$sample_size) + rename(overall_sample_size = .data$n_sample) } #' Generate random sample sizes using multinomial sampling. @@ -335,15 +335,15 @@ survey_by_individual_age <- function(survey_features, age_df) { #' multinomial sampling. It takes the total sample size and the number of age #' groups as input and returns a vector of sample sizes for each age group. #' -#' @param sample_size The total sample size to be distributed among age groups. +#' @param n_sample The total sample size to be distributed among age groups. #' @param n_ages The number of age groups. #' #' @return A vector containing random sample sizes for each age group. -multinomial_sampling_group <- function(sample_size, n_ages) { +multinomial_sampling_group <- function(n_sample, n_ages) { prob_value <- 1 / n_ages probs <- rep(prob_value, n_ages) sample_size_by_age <- as.vector( - rmultinom(1, sample_size, prob = probs) + rmultinom(1, n_sample, prob = probs) ) return(sample_size_by_age) } @@ -367,10 +367,10 @@ generate_random_sample_sizes <- function(survey_df_long) { for (interval_aux in na.omit(intervals)) { df_tmp <- survey_df_long %>% filter(.data$group == interval_aux) - sample_size <- df_tmp$overall_sample_size[1] - sample_size_by_age <- multinomial_sampling_group(sample_size, nrow(df_tmp)) + n_sample <- df_tmp$overall_sample_size[1] + sample_size_by_age <- multinomial_sampling_group(n_sample, nrow(df_tmp)) df_tmp <- df_tmp %>% - mutate(sample_size = sample_size_by_age) + mutate(n_sample = sample_size_by_age) if (is.null(df_new)) { df_new <- df_tmp @@ -450,20 +450,20 @@ generate_seropositive_counts_by_age_bin <- function( #nolint dplyr::mutate( n_seropositive = rbinom( nrow(probability_seropositive_by_age), - .data$sample_size, + .data$n_sample, .data$seropositivity) ) grouped_df <- combined_df %>% dplyr::group_by(.data$age_min, .data$age_max) %>% dplyr::summarise( - sample_size = sum(.data$sample_size), + n_sample = sum(.data$n_sample), n_seropositive = sum(.data$n_seropositive), .groups = "drop" ) %>% left_join( survey_features, - by = c("age_min", "age_max", "sample_size") + by = c("age_min", "age_max", "n_sample") ) return(grouped_df) @@ -480,7 +480,7 @@ generate_seropositive_counts_by_age_bin <- function( #nolint #' different years. It should have two columns: 'year' and 'foi'. #' @param survey_features A dataframe containing information about the binned #' age groups and sample sizes for each. It should contain columns: -#' ['age_min', 'age_max', 'sample_size']. +#' ['age_min', 'age_max', 'n_sample']. #' @param seroreversion_rate A non-negative value determining the rate of #' seroreversion (per year). Default is 0. #' @@ -496,7 +496,7 @@ generate_seropositive_counts_by_age_bin <- function( #nolint #' survey_features <- data.frame( #' age_min = c(1, 3, 15), #' age_max = c(2, 14, 20), -#' sample_size = c(1000, 2000, 1500)) +#' n_sample = c(1000, 2000, 1500)) #' serosurvey <- simulate_serosurvey_time_model( #' foi_df, survey_features) #' @export @@ -545,7 +545,7 @@ simulate_serosurvey_time_model <- function( #' different ages. It should have two columns: 'age' and 'foi'. #' @param survey_features A dataframe containing information about the binned #' age groups and sample sizes for each. It should contain columns: -#' ['age_min', 'age_max', 'sample_size']. +#' ['age_min', 'age_max', 'n_sample']. #' @param seroreversion_rate A non-negative value determining the rate of #' seroreversion (per year). Default is 0. #' @@ -561,7 +561,7 @@ simulate_serosurvey_time_model <- function( #' survey_features <- data.frame( #' age_min = c(1, 3, 15), #' age_max = c(2, 14, 20), -#' sample_size = c(1000, 2000, 1500)) +#' n_sample = c(1000, 2000, 1500)) #' serosurvey <- simulate_serosurvey_age_model( #' foi_df, survey_features) #' @export @@ -610,7 +610,7 @@ simulate_serosurvey_age_model <- function( #' different ages. It should have two columns: 'year', 'age' and 'foi'. #' @param survey_features A dataframe containing information about the binned #' age groups and sample sizes for each. It should contain columns: -#' ['age_min', 'age_max', 'sample_size']. +#' ['age_min', 'age_max', 'n_sample']. #' @param seroreversion_rate A non-negative value determining the rate of #' seroreversion (per year). Default is 0. #' @@ -627,7 +627,7 @@ simulate_serosurvey_age_model <- function( #' survey_features <- data.frame( #' age_min = c(1, 3, 15), #' age_max = c(2, 14, 20), -#' sample_size = c(1000, 2000, 1500)) +#' n_sample = c(1000, 2000, 1500)) #' serosurvey <- simulate_serosurvey_age_and_time_model( #' foi_df, survey_features) #' @export @@ -683,7 +683,7 @@ simulate_serosurvey_age_and_time_model <- function( #nolint #' For age-and-time-varying models the columns should be ['age', 'time', 'foi']. #' @param survey_features A dataframe containing information about the binned #' age groups and sample sizes for each. -#' It should contain columns: ['age_min', 'age_max', 'sample_size']. +#' It should contain columns: ['age_min', 'age_max', 'n_sample']. #' @param seroreversion_rate A non-negative value determining the rate of #' seroreversion (per year). Default is 0. #' @@ -699,7 +699,7 @@ simulate_serosurvey_age_and_time_model <- function( #nolint #' survey_features <- data.frame( #' age_min = c(1, 3, 15), #' age_max = c(2, 14, 20), -#' sample_size = c(1000, 2000, 1500)) +#' n_sample = c(1000, 2000, 1500)) #' serosurvey <- simulate_serosurvey( #' model = "time", #' foi = foi_df, @@ -713,7 +713,7 @@ simulate_serosurvey_age_and_time_model <- function( #nolint #' survey_features <- data.frame( #' age_min = c(1, 3, 15), #' age_max = c(2, 14, 20), -#' sample_size = c(1000, 2000, 1500)) +#' n_sample = c(1000, 2000, 1500)) #' serosurvey <- simulate_serosurvey( #' model = "age", #' foi = foi_df, @@ -728,7 +728,7 @@ simulate_serosurvey_age_and_time_model <- function( #nolint #' survey_features <- data.frame( #' age_min = c(1, 3, 15), #' age_max = c(2, 14, 20), -#' sample_size = c(1000, 2000, 1500)) +#' n_sample = c(1000, 2000, 1500)) #' serosurvey <- simulate_serosurvey( #' model = "age", #' foi = foi_df, diff --git a/R/validation.R b/R/validation.R index fe023e63..c155db62 100644 --- a/R/validation.R +++ b/R/validation.R @@ -3,7 +3,7 @@ validate_serosurvey <- function(serosurvey) { col_types <- list( age_min = "numeric", age_max = "numeric", - sample_size = "numeric", + n_sample = "numeric", n_seropositive = "numeric" ) @@ -42,11 +42,11 @@ validate_survey_features <- function(survey_features) { if (!is.data.frame(survey_features) || !all( - c("age_min", "age_max", "sample_size") %in% names(survey_features)) + c("age_min", "age_max", "n_sample") %in% names(survey_features)) ) { stop( "survey_features must be a dataframe with columns ", - "'age_min', 'age_max', and 'sample_size'." + "'age_min', 'age_max', and 'n_sample'." ) } diff --git a/data/chagas2012.RData b/data/chagas2012.RData index c9dd0144ce52329a9620a2371f28ca93cd1ef917..7d8c7bc3fc06b4520e8c214e53908b4c1e14fee7 100644 GIT binary patch literal 256 zcmV+b0ssCViwFP!000001`BeDFy@NjVqjokW?*3flB_@`18ZoAo2~@|0}B(7!^ptG zzzL+ek~0$16N`-u42^*5gh9%Hm=B0q9Ol=6v@zhKfq)mJ4u~fJ@f0A|1>#(|e*XrD z`alN=Z3xli;N1Yz4^ig=759VC_8*{p69!auzEJam4?t*Rs5)nuc_=>PEXps}%S+5n zErvRj38d`*|Nno`6tX7gBo>1d2_T0{N@7W(URn`QKe}?5nF8Rzf(UXKmlloJR zCKf@3IP&6)6LSl4QlWf4AV0MzzaYOjvm~<&Cd!_eo*JKOk$2;$sS!tf3>D|7 z;0x;$Sm^uw2E7U{G7sj#58x8`7W@ik#r>Jjfp5h9X21-X2Is~7-hm6?tmvB&d@VQ| z(I@NJpB9vH_9uL<+=O*Nb$>jlpBDE`gyTV-$OmzLo|hu7!f~FL`N+D&{dJ_m94T@A zm4DRr)c&XL8?}E_K1xS@f4nTgVf)DXs6OsHEH39#)<>O3_Wkd9ht1JHbI5(tAm@_L z$ty4e%IBTW1AT`6L;s+k7;y$-z&U4kw_e}^uUh;Kk|KyN~CLT^EDL06zF(A&`4&{gOvbPc)& zy#u{tfs9}KtRr7XzK(ny`Ca7q{Ja{?5!m-Rnj@bNo}8lRh)hr&&) R05bo9FaS;WVXnvl0008#8A1R6 delta 62 zcmV-E0KxzM0f7RL$~RmnF}ENmHNH5rDitckmlt20T9jXqUz}NzSq2kjPfSmZ&&|w3 U;U-o9ng2i-0QAg_@5=!I01a*#i~s-t diff --git a/data/veev2012.RData b/data/veev2012.RData index ad072d5cd45a8656db35d87de3adbcf29e10b855..eb288af6495c9d1623579e843758b8cae945f643 100644 GIT binary patch delta 103 zcmV-t0GR)b0*L~UsbDO8^WuvWa|?1(p?p3dKeZ^oAip@XB(n@A%AS~>8lRh)hr&&) zK(mvxD8F0}W-l+u6F|%ZWrIA$4|R_qx?_`b5{qGug$Z({B$g!Vr4<3q1M>cXFaSBo J&ImXH008(JDzpFq delta 106 zcmV-w0G0oV0*nHXsbgFyF}ENmHNH5rDitckmlt20T9jXqUz}NzSq2kjPfSmZ&&|w3 z;U-p~S<6|JU#0I!O@Sy~#O=#W4561i4ZYOA__cih$+;dH+Bd M0K!2i!aD*00A@uk$p8QV diff --git a/inst/extdata/chagas2012.RDS b/inst/extdata/chagas2012.RDS index 6ca1745e2c0071d9c0dc301f7d7a9c2b230582f3..eac4be9330ab520f15a9854cc47df9ea9d83c23d 100644 GIT binary patch literal 233 zcmVFRUhyWrhD#sGClD{`|2J~9L1jym_Um`fogk;@yi+OFu7W}R4Cz-DIy+5_FKFwn ziN1UEy=T0XYf05&i~pfw9c{Q`EP@*qm#gqiaGrT^3Y-NOz)^4ojEnV|p8!Y1dV^pL zjDpi*y)keG92WBi1&0KOedft=oF5gGan29BTz{{3kLu^RH@;u28?KLg<3!$z^Y`?L zxT=ryyv+NK3;#ThRH!2*t}pq2UCqvKc3;2!{qlb9_y6%S1RL!m$NSCWzK!CtF6DT? z^T>I>J#V8r`c{YBCknDI`JD`aF;IT*{2th6*nikR*iRVs1fBYJ72KRVq}7FE740wJ5(Jzc{lbvkWH6o|v8*pPQM7 T!cD9IGXDVp>xXnjssR81Erl6t diff --git a/inst/extdata/veev2012.RDS b/inst/extdata/veev2012.RDS index b51b140e78a05f88b8b34615237c79992f0752dd..daf1d10f4f8f6ba2768e030041394f85fb694014 100644 GIT binary patch delta 102 zcmV-s0Ga>y0rdfpmR}X($cryd%q_@Ch4T4;{M4fSg8bsllFTxgD0^ahYJ6^H9tt n_observations; int age_max; int ages[age_max]; int n_seropositive[n_observations]; -int sample_size[n_observations]; +int n_sample[n_observations]; int age_groups[n_observations]; diff --git a/inst/stan/generated_quantities/log_likelihood.stan b/inst/stan/generated_quantities/log_likelihood.stan index 28ad8d3e..6daf64e9 100644 --- a/inst/stan/generated_quantities/log_likelihood.stan +++ b/inst/stan/generated_quantities/log_likelihood.stan @@ -1,5 +1,5 @@ vector[n_observations] log_likelihood; for(i in 1:n_observations){ - log_likelihood[i] = binomial_lpmf(n_seropositive[i] | sample_size[i], prob_infected[i]); + log_likelihood[i] = binomial_lpmf(n_seropositive[i] | n_sample[i], prob_infected[i]); } diff --git a/inst/stan/time_log_no_seroreversion.stan b/inst/stan/time_log_no_seroreversion.stan index ee5b86a9..50e59166 100644 --- a/inst/stan/time_log_no_seroreversion.stan +++ b/inst/stan/time_log_no_seroreversion.stan @@ -33,7 +33,7 @@ transformed parameters { } model { - n_seropositive ~ binomial(sample_size, prob_infected); + n_seropositive ~ binomial(n_sample, prob_infected); sigma ~ normal(foi_sigma_rw_loc, foi_sigma_rw_sc); // force of infection prior diff --git a/inst/stan/time_log_seroreversion.stan b/inst/stan/time_log_seroreversion.stan index 7410d1c9..45d8fe0f 100644 --- a/inst/stan/time_log_seroreversion.stan +++ b/inst/stan/time_log_seroreversion.stan @@ -35,7 +35,7 @@ transformed parameters { } model { - n_seropositive ~ binomial(sample_size, prob_infected); + n_seropositive ~ binomial(n_sample, prob_infected); sigma ~ normal(foi_sigma_rw_loc, foi_sigma_rw_sc); // force of infection prior diff --git a/inst/stan/time_no_seroreversion.stan b/inst/stan/time_no_seroreversion.stan index d7af6dad..7276938c 100644 --- a/inst/stan/time_no_seroreversion.stan +++ b/inst/stan/time_no_seroreversion.stan @@ -30,7 +30,7 @@ transformed parameters { } model { - n_seropositive ~ binomial(sample_size, prob_infected); + n_seropositive ~ binomial(n_sample, prob_infected); sigma ~ cauchy(foi_sigma_rw_loc, foi_sigma_rw_sc); // force of infection prior diff --git a/inst/stan/time_seroreversion.stan b/inst/stan/time_seroreversion.stan index f6bb0305..00598c04 100644 --- a/inst/stan/time_seroreversion.stan +++ b/inst/stan/time_seroreversion.stan @@ -32,7 +32,7 @@ transformed parameters { } model { - n_seropositive ~ binomial(sample_size, prob_infected); + n_seropositive ~ binomial(n_sample, prob_infected); sigma ~ cauchy(foi_sigma_rw_sc, foi_sigma_rw_sc); // force of infection prior diff --git a/tests/testthat/test-simulate_serosurvey.R b/tests/testthat/test-simulate_serosurvey.R index b0f640eb..a0860bf2 100644 --- a/tests/testthat/test-simulate_serosurvey.R +++ b/tests/testthat/test-simulate_serosurvey.R @@ -247,14 +247,14 @@ test_that("add_age_bins function works as expected", { test_that("survey_by_individual_age function works as expected", { # Test case 1: Check if overall sample size is calculated correctly for a single row dataframe age_df <- data.frame(age_min = 20, age_max = 30, group = "[20,30]") - survey_features <- data.frame(group = "[20,30]", sample_size = 100) + survey_features <- data.frame(group = "[20,30]", n_sample = 100) expected_df <- data.frame(age_min = 20, age_max = 30, group = "[20,30]", overall_sample_size = 100) actual_df <- survey_by_individual_age(survey_features, age_df) expect_equal(actual_df, expected_df) # Test case 2: Check if overall sample size is calculated correctly for multiple rows dataframe age_df <- data.frame(age_min = c(20, 30), age_max = c(31, 50), group = c("[20,30]", "[31,50]")) - survey_features <- data.frame(group = c("[20,30]", "[31,50]"), sample_size = c(100, 150)) + survey_features <- data.frame(group = c("[20,30]", "[31,50]"), n_sample = c(100, 150)) expected_df <- data.frame(age_min = c(20, 30), age_max = c(31, 50), group = c("[20,30]", "[31,50]"), overall_sample_size = c(100, 150)) actual_df <- survey_by_individual_age(survey_features, age_df) expect_equal(actual_df, expected_df) @@ -262,20 +262,20 @@ test_that("survey_by_individual_age function works as expected", { test_that("multinomial_sampling_group function works as expected", { # Test case 1: Check if sample sizes are generated correctly for a sample size of 100 and 5 age groups - sample_size <- 100 + n_sample <- 100 n_ages <- 5 expected_length <- n_ages - actual_sample_sizes <- multinomial_sampling_group(sample_size, n_ages) + actual_sample_sizes <- multinomial_sampling_group(n_sample, n_ages) expect_length(actual_sample_sizes, expected_length) - expect_equal(sum(actual_sample_sizes), sample_size) + expect_equal(sum(actual_sample_sizes), n_sample) # Test case 2: Check if sample sizes are generated correctly for a sample size of 200 and 10 age groups - sample_size <- 200 + n_sample <- 200 n_ages <- 10 expected_length <- n_ages - actual_sample_sizes <- multinomial_sampling_group(sample_size, n_ages) + actual_sample_sizes <- multinomial_sampling_group(n_sample, n_ages) expect_length(actual_sample_sizes, expected_length) - expect_equal(sum(actual_sample_sizes), sample_size) + expect_equal(sum(actual_sample_sizes), n_sample) }) test_that("generate_random_sample_sizes function works as expected", { @@ -289,11 +289,11 @@ test_that("generate_random_sample_sizes function works as expected", { dplyr::group_by(group) %>% dplyr::summarise( overall_sample_size = overall_sample_size[1], - sample_size = sum(sample_size) + n_sample = sum(n_sample) ) expect_equal( group_df$overall_sample_size[1], - group_df$sample_size[1] + group_df$n_sample[1] ) # Test case 2: Check if random sample sizes are generated correctly for two intervals @@ -307,9 +307,9 @@ test_that("generate_random_sample_sizes function works as expected", { dplyr::group_by(group) %>% dplyr::summarise( overall_sample_size = overall_sample_size[1], - sample_size = sum(sample_size) + n_sample = sum(n_sample) ) - expect_equal(group_df$sample_size, group_df$overall_sample_size) + expect_equal(group_df$n_sample, group_df$overall_sample_size) }) test_that("sample_size_by_individual_age_random returns correct dataframe structure", { @@ -318,7 +318,7 @@ test_that("sample_size_by_individual_age_random returns correct dataframe struct survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500) + n_sample = c(1000, 2000, 1500) ) actual_df <- sample_size_by_individual_age_random(survey_features) expect_equal(nrow(actual_df), max(survey_features$age_max)) @@ -327,9 +327,9 @@ test_that("sample_size_by_individual_age_random returns correct dataframe struct dplyr::group_by(group) %>% dplyr::summarise( overall_sample_size = overall_sample_size[1], - sample_size = sum(sample_size) + n_sample = sum(n_sample) ) - expect_equal(group_df$sample_size, group_df$overall_sample_size) + expect_equal(group_df$n_sample, group_df$overall_sample_size) # Test with sample survey_features data: non-contiguous age bins # TODO: doesn't work as age_bins construction too simple currently. @@ -337,7 +337,7 @@ test_that("sample_size_by_individual_age_random returns correct dataframe struct survey_features <- data.frame( age_min = c(1, 7, 18), age_max = c(2, 16, 20), - sample_size = c(1000, 2000, 1500) + n_sample = c(1000, 2000, 1500) ) actual_df <- sample_size_by_individual_age_random(survey_features) expect_equal(nrow(actual_df), 15) @@ -346,7 +346,7 @@ test_that("sample_size_by_individual_age_random returns correct dataframe struct test_that("simulate_serosurvey_time_model function works as expected", { # Test case 1: Check if the output dataframe has the correct structure - sample_sizes <- c(1000, 2000, 1500) + n_samples <- c(1000, 2000, 1500) foi_df <- data.frame( year=seq(1990, 2009, 1) ) %>% @@ -354,11 +354,11 @@ test_that("simulate_serosurvey_time_model function works as expected", { survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = sample_sizes) + n_sample = n_samples) actual_df <- simulate_serosurvey_time_model(foi_df, survey_features) expect_true("age_min" %in% colnames(actual_df)) expect_true("age_max" %in% colnames(actual_df)) - expect_true("sample_size" %in% colnames(actual_df)) + expect_true("n_sample" %in% colnames(actual_df)) expect_true("n_seropositive" %in% colnames(actual_df)) # Test case 2: Check if the output dataframe has the correct number of rows @@ -393,7 +393,7 @@ test_that("simulate_serosurvey_time_model input validation", { survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500) + n_sample = c(1000, 2000, 1500) ) # Test with valid inputs @@ -405,7 +405,7 @@ test_that("simulate_serosurvey_time_model input validation", { # Test with non-dataframe survey_features dataframe expect_error(simulate_serosurvey_time_model(foi_df, list()), - "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") + "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'n_sample'.") # Test with misspelt columns in foi dataframe expect_error(simulate_serosurvey_time_model(data.frame(years = c(1990), foi = c(0.1)), survey_features), @@ -417,7 +417,7 @@ test_that("simulate_serosurvey_time_model input validation", { # Test with missing columns in survey_features dataframe expect_error(simulate_serosurvey_time_model(foi_df, data.frame(age_min = c(1))), - "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") + "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'n_sample'.") # Test with non-numeric seroreversion_rate expect_error(simulate_serosurvey_time_model(foi_df, survey_features, "seroreversion"), @@ -431,7 +431,7 @@ test_that("simulate_serosurvey_time_model input validation", { test_that("simulate_serosurvey_age_model function works as expected", { # Test case 1: Check if the output dataframe has the correct structure - sample_sizes <- c(1000, 2000, 1500) + n_samples <- c(1000, 2000, 1500) foi_df <- data.frame( age=seq(1, 20, 1) ) %>% @@ -439,11 +439,11 @@ test_that("simulate_serosurvey_age_model function works as expected", { survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = sample_sizes) + n_sample = n_samples) actual_df <- simulate_serosurvey_age_model(foi_df, survey_features) expect_true("age_min" %in% colnames(actual_df)) expect_true("age_max" %in% colnames(actual_df)) - expect_true("sample_size" %in% colnames(actual_df)) + expect_true("n_sample" %in% colnames(actual_df)) expect_true("n_seropositive" %in% colnames(actual_df)) # Test case 2: Check if the output dataframe has the correct number of rows @@ -478,7 +478,7 @@ test_that("simulate_serosurvey_age_model input validation", { survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500) + n_sample = c(1000, 2000, 1500) ) # Test with valid inputs @@ -490,7 +490,7 @@ test_that("simulate_serosurvey_age_model input validation", { # Test with non-dataframe survey_features dataframe expect_error(simulate_serosurvey_age_model(foi_df, list()), - "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") + "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'n_sample'.") # Test with misspelt columns in foi dataframe expect_error(simulate_serosurvey_age_model(data.frame(ages = c(1), foi = c(0.1)), survey_features), @@ -502,7 +502,7 @@ test_that("simulate_serosurvey_age_model input validation", { # Test with missing columns in survey_features dataframe expect_error(simulate_serosurvey_age_model(foi_df, data.frame(age_min = c(1))), - "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") + "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'n_sample'.") # Test with non-numeric seroreversion_rate expect_error(simulate_serosurvey_age_model(foi_df, survey_features, "seroreversion"), @@ -515,7 +515,7 @@ test_that("simulate_serosurvey_age_model input validation", { test_that("simulate_serosurvey_age_and_time_model function works as expected", { # Test case 1: Check if the output dataframe has the correct structure - sample_sizes <- c(1000, 2000, 1500) + n_samples <- c(1000, 2000, 1500) foi_df <- tidyr::expand_grid( year = seq(1990, 2009, 1), age = seq(1, 20, 1) @@ -524,11 +524,11 @@ test_that("simulate_serosurvey_age_and_time_model function works as expected", { survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = sample_sizes) + n_sample = n_samples) actual_df <- simulate_serosurvey_age_and_time_model(foi_df, survey_features) expect_true("age_min" %in% colnames(actual_df)) expect_true("age_max" %in% colnames(actual_df)) - expect_true("sample_size" %in% colnames(actual_df)) + expect_true("n_sample" %in% colnames(actual_df)) expect_true("n_seropositive" %in% colnames(actual_df)) # Test case 2: Check if the output dataframe has the correct number of rows @@ -565,7 +565,7 @@ test_that("simulate_serosurvey_age_and_time_model input validation", { survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500) + n_sample = c(1000, 2000, 1500) ) # Test with valid inputs @@ -577,7 +577,7 @@ test_that("simulate_serosurvey_age_and_time_model input validation", { # Test with non-dataframe survey_features dataframe expect_error(simulate_serosurvey_age_and_time_model(foi_df, list()), - "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") + "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'n_sample'.") # Test with misspelt columns in foi dataframe expect_error(simulate_serosurvey_age_and_time_model(data.frame(ages = c(1), foi = c(0.1)), survey_features), @@ -595,7 +595,7 @@ test_that("simulate_serosurvey_age_and_time_model input validation", { # Test with missing columns in survey_features dataframe expect_error(simulate_serosurvey_age_and_time_model(foi_df, data.frame(age_min = c(1))), - "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.") + "survey_features must be a dataframe with columns 'age_min', 'age_max', and 'n_sample'.") # Test with non-numeric seroreversion_rate expect_error(simulate_serosurvey_age_and_time_model(foi_df, survey_features, "seroreversion"), @@ -616,10 +616,10 @@ test_that("simulate_serosurvey returns serosurvey data based on specified model" survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500) + n_sample = c(1000, 2000, 1500) ) serosurvey <- simulate_serosurvey("age", foi_df, survey_features) - expect_true(all(names(serosurvey) %in% c("age_min", "age_max", "sample_size", "n_seropositive"))) + expect_true(all(names(serosurvey) %in% c("age_min", "age_max", "n_sample", "n_seropositive"))) # Test with 'time' model foi_df <- data.frame( @@ -627,7 +627,7 @@ test_that("simulate_serosurvey returns serosurvey data based on specified model" foi = runif(20, 0.05, 0.15) ) serosurvey <- simulate_serosurvey("time", foi_df, survey_features) - expect_true(all(names(serosurvey) %in% c("age_min", "age_max", "sample_size", "n_seropositive"))) + expect_true(all(names(serosurvey) %in% c("age_min", "age_max", "n_sample", "n_seropositive"))) # Test with 'age-time' model foi_df <- tidyr::expand_grid( @@ -637,7 +637,7 @@ test_that("simulate_serosurvey returns serosurvey data based on specified model" mutate(foi=rnorm(20 * 20, 0.1, 0.001)) serosurvey <- simulate_serosurvey("age-time", foi_df, survey_features) - expect_true(all(names(serosurvey) %in% c("age_min", "age_max", "sample_size", "n_seropositive"))) + expect_true(all(names(serosurvey) %in% c("age_min", "age_max", "n_sample", "n_seropositive"))) }) test_that("simulate_serosurvey handles invalid model inputs", { @@ -649,7 +649,7 @@ test_that("simulate_serosurvey handles invalid model inputs", { survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500) + n_sample = c(1000, 2000, 1500) ) expect_error(simulate_serosurvey("invalid_model", foi_df, survey_features), "model must be one of 'age', 'time', or 'age-time'.") diff --git a/vignettes/articles/foi_models.Rmd b/vignettes/articles/foi_models.Rmd index b6b752b0..e504330f 100644 --- a/vignettes/articles/foi_models.Rmd +++ b/vignettes/articles/foi_models.Rmd @@ -92,10 +92,10 @@ Now that we can describe the proportion of seropositive individuals $P(t)$ by me ::: l-body-outset | Serocatalytic model | scale | n_seropositive | FoI_1 prior | FoI_i, i>1 | sigma | mu | | ------------------- | ------- | ------------------------------------- | ------------------------------------- | ------------------------ | ----------------------- | ------------------------------------- | -| constant | regular | binomial(sample_size, prob_infected) | normal(mean, sd)
uniform(min, max) | \- | cauchy(mean, sd) | normal(mean, sd)
uniform(min, max) | -| age | regular | binomial(sample_size, prob_infected) | normal(mean, sd)
uniform(min, max) | normal(FoI_{i-1}, sigma) | cauchy(mean, sd) | normal(mean, sd)
uniform(min, max) | -| time | regular | binomial(sample_size, prob_infected) | normal(mean, sd)
uniform(min, max) | normal(FoI_{i-1}, sigma) | cauchy(mean, sd) | normal(mean, sd)
uniform(min, max) | -| time | log | binomial(sample_size, prob_infected) | normal(mean, sd)
uniform(min, max) | normal(FoI_{i-1}, sigma) | normal(location, scale) | normal(mean, sd)
uniform(min, max) | +| constant | regular | binomial(n_sample, prob_infected) | normal(mean, sd)
uniform(min, max) | \- | cauchy(mean, sd) | normal(mean, sd)
uniform(min, max) | +| age | regular | binomial(n_sample, prob_infected) | normal(mean, sd)
uniform(min, max) | normal(FoI_{i-1}, sigma) | cauchy(mean, sd) | normal(mean, sd)
uniform(min, max) | +| time | regular | binomial(n_sample, prob_infected) | normal(mean, sd)
uniform(min, max) | normal(FoI_{i-1}, sigma) | cauchy(mean, sd) | normal(mean, sd)
uniform(min, max) | +| time | log | binomial(n_sample, prob_infected) | normal(mean, sd)
uniform(min, max) | normal(FoI_{i-1}, sigma) | normal(location, scale) | normal(mean, sd)
uniform(min, max) | ::: Table 1. Available options. @@ -120,7 +120,7 @@ foi_df <- data.frame( survey_features <- data.frame( age_min = seq(1, 50, 5), age_max = seq(5, 50, 5), - sample_size = rep(25, 10) + n_sample = rep(25, 10) ) serosurvey_constant <- simulate_serosurvey( @@ -182,7 +182,7 @@ foi_df <- data.frame( survey_features <- data.frame( age_min = seq(1, 50, 5), age_max = seq(5, 50, 5), - sample_size = rep(25, 10) + n_sample = rep(25, 10) ) serosurvey_sw_dec <- simulate_serosurvey( @@ -242,7 +242,7 @@ foi_df <- data.frame( survey_features <- data.frame( age_min = seq(1, 50, 5), age_max = seq(5, 50, 5), - sample_size = rep(25, 10) + n_sample = rep(25, 10) ) serosurvey_large_epi <- simulate_serosurvey( diff --git a/vignettes/articles/simulating_serosurveys.Rmd b/vignettes/articles/simulating_serosurveys.Rmd index 64e79df5..cf511ec5 100644 --- a/vignettes/articles/simulating_serosurveys.Rmd +++ b/vignettes/articles/simulating_serosurveys.Rmd @@ -40,11 +40,11 @@ foi_constant %>% We suppose that we carry out a serological survey which randomly samples the population. For simplicity, to start, we imagine that all individual one-year group ages are sampled and the sample size in each age is the same: $n=15$. ```{r} -sample_size <- 15 +n_sample <- 15 survey_features <- data.frame( age_min = seq(1, max_age, 1), age_max = seq(1, max_age, 1), - sample_size = rep(sample_size, max_age)) + n_sample = rep(n_sample, max_age)) ``` We then put these pieces together and simulate a serosurvey. Either a time-varying FOI model or an age-varying FOI model would produce the same results here since they are equivalent when the FOI is constant. @@ -259,11 +259,11 @@ foi_age_time %>% We now simulate a serosurvey assuming these historical FOIs generated the population-wide seropositivities in 2025; we make the sample sizes larger to be able to clearly visualise the patterns in seropositivity. We also illustrate how serosurveys with wider age-bins can be generated by choosing 5-year bins. ```{r} max_age <- 80 -sample_size <- 50 +n_sample <- 50 survey_features <- data.frame( age_min = seq(1, max_age, 5), age_max = seq(5, max_age, 5)) %>% - mutate(sample_size = rep(sample_size, length(age_min))) + mutate(n_sample = rep(n_sample, length(age_min))) serosurvey <- simulate_serosurvey( model = "age-time", From 6195b5de5332880a75cefe2f296ebf1abac421a6 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Mon, 9 Sep 2024 17:29:19 -0500 Subject: [PATCH 77/78] doc: update documentation --- man/add_age_group_to_serosurvey.Rd | 2 +- man/build_stan_data.Rd | 2 +- man/extract_central_estimates.Rd | 2 +- man/fit_seromodel.Rd | 2 +- man/get_age_intervals.Rd | 2 +- man/get_foi_index.Rd | 2 +- man/multinomial_sampling_group.Rd | 4 ++-- man/plot_foi_estimates.Rd | 2 +- man/plot_rhats.Rd | 2 +- man/plot_seromodel.Rd | 2 +- man/plot_seroprevalence_estimates.Rd | 2 +- man/plot_serosurvey.Rd | 2 +- man/plot_summary.Rd | 2 +- man/prepare_serosurvey_for_plotting.Rd | 4 ++-- man/simulate_serosurvey.Rd | 8 ++++---- man/simulate_serosurvey_age_and_time_model.Rd | 4 ++-- man/simulate_serosurvey_age_model.Rd | 4 ++-- man/simulate_serosurvey_general_model.Rd | 2 +- man/simulate_serosurvey_time_model.Rd | 4 ++-- man/summarise_central_estimate.Rd | 2 +- man/summarise_seromodel.Rd | 2 +- 21 files changed, 29 insertions(+), 29 deletions(-) diff --git a/man/add_age_group_to_serosurvey.Rd b/man/add_age_group_to_serosurvey.Rd index a68efa78..634531ed 100644 --- a/man/add_age_group_to_serosurvey.Rd +++ b/man/add_age_group_to_serosurvey.Rd @@ -12,7 +12,7 @@ add_age_group_to_serosurvey(serosurvey) (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} } diff --git a/man/build_stan_data.Rd b/man/build_stan_data.Rd index e8627565..b07bc224 100644 --- a/man/build_stan_data.Rd +++ b/man/build_stan_data.Rd @@ -21,7 +21,7 @@ build_stan_data( (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} diff --git a/man/extract_central_estimates.Rd b/man/extract_central_estimates.Rd index d9701da4..e5c0ee4b 100644 --- a/man/extract_central_estimates.Rd +++ b/man/extract_central_estimates.Rd @@ -20,7 +20,7 @@ with \link{fit_seromode}} (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} diff --git a/man/fit_seromodel.Rd b/man/fit_seromodel.Rd index e6c93057..34dc0ff7 100644 --- a/man/fit_seromodel.Rd +++ b/man/fit_seromodel.Rd @@ -23,7 +23,7 @@ fit_seromodel( (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} diff --git a/man/get_age_intervals.Rd b/man/get_age_intervals.Rd index 02c63a78..d68b35a2 100644 --- a/man/get_age_intervals.Rd +++ b/man/get_age_intervals.Rd @@ -12,7 +12,7 @@ get_age_intervals(serosurvey, step) (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} diff --git a/man/get_foi_index.Rd b/man/get_foi_index.Rd index 73cc6011..10449090 100644 --- a/man/get_foi_index.Rd +++ b/man/get_foi_index.Rd @@ -12,7 +12,7 @@ get_foi_index(serosurvey, group_size) (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} diff --git a/man/multinomial_sampling_group.Rd b/man/multinomial_sampling_group.Rd index 08558564..5aa05989 100644 --- a/man/multinomial_sampling_group.Rd +++ b/man/multinomial_sampling_group.Rd @@ -4,10 +4,10 @@ \alias{multinomial_sampling_group} \title{Generate random sample sizes using multinomial sampling.} \usage{ -multinomial_sampling_group(sample_size, n_ages) +multinomial_sampling_group(n_sample, n_ages) } \arguments{ -\item{sample_size}{The total sample size to be distributed among age groups.} +\item{n_sample}{The total sample size to be distributed among age groups.} \item{n_ages}{The number of age groups.} } diff --git a/man/plot_foi_estimates.Rd b/man/plot_foi_estimates.Rd index cf2ce023..e653b7ee 100644 --- a/man/plot_foi_estimates.Rd +++ b/man/plot_foi_estimates.Rd @@ -22,7 +22,7 @@ with \link{fit_seromode}} (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} diff --git a/man/plot_rhats.Rd b/man/plot_rhats.Rd index 9ea8b3d9..507f7eeb 100644 --- a/man/plot_rhats.Rd +++ b/man/plot_rhats.Rd @@ -15,7 +15,7 @@ with \link{fit_seromode}} (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} diff --git a/man/plot_seromodel.Rd b/man/plot_seromodel.Rd index 2de8b85e..59344530 100644 --- a/man/plot_seromodel.Rd +++ b/man/plot_seromodel.Rd @@ -28,7 +28,7 @@ with \link{fit_seromode}} (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} diff --git a/man/plot_seroprevalence_estimates.Rd b/man/plot_seroprevalence_estimates.Rd index 29cf73ed..d81cbd22 100644 --- a/man/plot_seroprevalence_estimates.Rd +++ b/man/plot_seroprevalence_estimates.Rd @@ -22,7 +22,7 @@ with \link{fit_seromode}} (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} diff --git a/man/plot_serosurvey.Rd b/man/plot_serosurvey.Rd index 60865a2b..7ab7d945 100644 --- a/man/plot_serosurvey.Rd +++ b/man/plot_serosurvey.Rd @@ -17,7 +17,7 @@ plot_serosurvey( (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} diff --git a/man/plot_summary.Rd b/man/plot_summary.Rd index 274322cc..7d0b3359 100644 --- a/man/plot_summary.Rd +++ b/man/plot_summary.Rd @@ -22,7 +22,7 @@ with \link{fit_seromode}} (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} diff --git a/man/prepare_serosurvey_for_plotting.Rd b/man/prepare_serosurvey_for_plotting.Rd index bd6c654d..06c8bccf 100644 --- a/man/prepare_serosurvey_for_plotting.Rd +++ b/man/prepare_serosurvey_for_plotting.Rd @@ -12,7 +12,7 @@ prepare_serosurvey_for_plotting(serosurvey, alpha = 0.05) (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} @@ -23,7 +23,7 @@ serosurvey with additional columns: \describe{ \item{seroprev}{Seroprevalence computed as the proportion of positive cases \code{n_seropositive} in the number of samples -\code{sample_size} for each age group} +\code{n_sample} for each age group} \item{seroprev_lower}{Lower limit of the binomial confidence interval of \code{seroprev}} \item{seroprev_upper}{Upper limit of the binomial confidence interval diff --git a/man/simulate_serosurvey.Rd b/man/simulate_serosurvey.Rd index 3b9871f6..76dc69c2 100644 --- a/man/simulate_serosurvey.Rd +++ b/man/simulate_serosurvey.Rd @@ -17,7 +17,7 @@ For age-and-time-varying models the columns should be \link{'age', 'time', 'foi' \item{survey_features}{A dataframe containing information about the binned age groups and sample sizes for each. -It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} +It should contain columns: \link{'age_min', 'age_max', 'n_sample'}.} \item{seroreversion_rate}{A non-negative value determining the rate of seroreversion (per year). Default is 0.} @@ -44,7 +44,7 @@ mutate(foi = rnorm(length(year), 0.1, 0.01)) survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500)) + n_sample = c(1000, 2000, 1500)) serosurvey <- simulate_serosurvey( model = "time", foi = foi_df, @@ -58,7 +58,7 @@ mutate(foi = rnorm(length(year), 0.1, 0.01)) survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500)) + n_sample = c(1000, 2000, 1500)) serosurvey <- simulate_serosurvey( model = "age", foi = foi_df, @@ -73,7 +73,7 @@ mutate(foi = rnorm(20 * 20, 0.1, 0.01)) survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500)) + n_sample = c(1000, 2000, 1500)) serosurvey <- simulate_serosurvey( model = "age", foi = foi_df, diff --git a/man/simulate_serosurvey_age_and_time_model.Rd b/man/simulate_serosurvey_age_and_time_model.Rd index 02f66c39..d853122e 100644 --- a/man/simulate_serosurvey_age_and_time_model.Rd +++ b/man/simulate_serosurvey_age_and_time_model.Rd @@ -16,7 +16,7 @@ different ages. It should have two columns: 'year', 'age' and 'foi'.} \item{survey_features}{A dataframe containing information about the binned age groups and sample sizes for each. It should contain columns: -\link{'age_min', 'age_max', 'sample_size'}.} +\link{'age_min', 'age_max', 'n_sample'}.} \item{seroreversion_rate}{A non-negative value determining the rate of seroreversion (per year). Default is 0.} @@ -43,7 +43,7 @@ mutate(foi = rnorm(20 * 20, 0.1, 0.01)) survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500)) + n_sample = c(1000, 2000, 1500)) serosurvey <- simulate_serosurvey_age_and_time_model( foi_df, survey_features) } diff --git a/man/simulate_serosurvey_age_model.Rd b/man/simulate_serosurvey_age_model.Rd index d617cbf4..73f5342a 100644 --- a/man/simulate_serosurvey_age_model.Rd +++ b/man/simulate_serosurvey_age_model.Rd @@ -12,7 +12,7 @@ different ages. It should have two columns: 'age' and 'foi'.} \item{survey_features}{A dataframe containing information about the binned age groups and sample sizes for each. It should contain columns: -\link{'age_min', 'age_max', 'sample_size'}.} +\link{'age_min', 'age_max', 'n_sample'}.} \item{seroreversion_rate}{A non-negative value determining the rate of seroreversion (per year). Default is 0.} @@ -37,7 +37,7 @@ mutate(foi = rnorm(length(year), 0.1, 0.01)) survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500)) + n_sample = c(1000, 2000, 1500)) serosurvey <- simulate_serosurvey_age_model( foi_df, survey_features) } diff --git a/man/simulate_serosurvey_general_model.Rd b/man/simulate_serosurvey_general_model.Rd index af61bf5b..2012a683 100644 --- a/man/simulate_serosurvey_general_model.Rd +++ b/man/simulate_serosurvey_general_model.Rd @@ -21,7 +21,7 @@ birth cohort.} \item{survey_features}{A dataframe containing information about the binned age groups and sample sizes for each. -It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} +It should contain columns: \link{'age_min', 'age_max', 'n_sample'}.} } \value{ A dataframe with simulated serosurvey data, including age group diff --git a/man/simulate_serosurvey_time_model.Rd b/man/simulate_serosurvey_time_model.Rd index d5baebbe..a5a15094 100644 --- a/man/simulate_serosurvey_time_model.Rd +++ b/man/simulate_serosurvey_time_model.Rd @@ -12,7 +12,7 @@ different years. It should have two columns: 'year' and 'foi'.} \item{survey_features}{A dataframe containing information about the binned age groups and sample sizes for each. It should contain columns: -\link{'age_min', 'age_max', 'sample_size'}.} +\link{'age_min', 'age_max', 'n_sample'}.} \item{seroreversion_rate}{A non-negative value determining the rate of seroreversion (per year). Default is 0.} @@ -37,7 +37,7 @@ mutate(foi = rnorm(length(year), 0.1, 0.01)) survey_features <- data.frame( age_min = c(1, 3, 15), age_max = c(2, 14, 20), - sample_size = c(1000, 2000, 1500)) + n_sample = c(1000, 2000, 1500)) serosurvey <- simulate_serosurvey_time_model( foi_df, survey_features) } diff --git a/man/summarise_central_estimate.Rd b/man/summarise_central_estimate.Rd index 868577da..5a26b682 100644 --- a/man/summarise_central_estimate.Rd +++ b/man/summarise_central_estimate.Rd @@ -21,7 +21,7 @@ with \link{fit_seromode}} (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} diff --git a/man/summarise_seromodel.Rd b/man/summarise_seromodel.Rd index 0b06e310..7f0245ff 100644 --- a/man/summarise_seromodel.Rd +++ b/man/summarise_seromodel.Rd @@ -23,7 +23,7 @@ with \link{fit_seromode}} (only needed to plot time models)} \item{\code{age_min}}{Floor value of the average between age_min and age_max} \item{\code{age_max}}{The size of the sample} -\item{\code{sample_size}}{Number of samples for each age group} +\item{\code{n_sample}}{Number of samples for each age group} \item{\code{n_seropositive}}{Number of positive samples for each age group} }} From e3092d33903ee4d533858d3bd5f037f24ba6dc95 Mon Sep 17 00:00:00 2001 From: ntorresd Date: Mon, 9 Sep 2024 17:29:31 -0500 Subject: [PATCH 78/78] change version to 1.0.1 --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0d3510c8..9e244ff4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: serofoi Type: Package -Title: Estimates the Force-of-Infection of a given pathogen from population based seroprevalence studies -Version: 1.0.0 +Title: Estimates the Force-of-Infection of a given pathogen from population-based seroprevalence studies +Version: 1.0.1 Authors@R: c( person(