Skip to content

Commit

Permalink
#169 working draft of age and time model
Browse files Browse the repository at this point in the history
  • Loading branch information
ben18785 authored and ntorresd committed Jul 31, 2024
1 parent 184036c commit e73032f
Show file tree
Hide file tree
Showing 3 changed files with 239 additions and 18 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ Imports:
ggplot2,
Hmisc,
loo,
purrr
purrr,
tidyr,
tibble
LinkingTo:
BH (>= 1.66.0),
Rcpp (>= 0.12.0),
Expand Down
70 changes: 67 additions & 3 deletions R/simulate_serosurvey.R
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ probability_seropositive_age_model_by_age <- function(
#' It takes into account the FOI and the rate of seroreversion.
#'
#' @param foi A dataframe containing the force of infection (FOI) values for different ages.
#' It should have two columns: 'age' and 'foi'.
#' It should have three columns: 'year', 'age' and 'foi'.
#' @param seroreversion_rate A non-negative numeric value representing the rate of seroreversion.
#'
#' @return A dataframe with columns 'age' and 'seropositivity'.
Expand All @@ -139,8 +139,42 @@ probability_seropositive_age_and_time_model_by_age <- function(
seroreversion_rate
) {

foi_matrix <- foi %>%
tidyr::pivot_wider(
values_from = foi,
names_from = c(year)) %>%
tibble::column_to_rownames("age") %>%
as.matrix()

years <- unique(foi$year)
n_years <- length(years)
ages <- unique(foi$age)
n_ages <- length(ages)

probabilities <- vector(length = length(n_ages))
# solves ODE exactly within pieces
for (i in seq_along(years)) { # birth cohorts
probability_previous <- 0
foi_matrix_subset <- foi_matrix[1:(n_ages - i + 1), i:n_ages] %>%
as.matrix() # only to handle single element matrix case
foi_diag <- diag(foi_matrix_subset)
for(j in 1:(n_years - i + 1)) { # exposure during lifetime
foi_tmp <- foi_diag[j]
lambda_over_both <- (foi_tmp / (foi_tmp + seroreversion_rate))
probability_previous <- lambda_over_both +
(probability_previous - lambda_over_both) *
exp(-(foi_tmp + seroreversion_rate))
}
probabilities[i] <- probability_previous
}
probabilities_oldest_age_last <- rev(probabilities)

df <- data.frame(
age = ages,
seropositivity = probabilities_oldest_age_last
)

return(df)
}


Expand Down Expand Up @@ -282,7 +316,7 @@ validate_survey <- function(survey_features) {
}

validate_foi_df <- function(foi_df, cnames_additional) {
if (!is.data.frame(foi_df) || !all(cnames_additional %in% names(foi_df))) {
if (!is.data.frame(foi_df) || !all(cnames_additional %in% names(foi_df)) || ncol(foi_df) != (1 + length(cnames_additional))) {
if(length(cnames_additional) == 1)
message_end <- paste0(" and ", cnames_additional, ".")
else
Expand Down Expand Up @@ -443,6 +477,36 @@ simulate_serosurvey_age_model <- function(
return(grouped_df)
}

#' Simulate serosurvey data based on an age-and-time-varying FOI model.
#'
#' This function generates binned serosurvey data based on an age-and-time-varying FOI model,
#' optionally including seroreversion. This function allows construction of serosurveys
#' with binned age groups, and it generates uncertainty in the distribution of a sample size
#' within an age bin through multinomial sampling.
#'
#' @param foi A dataframe containing the force of infection (FOI) values for different ages.
#' It should have two columns: 'year', 'age' and 'foi'.
#' @param survey_features A dataframe containing information about the binned age groups and sample
#' sizes for each. It should contain columns: ['age_min', 'age_max', 'sample_size'].
#' @param seroreversion_rate A non-negative value determining the rate of seroreversion (per year).
#' Default is 0.
#'
#' @return A dataframe with simulated serosurvey data, including age group information, overall
#' sample sizes, the number of seropositive individuals, and other survey features.
#' @examples
#' # specify FOIs for each year
#' foi_df <- data.frame(
#' year = seq(1990, 2009, 1),
#' age = seq(1, 20, 1)
#' ) %>%
#' mutate(foi = rnorm(20 * 20, 0.1, 0.01))
#' survey_features <- data.frame(
#' age_min = c(1, 3, 15),
#' age_max = c(2, 14, 20),
#' sample_size = c(1000, 2000, 1500))
#' serosurvey <- simulate_serosurvey_age_and_time_model(
#' foi_df, survey_features)
#' @export
simulate_serosurvey_age_and_time_model <- function(
foi,
survey_features,
Expand Down Expand Up @@ -548,7 +612,7 @@ simulate_serosurvey <- function(
seroreversion_rate
)
} else if(model == "age-time" || model == "time-age") {
serosurvey <- simulate_serosurvey_age_and_time_model( #TODO add age-time
serosurvey <- simulate_serosurvey_age_and_time_model(
foi,
survey_features,
seroreversion_rate
Expand Down
183 changes: 169 additions & 14 deletions tests/testthat/test-simulate_serosurvey.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ test_that("probability_exact_age_varying calculates probabilities correctly", {
# Test with analytical solution for non-constant FOIs
ages <- c(1, 2)
fois <- c(0.1, 0.2)
probabilities <- probability_exact_age_varying(years, fois)
probabilities <- probability_exact_age_varying(ages, fois)
expected <- c(1 - exp(-0.1), 1 - exp(-(0.1 + 0.2)))
expect_true(
all(
Expand Down Expand Up @@ -95,8 +95,6 @@ test_that("probability_exact_time_varying calculates probabilities correctly", {

})



test_that("probability_seropositive_time_model_by_age works", {

foi <- data.frame(
Expand Down Expand Up @@ -169,6 +167,67 @@ test_that("probability_seropositive_age_model_by_age works", {
expect_true(all(prob_df_1$seropositivity < prob_df$seropositivity))
})

test_that("probability_seropositive_age_and_time_model_by_age works as expected", {
us <- c(0.1, 0.2, 0.3)
vs <- c(1, 0.5, 0.2)
foi <- expand_grid(
u=us,
v=vs
) %>%
mutate(foi=u * v) %>%
pull(foi)

foi_df <- expand_grid(
year=c(1990, 1991, 1992),
age=c(1, 2, 3)
) %>%
mutate(foi=foi) %>%
arrange(year)

prob_df <- probability_seropositive_age_and_time_model_by_age(
foi = foi_df,
seroreversion_rate = 0
)

foi_matrix <- foi_df %>%
tidyr::pivot_wider(
values_from = foi,
names_from = c(year)) %>%
tibble::column_to_rownames("age") %>%
as.matrix()
serop_age_1 <- 1 - exp(-foi_matrix[1, 3])
serop_age_2 <- 1 - exp(-(foi_matrix[1, 2] + foi_matrix[2, 3]))
serop_age_3 <- 1 - exp(-(foi_matrix[1, 1] + foi_matrix[2, 2] + foi_matrix[3, 3]))
expected <- c(serop_age_1, serop_age_2, serop_age_3)

expect_true(
all(
dplyr::near(
prob_df$seropositivity,
expected,
tol = 1e-6
)
)
)

# now add seroreversion
mu <- 0.1
prob_df_sr <- probability_seropositive_age_and_time_model_by_age(
foi = foi_df,
seroreversion_rate = mu
)
expect_true(all(prob_df_sr$seropositivity < prob_df$seropositivity))
lambda <- foi_matrix[1, 3]
serop_age_1 <- lambda / (lambda + mu) * (1 - exp(-(lambda + mu)))
expect_true(
dplyr::near(
prob_df_sr$seropositivity[1],
serop_age_1,
tol = 1e-6
)
)
})

test_that("add_age_bins function works as expected", {
# Test case 1: Check if intervals are created correctly for a single row dataframe
survey_features <- data.frame(age_min = 20, age_max = 30)
Expand Down Expand Up @@ -352,6 +411,10 @@ test_that("simulate_serosurvey_time_model input validation", {
expect_error(simulate_serosurvey_time_model(data.frame(years = c(1990), foi = c(0.1)), survey_features),
"foi must be a dataframe with columns foi and year.")

# Test with too many columns in foi dataframe
expect_error(simulate_serosurvey_time_model(data.frame(age = c(1), year = c(2), foi = c(0.1)), survey_features),
"foi must be a dataframe with columns foi and year.")

# Test with missing columns in survey_features dataframe
expect_error(simulate_serosurvey_time_model(foi_df, data.frame(age_min = c(1))),
"survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.")
Expand Down Expand Up @@ -433,6 +496,10 @@ test_that("simulate_serosurvey_age_model input validation", {
expect_error(simulate_serosurvey_age_model(data.frame(ages = c(1), foi = c(0.1)), survey_features),
"foi must be a dataframe with columns foi and age.")

# Test with too many columns in foi dataframe
expect_error(simulate_serosurvey_age_model(data.frame(age = c(1), year = c(2), foi = c(0.1)), survey_features),
"foi must be a dataframe with columns foi and age.")

# Test with missing columns in survey_features dataframe
expect_error(simulate_serosurvey_age_model(foi_df, data.frame(age_min = c(1))),
"survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.")
Expand All @@ -446,6 +513,99 @@ test_that("simulate_serosurvey_age_model input validation", {
"seroreversion_rate must be a non-negative numeric value.")
})

test_that("simulate_serosurvey_age_and_time_model function works as expected", {
# Test case 1: Check if the output dataframe has the correct structure
sample_sizes <- c(1000, 2000, 1500)
foi_df <- expand_grid(
year = seq(1990, 2009, 1),
age = seq(1, 20, 1)
) %>%
mutate(foi=rnorm(20 * 20, 0.1, 0.001))
survey_features <- data.frame(
age_min = c(1, 3, 15),
age_max = c(2, 14, 20),
sample_size = sample_sizes)
actual_df <- simulate_serosurvey_age_and_time_model(foi_df, survey_features)
expect_true("age_min" %in% colnames(actual_df))
expect_true("age_max" %in% colnames(actual_df))
expect_true("sample_size" %in% colnames(actual_df))
expect_true("n_seropositive" %in% colnames(actual_df))

# Test case 2: Check if the output dataframe has the correct number of rows
expected_rows <- nrow(survey_features)
actual_rows <- nrow(actual_df)
expect_equal(actual_rows, expected_rows)

# Test case 3: try a much higher FOI which should result in a higher proportion seropositive
foi_df_1 <- expand_grid(
year = seq(1990, 2009, 1),
age = seq(1, 20, 1)
) %>%
mutate(foi=rnorm(20 * 20, 10.1, 0.001))
actual_df_1 <- simulate_serosurvey_age_and_time_model(foi_df_1, survey_features)
expect_true(all(actual_df_1$n_seropositive >= actual_df$n_seropositive))

# Test case 4: allow a high rate of seroreversion which should reduce the proportion seropositive
actual_df_2 <- simulate_serosurvey_age_and_time_model(
foi=foi_df,
survey_features=survey_features,
seroreversion_rate=10
)
expect_true(all(actual_df_2$n_seropositive <= actual_df$n_seropositive))
})

test_that("simulate_serosurvey_age_and_time_model input validation", {

foi_df <- expand_grid(
year = seq(1990, 2009, 1),
age = seq(1, 20, 1)
) %>%
mutate(foi=rnorm(20 * 20, 0.1, 0.001))

survey_features <- data.frame(
age_min = c(1, 3, 15),
age_max = c(2, 14, 20),
sample_size = c(1000, 2000, 1500)
)

# Test with valid inputs
expect_silent(simulate_serosurvey_age_and_time_model(foi_df, survey_features))

# Test with non-dataframe foi dataframe
expect_error(simulate_serosurvey_age_and_time_model(list(), survey_features),
"foi must be a dataframe with columns foi and age.")

# Test with non-dataframe survey_features dataframe
expect_error(simulate_serosurvey_age_and_time_model(foi_df, list()),
"survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.")

# Test with misspelt columns in foi dataframe
expect_error(simulate_serosurvey_age_and_time_model(data.frame(ages = c(1), foi = c(0.1)), survey_features),
"foi must be a dataframe with columns foi, age and year.")

# Test with missing columns in foi dataframe
expect_error(simulate_serosurvey_age_and_time_model(data.frame(age = c(1), foi = c(0.1)), survey_features),
"foi must be a dataframe with columns foi, age and year.")
expect_error(simulate_serosurvey_age_and_time_model(data.frame(year = c(1), foi = c(0.1)), survey_features),
"foi must be a dataframe with columns foi, age and year.")

# Test with too many columns in foi dataframe
expect_error(simulate_serosurvey_time_model(data.frame(age = c(1), year = c(2), sex = c(3), foi = c(0.1)), survey_features),
"foi must be a dataframe with columns foi and year.")

# Test with missing columns in survey_features dataframe
expect_error(simulate_serosurvey_age_and_time_model(foi_df, data.frame(age_min = c(1))),
"survey_features must be a dataframe with columns 'age_min', 'age_max', and 'sample_size'.")

# Test with non-numeric seroreversion_rate
expect_error(simulate_serosurvey_age_and_time_model(foi_df, survey_features, "seroreversion"),
"seroreversion_rate must be a non-negative numeric value.")

# Test with negative seroreversion_rate
expect_error(simulate_serosurvey_age_and_time_model(foi_df, survey_features, -1),
"seroreversion_rate must be a non-negative numeric value.")
})


test_that("simulate_serosurvey returns serosurvey data based on specified model", {
# Test with 'age' model
Expand All @@ -469,17 +629,12 @@ test_that("simulate_serosurvey returns serosurvey data based on specified model"
serosurvey <- simulate_serosurvey("time", foi_df, survey_features)
expect_true(all(names(serosurvey) %in% c("age_min", "age_max", "sample_size", "n_seropositive")))

# Test with 'age-time' model: TODO
years <- 1981:2000
foi_df <- NULL
for (year in years) {
aux_df <- data.frame(
year = year,
age = seq(1, 30, 1),
foi = runif(30, 0.05, 0.15)
)
foi_df <- bind_rows(foi_df, aux_df)
}
# Test with 'age-time' model
foi_df <- expand_grid(
year = seq(1990, 2009, 1),
age = seq(1, 20, 1)
) %>%
mutate(foi=rnorm(20 * 20, 0.1, 0.001))

serosurvey <- simulate_serosurvey("age-time", foi_df, survey_features)
expect_true(all(names(serosurvey) %in% c("age_min", "age_max", "sample_size", "n_seropositive")))
Expand Down

0 comments on commit e73032f

Please sign in to comment.