Skip to content

Commit

Permalink
#169 corrections to exposure matrix and simulating time models first …
Browse files Browse the repository at this point in the history
…draft
  • Loading branch information
ben18785 authored and ntorresd committed Jul 31, 2024
1 parent 7a59136 commit d79ca43
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 7 deletions.
145 changes: 138 additions & 7 deletions R/simulate_serosurvey.R
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
create_exposure_matrix <- function(ages) {

n_ages <- length(ages)
exposure_matrix <- matrix(NA, n_ages, n_ages)
exposure_matrix <- lower.tri(exposure_matrix, diag = TRUE)
exposure_matrix[exposure_matrix==TRUE] <- 1

return(exposure_matrix)
}

#' Computes the probability of being seropositive for age-varying
#' force-of-infection including seroreversion
#'
#' @param ages Integer indicating the ages of the exposed cohorts
#' @param foi Numeric atomic vector corresponding to the age-varying
#' force-of-infection to simulate from
#' @param mu Seroreversion rate
#' @return probability of being seropositive for age-varying FoI
#' including seroreversion
#' @return vector of probabilities of being seropositive for age-varying FoI
#' including seroreversion (ordered from youngest to oldest individuals)
probability_exact_time_varying <- function(
ages,
foi,
mu = 0
) {
n_ages <- length(ages)
exposure_matrix <- matrix(0, nrow = n_ages, ncol = n_ages)
for (k in 1:n_ages) {
exposure_matrix[k, (n_ages - ages[k] + 1):n_ages] <- 1
}

exposure_matrix <- create_exposure_matrix(ages)
probabilities <-
(foi / (foi + mu)) * (1 - exp(-drop(exposure_matrix %*% (foi + mu))))
return(probabilities)
Expand Down Expand Up @@ -45,3 +52,127 @@ probability_exact_age_varying <- function(
}
return(probability)
}

probability_seropositive_time_model_by_age <- function(
foi,
seroreversion) {

ages <- seq_along(foi$year)

probabilities <- probability_exact_time_varying(
age = ages,
foi = foi$foi,
mu = seroreversion
)

df <- data.frame(
age = ages,
seropositivity = probabilities
)

return(df)
}

create_group_interval <- function(age_min, age_max, is_first_row=FALSE) {

first_element <- dplyr::if_else(is_first_row, "[", "(")
last_element <- "]"
age_min <- dplyr::if_else(is_first_row, age_min, age_min - 1)
interval <- paste0(first_element, age_min, ",", age_max, last_element)

return(interval)
}

multinomial_sampling_group <- function(sample_size, n_ages) {
prob_value <- 1 / n_ages
probs <- rep(prob_value, n_ages)
sample_size_by_age <- as.vector(
rmultinom(1, sample_size, prob=probs)
)
return(sample_size_by_age)
}

sample_size_by_individual_age_random <- function(
survey_features
) {

ages <- seq(1, max(survey_features$age_max), 1)
bins <- cut(ages, breaks = c(1, survey_features$age_max), include.lowest = TRUE)
df <- data.frame(
age = ages,
group = bins
)

intervals <- vector(length = nrow(survey_features))
for(i in seq_along(intervals)) {
age_min <- survey_features$age_min[i]
age_max <- survey_features$age_max[i]
is_first_row <- i == 1
intervals[i] <- create_group_interval(
age_min,
age_max,
is_first_row
)
}
survey_features <- survey_features %>%
dplyr::mutate(group = intervals)

df <- df %>%
dplyr::left_join(survey_features, by = "group") %>%
dplyr::rename(overall_sample_size=sample_size)

for(i in seq_along(intervals)) {
interval_aux <- intervals[i]
df_tmp <- df %>%
dplyr::filter(group == interval_aux)
sample_size <- df_tmp$overall_sample_size[1]
sample_size_by_age <- multinomial_sampling_group(
sample_size, nrow(df_tmp)
)
df_tmp <- df_tmp %>%
dplyr::mutate(sample_size=sample_size_by_age)

if(i == 1)
df_new <- df_tmp
else
df_new <- df_new %>% bind_rows(df_tmp)
}

return(df_new)
}


simulate_serosurvey_time_model <- function(
foi,
survey_features,
seroreversion=FALSE
) {

probability_serop_by_age <- probability_seropositive_time_model_by_age(
foi = foi,
seroreversion = seroreversion
)

sample_size_by_age_random <- sample_size_by_individual_age_random(
survey_features = survey_features
)

combined_df <- probability_serop_by_age %>%
dplyr::left_join(sample_size_by_age_random, by="age") %>%
dplyr::mutate(
n_seropositive=rbinom(nrow(probability_serop_by_age),
combined_df$sample_size,
combined_df$seropositivity))

grouped_df <- combined_df %>%
dplyr::group_by(age_min, age_max) %>%
dplyr::summarise(
sample_size=sum(sample_size),
n_seropositive=sum(n_seropositive)
)

return(grouped_df)
}



35 changes: 35 additions & 0 deletions tests/testthat/test-simulate_serosurvey.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
test_that("probability_seropositive_time_model_by_age works", {

foi <- data.frame(
year=seq(1990, 2009, 1)
) %>%
mutate(foi=rnorm(20, 0.2, 0.01))

seroreversion <- 0.0
prob_df <- probability_seropositive_time_model_by_age(
foi = foi,
seroreversion = seroreversion
)

# check output dimensions
expect_equal(nrow(prob_df), nrow(foi))
ages <- seq(1, nrow(foi), 1)
expect_equal(ages, prob_df$age)

# checking monotonicity
derivative_foi <- diff(prob_df$seropositivity)
expect_true(all(derivative_foi > 0))

seroreversion <- 0.1
prob_df_1 <- probability_seropositive_time_model_by_age(
foi = foi,
seroreversion = seroreversion
)

# check output dimensions
expect_equal(nrow(prob_df_1), nrow(foi))
expect_equal(ages, prob_df_1$age)

# check seropositivities always lower (due to seroreversion)
expect_true(all(prob_df_1$seropositivity < prob_df$seropositivity))
})

0 comments on commit d79ca43

Please sign in to comment.