Skip to content

Commit

Permalink
second stan model added
Browse files Browse the repository at this point in the history
  • Loading branch information
mwohlfender committed Sep 1, 2023
1 parent 6a51458 commit 7c26874
Show file tree
Hide file tree
Showing 20 changed files with 1,566 additions and 223 deletions.
42 changes: 21 additions & 21 deletions .Rhistory
Original file line number Diff line number Diff line change
@@ -1,24 +1,3 @@
n_nodes_detected <- n_nodes_detected + sum(stats::rbinom(n = n_new_leaves_cluster, size = 1, prob = detection_proba))
n_free_leaves <- n_free_leaves + n_new_leaves_cluster - 1
}
print(n_nodes_detected)
n_free_leaves <- 1
n_nodes_detected <- stats::rbinom(n = 1, size = 1, prob = detection_proba)
while ((n_free_leaves > 0) && (n_nodes_detected <= max_cluster_size)) {
n_new_leaves_tree <- stats::rnbinom(n = 1, size = k, mu = R)
n_new_leaves_cluster <- sum(stats::rbinom(n = n_new_leaves_tree, size = 1, prob = mutation_proba))
n_nodes_detected <- n_nodes_detected + sum(stats::rbinom(n = n_new_leaves_cluster, size = 1, prob = detection_proba))
n_free_leaves <- n_free_leaves + n_new_leaves_cluster - 1
}
print(n_nodes_detected)
n_free_leaves <- 1
n_nodes_detected <- stats::rbinom(n = 1, size = 1, prob = detection_proba)
while ((n_free_leaves > 0) && (n_nodes_detected <= max_cluster_size)) {
n_new_leaves_tree <- stats::rnbinom(n = 1, size = k, mu = R)
n_new_leaves_cluster <- sum(stats::rbinom(n = n_new_leaves_tree, size = 1, prob = mutation_proba))
n_nodes_detected <- n_nodes_detected + sum(stats::rbinom(n = n_new_leaves_cluster, size = 1, prob = detection_proba))
n_free_leaves <- n_free_leaves + n_new_leaves_cluster - 1
}
print(n_nodes_detected)
n_free_leaves <- 1
n_nodes_detected <- stats::rbinom(n = 1, size = 1, prob = detection_proba)
Expand Down Expand Up @@ -510,3 +489,24 @@ devtools::load_all(".")
options(mc.cores = parallel::detectCores())
estRodis_estimate_parameters(clusters_size = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17,18, 20, 22, 23, 28, 29, 32, 33, 34, 35, 36, 83, 103),clusters_freq = c(703, 117, 49, 37, 19, 17, 5, 15, 4, 3, 1, 3, 4, 2, 2, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1))
library(estRodis)
library(estRodis)
devtools::load_all(".")
library(estRodis)
devtools::load_all(".")
library(estRodis)
library(estRodis)
simulated_clusters <- estRodis_simulate_cluster_sizes(n_clusters = 1000,max_cluster_size = 2500, R = 0.8, k = 0.3, yearly_mutation_rate = 14,mean_generation_interval = 5.2, testing_proba = 0.6, sequencing_proba = 0.4)
simulated_clusters
options(mc.cores = parallelly::availableCores())
estRodis_estimate_parameters(clusters_size = simulated_clusters$size,clusters_freq = simulated_clusters$frequency,sequencing_proba = 0.44)
library(estRodis)
library(estRodis)
library(estRodis)
library(estRodis)
library(estRodis)
devtools::load_all(".")
?estRodis_estimate_parameters_2
?estRodis_estimate_parameters_1
devtools::load_all(".")
?estRodis_estimate_parameters_1
?estRodis_estimate_parameters_2
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(estRodis_estimate_parameters)
export(estRodis_estimate_parameters_1)
export(estRodis_estimate_parameters_2)
export(estRodis_simulate_cluster_sizes)
import(Rcpp)
import(RcppParallel)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

#' @title Estimate parameters.
#'
#' @description `estRodis_estimate_parameters()` estimates the effective reproduction number,
#' @description `estRodis_estimate_parameters_1()` estimates the effective reproduction number,
#' the dispersion parameter, the probability of a case undergoing a mutation and
#' the probability that a case is confirmed by a test based on the size distribution
#' of identical sequence clusters.
Expand All @@ -17,7 +17,7 @@
#' distribution (normal) of the number of yearly mutations.
#' @param prior_testing Parameters (shape and interval boundaries) for the prior
#' distribution (scaled beta) of the fraction of cases that are confirmed by a test.
#' @param sequencing_proba Sequencing probability.
#' @param sequencing_proba Probability that a case confirmed by a test gets sequenced.
#' @param pars Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param chains Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param iter Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
Expand All @@ -36,10 +36,14 @@
#' @param open_progress Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param show_messages Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#'
#' @details The core of the function `estRodis_estimate_parameters()` is a mathematical model
#' @details The core of the function `estRodis_estimate_parameters_1()` is a mathematical model
#' of the size distribution of sequence clusters, in which viral transmission,
#' the mutation of the virus, and incomplete case-detection are integrated.
#' Parameters are estimated by a Bayesian inference model implemented in Stan.
#' The effective reproduction number, the dispersion parameter, the number of yearly mutations
#' and the testing probability are included into the model with prior distributions.
#' The mean generation interval and the sequencing probability are included into
#' the model as fixed parameters.
#' More details can be found in the following paper: ADD REFERENCE
#'
#' @return An object of S4 class `stanfit` containing the fitted results.
Expand All @@ -53,36 +57,36 @@
#'
#' options(mc.cores = parallelly::availableCores())
#'
#' estRodis_estimate_parameters(
#' estRodis_estimate_parameters_1(
#' clusters_size = simulated_clusters$size,
#' clusters_freq = simulated_clusters$frequency,
#' sequencing_proba = 0.44)

estRodis_estimate_parameters <- function(clusters_size,
clusters_freq,
prior_r = c(10, 10),
prior_k = c(5, 10),
mean_generation_interval = 5.2,
prior_number_yearly_mutations = c(14, 0.5),
prior_testing = c(1, 3, 0.05, 1),
sequencing_proba = 1,
pars = NA,
chains = 4,
iter = 2000,
warmup = floor(iter/2),
thin = 1,
seed = sample.int(.Machine$integer.max, 1),
init = 'random',
check_data = TRUE,
sample_file = NULL,
diagnostic_file = NULL,
verbose = FALSE,
algorithm = c("NUTS", "HMC", "Fixed_param"),
control = NULL,
include = TRUE,
cores = getOption("mc.cores", 1L),
open_progress = interactive() && !isatty(stdout()) && !identical(Sys.getenv("RSTUDIO"), "1"),
show_messages = TRUE) {
estRodis_estimate_parameters_1 <- function(clusters_size,
clusters_freq,
prior_r = c(10, 10),
prior_k = c(5, 10),
mean_generation_interval = 5.2,
prior_number_yearly_mutations = c(14, 0.5),
prior_testing = c(1, 3, 0.05, 1),
sequencing_proba = 1,
pars = NA,
chains = 4,
iter = 2000,
warmup = floor(iter/2),
thin = 1,
seed = sample.int(.Machine$integer.max, 1),
init = 'random',
check_data = TRUE,
sample_file = NULL,
diagnostic_file = NULL,
verbose = FALSE,
algorithm = c("NUTS", "HMC", "Fixed_param"),
control = NULL,
include = TRUE,
cores = getOption("mc.cores", 1L),
open_progress = interactive() && !isatty(stdout()) && !identical(Sys.getenv("RSTUDIO"), "1"),
show_messages = TRUE) {

standata <- list(M = length(clusters_size),
clusters_size = clusters_size,
Expand All @@ -94,7 +98,7 @@ estRodis_estimate_parameters <- function(clusters_size,
prior_testing = prior_testing,
sequencing_proba = sequencing_proba)

out <- rstan::sampling(object = stanmodels$estRodis_stan_model_estimate_parameters,
out <- rstan::sampling(object = stanmodels$estRodis_stan_model_estimate_parameters_1,
data = standata,
pars = pars,
chains = chains,
Expand All @@ -119,5 +123,3 @@ estRodis_estimate_parameters <- function(clusters_size,
}




124 changes: 124 additions & 0 deletions R/estRodis_estimate_parameters_2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@

#' @title Estimate parameters.
#'
#' @description `estRodis_estimate_parameters_2()` estimates the effective reproduction number,
#' the dispersion parameter, the probability of a case undergoing a mutation and
#' the probability that a case is confirmed by a test based on the size distribution
#' of identical sequence clusters.
#'
#' @param clusters_size A list of sizes of identical sequence clusters.
#' @param clusters_freq A list of frequencies of identical sequence clusters.
#' @param prior_r Shape and rate parameter for the prior distribution (gamma) of
#' the effective reproduction number.
#' @param prior_k Shape and rate parameter for the prior distribution (gamma) of
#' the dispersion parameter.
#' @param mean_generation_interval Mean generation interval.
#' @param prior_number_yearly_mutations Mean and standard deviation for the prior
#' distribution (normal) of the number of yearly mutations.
#' @param testing_proba Probability that a case gets confirmed by a test.
#' @param sequencing_proba Probability that a case confirmed by a test gets sequenced.
#' @param pars Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param chains Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param iter Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param warmup Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param thin Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param seed Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param init Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param check_data Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param sample_file Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param diagnostic_file Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param verbose Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param algorithm Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param control Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param include Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param cores Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param open_progress Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#' @param show_messages Parameter that is passed on to rstan::sampling, see manual of rstan::sampling for more details.
#'
#' @details The core of the function `estRodis_estimate_parameters_2()` is a mathematical model
#' of the size distribution of sequence clusters, in which viral transmission,
#' the mutation of the virus, and incomplete case-detection are integrated.
#' Parameters are estimated by a Bayesian inference model implemented in Stan.
#' The effective reproduction number, the dispersion parameter and the number of yearly mutations
#' are included into the model with prior distributions. The mean generation interval,
#' the testing probability and the sequencing probability are included into
#' the model as fixed parameters. More details can be found in the following paper: ADD REFERENCE
#'
#' @return An object of S4 class `stanfit` containing the fitted results.
#'
#' @export
#'
#' @examples
#' simulated_clusters <- estRodis_simulate_cluster_sizes(n_clusters = 1000,
#' max_cluster_size = 2500, R = 0.8, k = 0.3, yearly_mutation_rate = 14,
#' mean_generation_interval = 5.2, testing_proba = 0.6, sequencing_proba = 0.4)
#'
#' options(mc.cores = parallelly::availableCores())
#'
#' estRodis_estimate_parameters_2(
#' clusters_size = simulated_clusters$size,
#' clusters_freq = simulated_clusters$frequency,
#' testing_proba = 0.55,
#' sequencing_proba = 0.44)

estRodis_estimate_parameters_2 <- function(clusters_size,
clusters_freq,
prior_r = c(10, 10),
prior_k = c(5, 10),
mean_generation_interval = 5.2,
prior_number_yearly_mutations = c(14, 0.5),
testing_proba = 1,
sequencing_proba = 1,
pars = NA,
chains = 4,
iter = 2000,
warmup = floor(iter/2),
thin = 1,
seed = sample.int(.Machine$integer.max, 1),
init = 'random',
check_data = TRUE,
sample_file = NULL,
diagnostic_file = NULL,
verbose = FALSE,
algorithm = c("NUTS", "HMC", "Fixed_param"),
control = NULL,
include = TRUE,
cores = getOption("mc.cores", 1L),
open_progress = interactive() && !isatty(stdout()) && !identical(Sys.getenv("RSTUDIO"), "1"),
show_messages = TRUE) {

standata <- list(M = length(clusters_size),
clusters_size = clusters_size,
clusters_freq = clusters_freq,
prior_r = prior_r,
prior_k = prior_k,
mean_generation_interval = mean_generation_interval,
prior_number_yearly_mutations = prior_number_yearly_mutations,
testing_proba = testing_proba,
sequencing_proba = sequencing_proba)

out <- rstan::sampling(object = stanmodels$estRodis_stan_model_estimate_parameters_2,
data = standata,
pars = pars,
chains = chains,
iter = iter,
warmup = warmup,
thin = thin,
seed = seed,
init = init,
check_data = check_data,
sample_file = sample_file,
diagnostic_file = diagnostic_file,
verbose = verbose,
algorithm = algorithm,
control = control,
include = include,
cores = cores,
open_progress = open_progress,
show_messages = show_messages)

return(out)

}


5 changes: 3 additions & 2 deletions R/stanmodels.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# Generated by rstantools. Do not edit by hand.

# names of stan models
stanmodels <- c("estRodis_stan_model_estimate_parameters")
stanmodels <- c("estRodis_stan_model_estimate_parameters_1", "estRodis_stan_model_estimate_parameters_2")

# load each stan module
Rcpp::loadModule("stan_fit4estRodis_stan_model_estimate_parameters_mod", what = TRUE)
Rcpp::loadModule("stan_fit4estRodis_stan_model_estimate_parameters_1_mod", what = TRUE)
Rcpp::loadModule("stan_fit4estRodis_stan_model_estimate_parameters_2_mod", what = TRUE)

# instantiate each stanmodel object
stanmodels <- sapply(stanmodels, function(model_name) {
Expand Down
58 changes: 58 additions & 0 deletions inst/stan/estRodis_stan_model_estimate_parameters_1.stan
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@

// stan model for estimation of the effective reproduction number, the overdispersion parameter,
// the number of yearly mutations and the testing probability

#include "/stan_functions/estRodis_stan_functions_estimate_parameters.stan"

data {
// number of different identical sequence cluster sizes
int < lower = 1 > M;
// sizes of identical sequence clusters
int clusters_size[M];
// frequencies of identical sequence cluster sizes
int clusters_freq[M];
// shape and rate parameter for the prior distribution (gamma) of the effective reproduction number
real prior_r[2];
// shape and rate parameter for the prior distribution (gamma) of the dispersion parameter
real prior_k[2];
// mean time between receving the virus and transmitting it to another person
real mean_generation_interval;
// mean and standard deviation for the prior distribution (normal) of the number of yearly mutations
real prior_number_yearly_mutations[2];
// parameters (shape and interval boundaries) for the prior distribution (scaled beta) of
// the fraction of cases that are confirmed by a test
real prior_testing[4];
// probability that a case confirmed by a test gets sequenced
real sequencing_proba;
}

parameters {
real < lower = 0 > R;
real < lower = 0 > k;
real < lower = 0 > number_yearly_mutations;
real < lower = prior_testing[3], upper = prior_testing[4]> testing_proba;
}

transformed parameters {
real mutation_proba = 1 - exp(- number_yearly_mutations / 365.25 * mean_generation_interval);
real detection_proba = testing_proba * sequencing_proba;
}

model {
// prior distributions
// alpha = prior_r[1], beta = prior_r[2]
R ~ gamma(prior_r[1], prior_r[2]);
// alpha = prior_k[1], beta = prior_k[2]
k ~ gamma(prior_k[1], prior_k[2]);
// mu = prior_number_yearly_mutations[1], sigma = prior_number_yearly_mutations[2]
number_yearly_mutations ~ normal(prior_number_yearly_mutations[1], prior_number_yearly_mutations[2]);
// alpha = prior_testing[1], beta = prior_testing[2], p = prior_testing[3], q = prior_testing[4]
testing_proba ~ estRodis_stan_scaled_beta(prior_testing[1], prior_testing[2], prior_testing[3], prior_testing[4]);

// likelihood
target += estRodis_stan_likelihood_log(clusters_size, clusters_freq, R, k, mutation_proba, detection_proba);
}

generated quantities {
// The posterior predictive distribution
}
Loading

0 comments on commit 7c26874

Please sign in to comment.