Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev #32

Merged
merged 23 commits into from
Oct 25, 2021
Merged

Dev #32

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0568ae3
update benchmark
stemangiola Oct 7, 2021
9e46ba8
fix devs and create counts function
stemangiola Oct 8, 2021
6fcfbb1
set seed for simulation
stemangiola Oct 8, 2021
4c06cad
make model more stable
stemangiola Oct 11, 2021
6962e2d
use QR decomposition and add utilities
stemangiola Oct 11, 2021
cd84187
fix for formula with 2+ covariates
stemangiola Oct 12, 2021
8346a8d
add figures article
stemangiola Oct 14, 2021
da9a912
update plots
stemangiola Oct 16, 2021
4298644
detect outliers without variance association + update figures
stemangiola Oct 18, 2021
055cc16
update novel result
stemangiola Oct 18, 2021
1ce5345
update figures
stemangiola Oct 21, 2021
32502bf
made sure variational approximate MCMC, the means match the sd are un…
stemangiola Oct 24, 2021
e42cee6
approximate variational by default
stemangiola Oct 24, 2021
7866728
Merge pull request #31 from stemangiola/non-centered-parameterisation
stemangiola Oct 24, 2021
bb39b21
Merge branch 'master' into dev
stemangiola Oct 24, 2021
d20a1ed
small fix
stemangiola Oct 24, 2021
c9a9ee9
no variational default
stemangiola Oct 24, 2021
df21d14
docs
stemangiola Oct 24, 2021
3eaffe9
force full MCMC for last inference
stemangiola Oct 25, 2021
8acdfc5
Merge pull request #33 from stemangiola/non-centered-parameterisation
stemangiola Oct 25, 2021
cd83bde
approximate by default
stemangiola Oct 25, 2021
6bc08b2
Merge branch 'dev' of github.com:stemangiola/sccomp into dev
stemangiola Oct 25, 2021
42aab80
update approximation options
stemangiola Oct 25, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ importFrom(purrr,pmap)
importFrom(purrr,when)
importFrom(rlang,":=")
importFrom(rlang,enquo)
importFrom(rlang,ensym)
importFrom(rlang,quo_is_null)
importFrom(rlang,quo_name)
importFrom(rlang,quo_squash)
Expand Down
30 changes: 16 additions & 14 deletions R/functions_dirichlet_multinomial.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ dirichlet_multinomial_glm = function(.data,
"precision"
)
} %>%
beta_to_CI(censoring_iteration = 1, false_positive_rate = false_positive_rate ) %>%
beta_to_CI(censoring_iteration = 1, false_positive_rate = false_positive_rate, factor_of_interest = data_for_model$X %>% colnames() %>% .[2] ) %>%

# Join filtered
mutate(
significant =
!!as.symbol(sprintf(".lower_%s", colnames(data_for_model$X)[2])) *
!!as.symbol(sprintf(".upper_%s", colnames(data_for_model$X)[2])) > 0
) %>%
# # Join filtered
# mutate(
# significant =
# !!as.symbol(sprintf(".lower_%s", colnames(data_for_model$X)[2])) *
# !!as.symbol(sprintf(".upper_%s", colnames(data_for_model$X)[2])) > 0
# ) %>%

# add probability
left_join( get_probability_non_zero(parsed_fit, prefix = "composition"), by="M" ) %>%
Expand Down Expand Up @@ -135,14 +135,16 @@ dirichlet_multinomial_glm = function(.data,
)

return_df =
.data_2 %>%
.data_2
# %>%
#
# # Join filtered
# mutate(
# significant =
# !!as.symbol(sprintf(".lower_%s", colnames(data_for_model$X)[2])) *
# !!as.symbol(sprintf(".upper_%s", colnames(data_for_model$X)[2])) > 0
# )

# Join filtered
mutate(
significant =
!!as.symbol(sprintf(".lower_%s", colnames(data_for_model$X)[2])) *
!!as.symbol(sprintf(".upper_%s", colnames(data_for_model$X)[2])) > 0
)
fit = attr(.data_2, "fit")
}

Expand Down
120 changes: 94 additions & 26 deletions R/functions_multi_beta_binomial.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ estimate_multi_beta_binomial_glm = function(.data,
prior_mean_variable_association,
percent_false_positive = 5,
check_outliers = FALSE,
approximate_posterior_inference = TRUE,
approximate_posterior_inference = "outlier_detection",
variance_association = FALSE,
cores = detectCores(), # For development purpose,
seed = sample(1e5, 1),
Expand All @@ -62,38 +62,42 @@ estimate_multi_beta_binomial_glm = function(.data,
# Produce data list
covariate_names = parse_formula(formula)

data_for_model =
.data %>%
data_to_spread ( formula, !!.sample, !!.cell_type, !!.count) %>%
data_spread_to_model_input(
formula, !!.sample, !!.cell_type, !!.count,
variance_association = variance_association,
truncation_ajustment = 1.1,
approximate_posterior_inference = approximate_posterior_inference
)

# Pior
data_for_model$prior_prec_intercept = prior_mean_variable_association$intercept
data_for_model$prior_prec_slope = prior_mean_variable_association$slope
data_for_model$prior_prec_sd = prior_mean_variable_association$standard_deviation

# Original - old
# prec_sd ~ normal(0,2);
# prec_coeff ~ normal(0,5);

# If we are NOT checking outliers
if(!check_outliers){

# Start first fit
if(check_outliers) message("sccomp says: outlier identification first pass - step 1/3 [ETA: ~20s]")
else message("sccomp says: estimation [ETA: ~20s]")
message("sccomp says: estimation [ETA: ~20s]")

fit =
data_for_model %>%
data_for_model =
.data %>%
data_to_spread ( formula, !!.sample, !!.cell_type, !!.count) %>%
data_spread_to_model_input(
formula, !!.sample, !!.cell_type, !!.count,
variance_association = variance_association,
truncation_ajustment = 1.1,
approximate_posterior_inference = approximate_posterior_inference == "all"
)

# Run the first discovery phase with permissive false discovery rate
fit_model(stanmodels$glm_multi_beta_binomial, cores= cores, quantile = CI, approximate_posterior_inference = approximate_posterior_inference, verbose = verbose, seed = seed)
# Pior
data_for_model$prior_prec_intercept = prior_mean_variable_association$intercept
data_for_model$prior_prec_slope = prior_mean_variable_association$slope
data_for_model$prior_prec_sd = prior_mean_variable_association$standard_deviation

fit =
data_for_model %>%

if(!check_outliers){
# Run the first discovery phase with permissive false discovery rate
fit_model(
stanmodels$glm_multi_beta_binomial,
cores= cores,
quantile = CI,
approximate_posterior_inference = approximate_posterior_inference == "all",
verbose = verbose,
seed = seed
)

list(
fit = fit,
Expand All @@ -103,8 +107,41 @@ estimate_multi_beta_binomial_glm = function(.data,

}

# If we are checking outliers
else{

message("sccomp says: outlier identification first pass - step 1/3 [ETA: ~20s]")

# Force variance NOT associated with mean for stringency of outlier detection
data_for_model =
.data %>%
data_to_spread ( formula, !!.sample, !!.cell_type, !!.count) %>%
data_spread_to_model_input(
formula, !!.sample, !!.cell_type, !!.count,
variance_association = FALSE,
truncation_ajustment = 1.1,
approximate_posterior_inference = approximate_posterior_inference %in% c("outlier_detection", "all")
)

# Pior
data_for_model$prior_prec_intercept = prior_mean_variable_association$intercept
data_for_model$prior_prec_slope = prior_mean_variable_association$slope
data_for_model$prior_prec_sd = prior_mean_variable_association$standard_deviation


fit =
data_for_model %>%

# Run the first discovery phase with permissive false discovery rate
fit_model(
stanmodels$glm_multi_beta_binomial,
cores= cores,
quantile = CI,
approximate_posterior_inference = approximate_posterior_inference %in% c("outlier_detection", "all"),
verbose = verbose,
seed = seed
)

rng = rstan::gqs(
stanmodels$glm_multi_beta_binomial_generate_date,
#rstan::stan_model("inst/stan/glm_multi_beta_binomial_generate_date.stan"),
Expand Down Expand Up @@ -141,6 +178,22 @@ estimate_multi_beta_binomial_glm = function(.data,
truncation_up = case_when(outlier ~ -1, TRUE ~ truncation_up),
)

# Allow variance association
data_for_model =
.data %>%
data_to_spread ( formula, !!.sample, !!.cell_type, !!.count) %>%
data_spread_to_model_input(
formula, !!.sample, !!.cell_type, !!.count,
variance_association = variance_association,
truncation_ajustment = 1.1,
approximate_posterior_inference = approximate_posterior_inference %in% c("outlier_detection", "all")
)

# Pior
data_for_model$prior_prec_intercept = prior_mean_variable_association$intercept
data_for_model$prior_prec_slope = prior_mean_variable_association$slope
data_for_model$prior_prec_sd = prior_mean_variable_association$standard_deviation

# Add censoring
data_for_model$is_truncated = 1
data_for_model$truncation_up = truncation_df %>% select(N, M, truncation_up) %>% spread(M, truncation_up) %>% as_matrix(rownames = "N") %>% apply(2, as.integer)
Expand All @@ -157,7 +210,15 @@ estimate_multi_beta_binomial_glm = function(.data,

fit2 =
data_for_model %>%
fit_model(stanmodels$glm_multi_beta_binomial, cores = cores, quantile = my_quantile_step_2, approximate_posterior_inference = approximate_posterior_inference, verbose = verbose, seed = seed)
fit_model(
stanmodels$glm_multi_beta_binomial,
cores = cores,
quantile = my_quantile_step_2,
approximate_posterior_inference = approximate_posterior_inference %in% c("outlier_detection", "all"),
verbose = verbose,
seed = seed
)

#fit_model(stan_model("inst/stan/glm_multi_beta_binomial.stan"), chains= 4, output_samples = 500, approximate_posterior_inference = FALSE, verbose = TRUE)

rng2 = rstan::gqs(
Expand Down Expand Up @@ -213,7 +274,14 @@ estimate_multi_beta_binomial_glm = function(.data,
fit3 =
data_for_model %>%
# Run the first discovery phase with permissive false discovery rate
fit_model(stanmodels$glm_multi_beta_binomial, cores = cores, quantile = CI, approximate_posterior_inference = approximate_posterior_inference, verbose = verbose, seed = seed)
fit_model(
stanmodels$glm_multi_beta_binomial,
cores = cores,
quantile = CI,
approximate_posterior_inference = approximate_posterior_inference %in% c("all"),
verbose = verbose, seed = seed
)

#fit_model(stan_model("inst/stan/glm_multi_beta_binomial.stan"), chains= 4, output_samples = 500)

list(
Expand Down
Loading