Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve arguments #101

Merged
merged 5 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions R/functions_dirichlet_multinomial.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#' @param .sample A column name as symbol. The sample identifier
#' @param .cell_type A column name as symbol. The cell_type identifier
#' @param .count A column name as symbol. The cell_type abundance (read count)
#' @param prior_mean_variable_association A list of the form list(intercept = c(4.436925, 1.304049), slope = c(-0.73074903, 0.06532897), standard_deviation = c(0.4527292, 0.3318759)). Where for each parameter, we specify mean and standard deviation. This is used to incorporate prior knowledge about the mean/variability association of cell-type proportions.
#' @param prior_overdispersion_mean_association A list of the form list(intercept = c(4.436925, 1.304049), slope = c(-0.73074903, 0.06532897), standard_deviation = c(0.4527292, 0.3318759)). Where for each parameter, we specify mean and standard deviation. This is used to incorporate prior knowledge about the mean/variability association of cell-type proportions.
#' @param percent_false_positive A real between 0 and 100. It is the aimed percent of cell types being a false positive. For example, percent_false_positive_genes = 1 provide 1 percent of the calls for significant changes that are actually not significant.
#' @param check_outliers A boolean. Whether to check for outliers before the fit.
#' @param approximate_posterior_inference A boolean. Whether the inference of the joint posterior distribution should be approximated with variational Bayes. It confers execution time advantage.
Expand All @@ -43,7 +43,7 @@ dirichlet_multinomial_glm = function(.data,
.sample,
.cell_type,
.count,
prior_mean_variable_association = NULL,
prior_overdispersion_mean_association = NULL,
percent_false_positive = 5,
check_outliers = FALSE,
approximate_posterior_inference = "none",
Expand Down
140 changes: 11 additions & 129 deletions R/functions_multi_beta_binomial.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ sccomp_glm_data_frame_raw = function(.data,

# Secondary arguments
contrasts = NULL,
prior_mean_variable_association = list(intercept = c(5, 2), slope = c(0, 0.6), standard_deviation = c(20, 40)),
prior_mean = list(intercept = c(0,1), coefficients = c(0,1)),
prior_overdispersion_mean_association = list(intercept = c(5, 2), slope = c(0, 0.6), standard_deviation = c(20, 40)),
percent_false_positive = 5,
check_outliers = TRUE,
approximate_posterior_inference = "none",
test_composition_above_logit_fold_change = 0.2, .sample_cell_group_pairs_to_exclude = NULL,
verbose = FALSE,
my_glm_model,
exclude_priors = FALSE,
bimodal_mean_variability_association = FALSE,
enable_loo = FALSE,
Expand Down Expand Up @@ -80,10 +80,10 @@ sccomp_glm_data_frame_raw = function(.data,
.sample = !!.sample,
.cell_group = !!.cell_group,
.count = count,
my_glm_model = my_glm_model,
contrasts = contrasts,
#.grouping_for_random_intercept = !! .grouping_for_random_intercept,
prior_mean_variable_association = prior_mean_variable_association,
prior_mean = prior_mean,
prior_overdispersion_mean_association = prior_overdispersion_mean_association,
percent_false_positive = percent_false_positive,
check_outliers = check_outliers,
approximate_posterior_inference = approximate_posterior_inference,
Expand All @@ -110,13 +110,13 @@ sccomp_glm_data_frame_counts = function(.data,
# Secondary arguments
contrasts = NULL,
#.grouping_for_random_intercept = NULL,
prior_mean_variable_association = list(intercept = c(5, 2), slope = c(0, 0.6), standard_deviation = c(20, 40)),
prior_mean = list(intercept = c(0,1), coefficients = c(0,1)),
prior_overdispersion_mean_association = list(intercept = c(5, 2), slope = c(0, 0.6), standard_deviation = c(20, 40)),
percent_false_positive = 5,
check_outliers = TRUE,
approximate_posterior_inference = "none",
test_composition_above_logit_fold_change = 0.2, .sample_cell_group_pairs_to_exclude = NULL,
verbose = FALSE,
my_glm_model ,
exclude_priors = FALSE,
bimodal_mean_variability_association = FALSE,
enable_loo = FALSE,
Expand Down Expand Up @@ -261,9 +261,11 @@ sccomp_glm_data_frame_counts = function(.data,
data_for_model$TNS = length(data_for_model$truncation_not_idx)

# Prior
data_for_model$prior_prec_intercept = prior_mean_variable_association$intercept
data_for_model$prior_prec_slope = prior_mean_variable_association$slope
data_for_model$prior_prec_sd = prior_mean_variable_association$standard_deviation
data_for_model$prior_prec_intercept = prior_overdispersion_mean_association$intercept
data_for_model$prior_prec_slope = prior_overdispersion_mean_association$slope
data_for_model$prior_prec_sd = prior_overdispersion_mean_association$standard_deviation
data_for_model$prior_mean_intercept = prior_mean$intercept
data_for_model$prior_mean_coefficients = prior_mean$coefficients
data_for_model$exclude_priors = exclude_priors
data_for_model$enable_loo = TRUE & enable_loo

Expand Down Expand Up @@ -323,127 +325,7 @@ sccomp_glm_data_frame_counts = function(.data,
}


#' multi_beta_binomial main
#'
#' @description This function runs the data modelling and statistical test for the hypothesis that a cell_type includes outlier biological replicate.
#'
#' @importFrom tibble as_tibble
#' @import dplyr
#' @importFrom tidyr spread
#' @importFrom magrittr %$%
#' @importFrom magrittr divide_by
#' @importFrom magrittr multiply_by
#' @importFrom purrr map2
#' @importFrom purrr map_int
#' @importFrom magrittr multiply_by
#' @importFrom magrittr equals
#' @importFrom purrr map
#' @importFrom tibble rowid_to_column
#' @importFrom purrr map_lgl
#' @importFrom dplyr case_when
#' @importFrom rlang :=
#'
#' @keywords internal
#' @noRd
#'
#' @param .data A tibble including a cell_type name column | sample name column | read counts column | factor columns | Pvaue column | a significance column
#' @param formula_composition A formula. The sample formula used to perform the differential cell_group abundance analysis
#' @param formula_variability A formula. The sample formula used to perform the differential cell_group variability analysis
#' @param .sample A column name as symbol. The sample identifier
#' @param .cell_group A column name as symbol. The cell_type identifier
#' @param .count A column name as symbol. The cell_type abundance (read count)
#' @param check_outliers A boolean. Whether to check for outliers before the fit.
#' @param approximate_posterior_inference A boolean. Whether the inference of the joint posterior distribution should be approximated with variational Bayes. It confers execution time advantage.
#' @param enable_loo A boolean. Enable model comparison by the R package LOO. This is helpful when you want to compare the fit between two models, for example, analogously to ANOVA, between a one factor model versus a interceot-only model.
#' @param verbose A boolean. Prints progression.
#' @param cores An integer. How many cored to be used with parallel calculations.
#' @param seed An integer. Used for development and testing purposes
#'
#' @return A nested tibble `tbl` with cell_type-wise information: `sample wise data` | plot | `ppc samples failed` | `exposure deleterious outliers`
#'
#'
multi_beta_binomial_glm = function(.data,
formula_composition = ~ 1,
formula_variability = ~1,
.sample,
.cell_group,
.count,

# Secondary parameters
contrasts = NULL,
#.grouping_for_random_intercept = NULL,
prior_mean_variable_association,
percent_false_positive = 5,
check_outliers = FALSE,
.sample_cell_group_pairs_to_exclude = NULL,
approximate_posterior_inference = TRUE,
enable_loo = FALSE,
cores = detectCores(), # For development purpose,
seed = sample(1e5, 1),
verbose = FALSE,
exclude_priors = FALSE,
bimodal_mean_variability_association = FALSE,
use_data = TRUE,
test_composition_above_logit_fold_change,
max_sampling_iterations = 20000,
pass_fit = TRUE
) {

# Prepare column same enquo
.sample = enquo(.sample)
.cell_group = enquo(.cell_group)
.count = enquo(.count)
.sample_cell_group_pairs_to_exclude = enquo(.sample_cell_group_pairs_to_exclude)

#.grouping_for_random_intercept = enquo(.grouping_for_random_intercept)
#contrasts = contrasts |> enquo() |> quo_names()

estimates_list =
estimate_multi_beta_binomial_glm(
.data = .data,
formula_composition = formula_composition,
.sample = !!.sample,
.cell_group = !!.cell_group,
.count = !!.count,
formula_variability = formula_variability,
contrasts = contrasts,
#.grouping_for_random_intercept = !!.grouping_for_random_intercept,
prior_mean_variable_association = prior_mean_variable_association,
percent_false_positive = percent_false_positive,
check_outliers = check_outliers,
.sample_cell_group_pairs_to_exclude = !!.sample_cell_group_pairs_to_exclude,
approximate_posterior_inference = approximate_posterior_inference,
enable_loo = enable_loo,
cores = cores, # For development purpose,
seed = seed,
verbose = verbose,
exclude_priors = exclude_priors,
bimodal_mean_variability_association = bimodal_mean_variability_association,
use_data = use_data,
max_sampling_iterations = max_sampling_iterations
)

# Create a dummy tibble
tibble() |>
# Attach association mean concentration
add_attr(estimates_list$fit, "fit") %>%
add_attr(estimates_list$data_for_model, "model_input") |>
add_attr(estimates_list$truncation_df2, "truncation_df2") |>
add_attr(.sample, ".sample") |>
add_attr(.cell_group, ".cell_group") |>
add_attr(.count, ".count") |>
add_attr(check_outliers, "check_outliers") |>
add_attr(formula_composition, "formula_composition") |>
add_attr(formula_variability, "formula_variability") |>

sccomp_test(
contrasts = contrasts,
percent_false_positive = percent_false_positive,
test_composition_above_logit_fold_change = test_composition_above_logit_fold_change
)


}

#' @importFrom stats model.matrix
get_mean_precision = function(fit, data_for_model){
Expand Down
Loading