Skip to content

Commit

Permalink
Merge pull request #173 from ShixiangWang/devel
Browse files Browse the repository at this point in the history
Devel
  • Loading branch information
ShixiangWang authored May 12, 2020
2 parents 7d069e5 + aea2730 commit fb4558d
Show file tree
Hide file tree
Showing 49 changed files with 745 additions and 58 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ BugReports: https://github.com/ShixiangWang/sigminer/issues
Depends:
R (>= 3.5)
Imports:
cli,
cli (>= 2.0.0),
cowplot,
data.table,
dplyr,
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ S3method(subset,CopyNumber)
export("%>%")
export(":=")
export(.data)
export(.env)
export(CopyNumber)
export(MAF)
export(add_h_arrow)
Expand Down Expand Up @@ -42,9 +43,13 @@ export(show_cn_profile)
export(show_cosmic_sig_profile)
export(show_group_comparison)
export(show_group_mapping)
export(show_sig_bootstrap_error)
export(show_sig_bootstrap_exposure)
export(show_sig_bootstrap_stability)
export(show_sig_consensusmap)
export(show_sig_exposure)
export(show_sig_feature_corrplot)
export(show_sig_fit)
export(show_sig_number_survey)
export(show_sig_number_survey2)
export(show_sig_profile)
Expand Down Expand Up @@ -85,6 +90,7 @@ importFrom(graphics,hist)
importFrom(magrittr,"%>%")
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,.env)
importFrom(rlang,as_label)
importFrom(rlang,as_name)
importFrom(rlang,enquo)
Expand Down
261 changes: 261 additions & 0 deletions R/show_sig_bootstrap.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
#' Show Signature Bootstrap Analysis Results
#'
#' See details for description.
#'
#' Functions:
#'
#' - [show_sig_bootstrap_exposure] - this function plots exposures from bootstrap samples with both dotted boxplot.
#' The optimal exposure (the exposure from original input) is shown as triangle point. **Only one sample can be plotted**.
#' - [show_sig_bootstrap_error] - this function plots decomposition errors from bootstrap samples with both dotted boxplot.
#' The error from optimal solution (the decomposition error from original input) is shown as triangle point. **Only one sample can be plotted**.
#' - [show_sig_bootstrap_stability] - this function plots the signature exposure instability for specified signatures. Currently,
#' the instability measure supports 3 types:
#' - 'MRSE' for Mean Root Squared Error (default) of bootstrap exposures and original exposures for each sample.
#' - 'MAE' for Mean Absolute Error of bootstrap exposures and original exposures for each sample.
#' - 'AbsDiff' for Absolute Difference between mean bootstram exposure and original exposure.
#'
#' @inheritParams ggpubr::ggboxplot
#' @inheritParams ggpubr::ggpar
#' @inheritParams ggplot2::geom_boxplot
#' @inheritParams sig_fit_bootstrap_batch
#' @param bt_result result object from [sig_fit_bootstrap_batch].
#' @param sample a sample id.
#' @param signatures signatures to show.
#' @param measure measure to estimate the exposure instability, can be one of 'MRSE', 'MAE' and 'AbsDiff'.
#' @param dodge_width dodge width.
#' @param ... other parameters passing to [ggpubr::ggboxplot].
#'
#' @name show_sig_bootstrap
#' @return a `ggplot` object
#' @seealso [sig_fit_bootstrap_batch], [sig_fit], [sig_fit_bootstrap]
#' @references Huang X, Wojtowicz D, Przytycka TM. Detecting presence of mutational signatures in cancer with confidence. Bioinformatics. 2018;34(2):330–337. doi:10.1093/bioinformatics/btx604
#' @examples
#' \donttest{
#' if (require("BSgenome.Hsapiens.UCSC.hg19")) {
#' laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
#' laml <- read_maf(maf = laml.maf)
#' mt_tally <- sig_tally(
#' laml,
#' ref_genome = "BSgenome.Hsapiens.UCSC.hg19",
#' use_syn = TRUE
#' )
#'
#' library(NMF)
#' mt_sig <- sig_extract(mt_tally$nmf_matrix,
#' n_sig = 3,
#' nrun = 2,
#' cores = 1,
#' pConstant = 1e-13
#' )
#'
#' mat <- t(mt_tally$nmf_matrix)
#' mat <- mat[, colSums(mat) > 0]
#' bt_result <- sig_fit_bootstrap_batch(mat, sig = mt_sig, n = 10)
#' ## Parallel computation
#' ## bt_result = sig_fit_bootstrap_batch(mat, sig = mt_sig, n = 10, use_parallel = TRUE)
#'
#' ## Show bootstrap exposure (optimal exposure is shown as triangle)
#' p1 <- show_sig_bootstrap_exposure(bt_result, methods = c("LS", "QP"))
#' p1
#' p2 <- show_sig_bootstrap_exposure(bt_result, methods = c("LS", "QP"),
#' sample = "TCGA-AB-3012",
#' signatures = c("Sig1", "Sig2"))
#' p2
#'
#' ## Show bootstrap error
#' p3 <- show_sig_bootstrap_error(bt_result, methods = c("LS", "QP"))
#' p3
#'
#' ## Show exposure (in)stability
#' p4 <- show_sig_bootstrap_stability(bt_result, methods = c("LS", "QP"))
#' p4
#' p5 <- show_sig_bootstrap_stability(bt_result, methods = c("LS", "QP"), measure = "MAE")
#' p5
#' p6 <- show_sig_bootstrap_stability(bt_result, methods = c("LS", "QP"), measure = "AbsDiff")
#' p6
#'
#' } else {
#' message("Please install package 'BSgenome.Hsapiens.UCSC.hg19' firstly!")
#' }
#' }
#' @testexamples
#' expect_s3_class(p1, "ggplot")
#' expect_s3_class(p2, "ggplot")
#' expect_s3_class(p3, "ggplot")
#' expect_s3_class(p4, "ggplot")
#' expect_s3_class(p5, "ggplot")
#' expect_s3_class(p6, "ggplot")
NULL

#' @rdname show_sig_bootstrap
#' @export
show_sig_bootstrap_exposure <- function(bt_result, sample = NULL, signatures = NULL,
methods = "LS", palette = "aaas", title = NULL,
xlab = FALSE, ylab = "Signature exposure", width = 0.3,
dodge_width = 0.8, outlier.shape = NA,
add = "jitter", add.params = list(alpha = 0.3),
...) {
stopifnot(is.list(bt_result))

timer <- Sys.time()
send_info("Started.")
on.exit(send_elapsed_time(timer))

dat <- bt_result$expo
rm(bt_result)
dat <- dplyr::filter(dat, .data$method %in% methods)
if (!is.null(sample)) {
if (length(sample) > 1) {
send_stop("Only one sample can be used!")
}
dat <- dplyr::filter(dat, .data$sample %in% .env$sample)
} else {
send_warning("Top 1 sample is selected due to no sample specified in argument {.code sample}.")
dat <- dplyr::filter(dat, .data$sample %in% .data$sample[1])
}
if (!is.null(signatures)) {
dat <- dplyr::filter(dat, .data$sig %in% signatures)
}
if (!nrow(dat) > 0) {
send_stop("No data left to plot, could you check your input?")
}

if (is.null(title)) {
title <- unique(dat$sample)
}

send_info("Plotting.")
## Plotting
ggpubr::ggboxplot(subset(dat, dat$type != "optimal"),
x = "sig", y = "exposure", color = "method", outlier.shape = outlier.shape,
palette = palette, width = width, add = add, add.params = add.params,
title = title, xlab = xlab, ylab = ylab, ...
) +
ggplot2::geom_point(
data = subset(dat, dat$type == "optimal"),
mapping = aes_string(x = "sig", y = "exposure", color = "method"),
shape = 2, position = ggplot2::position_dodge2(width = dodge_width, preserve = "single")
)
}


#' @rdname show_sig_bootstrap
#' @export
show_sig_bootstrap_error <- function(bt_result, sample = NULL,
methods = "LS", palette = "aaas", title = NULL,
xlab = FALSE, ylab = "Decomposition error", width = 0.3,
dodge_width = 0.8, outlier.shape = NA,
add = "jitter", add.params = list(alpha = 0.3),
legend = "none",
...) {
stopifnot(is.list(bt_result))

timer <- Sys.time()
send_info("Started.")
on.exit(send_elapsed_time(timer))

dat <- bt_result$error
rm(bt_result)
dat <- dplyr::filter(dat, .data$method %in% methods)
if (!is.null(sample)) {
if (length(sample) > 1) {
send_stop("Only one sample can be used!")
}
dat <- dplyr::filter(dat, .data$sample %in% .env$sample)
} else {
send_warning("Top 1 sample is selected due to no sample specified in argument {.code sample}.")
dat <- dplyr::filter(dat, .data$sample %in% .data$sample[1])
}
if (!nrow(dat) > 0) {
send_stop("No data left to plot, could you check your input?")
}

if (is.null(title)) {
title <- unique(dat$sample)
}

send_info("Plotting.")
## Plotting
ggpubr::ggboxplot(subset(dat, dat$type != "optimal"),
x = "method", y = "errors", color = "method", outlier.shape = outlier.shape,
palette = palette, width = width, add = add, add.params = list(alpha = 0.3),
title = title, xlab = xlab, ylab = ylab, legend = legend, ...
) +
ggplot2::geom_point(
data = subset(dat, dat$type == "optimal"),
mapping = aes_string(x = "method", y = "errors", color = "method"),
shape = 2, position = ggplot2::position_dodge2(width = dodge_width, preserve = "single")
)
}


#' @rdname show_sig_bootstrap
#' @export
show_sig_bootstrap_stability <- function(bt_result, signatures = NULL, measure = c("MRSE", "MAE", "AbsDiff"),
methods = "LS", palette = "aaas", title = NULL,
xlab = FALSE, ylab = "Signature instability",
width = 0.3, outlier.shape = NA,
add = "jitter", add.params = list(alpha = 0.3),
...) {
stopifnot(is.list(bt_result))
measure <- match.arg(measure)

timer <- Sys.time()
send_info("Started.")
on.exit(send_elapsed_time(timer))

dat <- bt_result$expo
rm(bt_result)
dat <- dplyr::filter(dat, .data$method %in% methods)

if (!is.null(signatures)) {
dat <- dplyr::filter(dat, .data$sig %in% signatures)
}
if (!nrow(dat) > 0) {
send_stop("No data left to plot, could you check your input?")
}

if (measure == "AbsDiff") {
dat_opt <- dplyr::filter(dat, .data$type == "optimal")
dat_bt <- dplyr::filter(dat, .data$type != "optimal")

dat_bt <- dat_bt %>%
dplyr::group_by(.data$method, .data$sample, .data$sig) %>%
dplyr::summarise(exposure = mean(.data$exposure)) %>%
dplyr::ungroup()
dat_bt$type <- "bootstrap"

dat <- dplyr::bind_rows(dat_opt, dat_bt)

dat <- dat %>%
tidyr::pivot_wider(names_from = "type", values_from = "exposure") %>%
dplyr::group_by(.data$method, .data$sig, .data$sample) %>%
dplyr::summarise(measure = abs(.data$optimal - .data$bootstrap)) %>%
dplyr::ungroup()
} else {
## Calculate MRSE(Mean Root Squared Error)or MAE (Mean Absolute Error)
if (measure == "MRSE") {
## across solution: https://github.com/tidyverse/dplyr/issues/5230
dat <- dat %>%
tidyr::pivot_wider(names_from = "type", values_from = "exposure") %>%
dplyr::mutate_at(dplyr::vars(dplyr::starts_with("Rep_")), ~ (. - .data$optimal)^2) %>%
dplyr::mutate(measure = dplyr::select(., -c("method", "sample", "optimal", "sig")) %>% rowMeans() %>% sqrt()) %>%
dplyr::select(c("method", "sample", "sig", "measure"))
} else {
## MAE
dat <- dat %>%
tidyr::pivot_wider(names_from = "type", values_from = "exposure") %>%
dplyr::mutate_at(dplyr::vars(dplyr::starts_with("Rep_")), ~ abs(. - .data$optimal)) %>%
dplyr::mutate(measure = dplyr::select(., -c("method", "sample", "optimal", "sig")) %>% rowMeans()) %>%
dplyr::select(c("method", "sample", "sig", "measure"))
}
}

send_info("Plotting.")
## Plotting
ggpubr::ggboxplot(dat,
x = "sig", y = "measure", color = "method", outlier.shape = outlier.shape,
palette = palette, width = width, add = add, add.params = add.params,
title = title, xlab = xlab, ylab = ylab, ...
)
}
67 changes: 67 additions & 0 deletions R/show_sig_fit.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#' Show Signature Fit Result
#'
#' See [sig_fit] for examples.
#'
#' @inheritParams ggpubr::ggboxplot
#' @inheritParams ggpubr::ggpar
#' @inheritParams ggplot2::geom_boxplot
#' @inheritParams sig_fit_bootstrap_batch
#' @inheritParams show_sig_bootstrap
#' @param fit_result result object from [sig_fit].
#' @param samples samples to show, if `NULL`, all samples are used.
#' @return a `ggplot` object.
#' @export
#' @seealso [sig_fit], [show_sig_bootstrap_exposure], [sig_fit_bootstrap], [sig_fit_bootstrap_batch]
show_sig_fit <- function(fit_result, samples = NULL, signatures = NULL,
palette = "aaas", title = NULL,
xlab = FALSE, ylab = "Signature exposure", legend = "none",
width = 0.3, outlier.shape = NA,
add = "jitter", add.params = list(alpha = 0.3),
...) {

timer <- Sys.time()
send_info("Started.")
on.exit(send_elapsed_time(timer))

send_info("Checking input format.")
if (data.table::is.data.table(fit_result)) {
dat <- data.table::copy(fit_result)
} else if (is.list(fit_result)) {
dat <- fit_result$expo
} else if (is.matrix(fit_result)) {
dat <- fit_result %>%
as.data.frame() %>%
tibble::rownames_to_column("Sig") %>%
tidyr::pivot_longer(cols = -"Sig", names_to = "sample", values_to = "expo") %>%
tidyr::pivot_wider(id_cols = "sample", names_from = "Sig", values_from = "expo") %>%
data.table::as.data.table()
} else {
send_stop("Bad input, could you take a check?")
}

send_success("Checked.")

send_info("Checking filters.")
if (!is.null(samples)) {
dat <- dplyr::filter(dat, .data$sample %in% samples)
}
if (!is.null(signatures)) {
dat <- dplyr::select(dat, c("sample", signatures))
}
if (!nrow(dat) > 0) {
send_stop("No data left to plot, could you check your input?")
}
send_info("Checked.")

dat <- dat %>%
dplyr::as_tibble() %>%
tidyr::pivot_longer(-"sample", names_to = "sig", values_to = "exposure")

send_info("Plotting.")
## Plotting
ggpubr::ggboxplot(dat,
x = "sig", y = "exposure", color = "sig", outlier.shape = outlier.shape,
palette = palette, width = width, add = add, add.params = add.params,
title = title, xlab = xlab, ylab = ylab, legend = legend, ...
)
}
4 changes: 3 additions & 1 deletion R/sig_auto_extract.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
#' number of signatures through the automatic relevance determination technique.
#' This functions delevers highly interpretable and sparse representations for
#' both signature profiles and attributions at a balance between data fitting and
#' model complexity. See detail part and references for more.
#' model complexity (this method may introduce more signatures than expected,
#' especially for copy number signatures (thus **I don't recommend you to use this feature
#' to extract copy number signatures**)). See detail part and references for more.
#'
#' There are three methods available in this function: "L1W.L2H", "L1KL" and "L2KL".
#' They use different priors for the bayesian variant of NMF algorithm
Expand Down
9 changes: 1 addition & 8 deletions R/sig_extract.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,7 @@
#' library(NMF)
#' res <- sig_extract(cn_tally_M$nmf_matrix, 2, nrun = 1)
#' }
#' @tests
#' load(system.file("extdata", "toy_copynumber_tally_M.RData",
#' package = "sigminer", mustWork = TRUE
#' ))
#' # Extract copy number signatures
#' library(NMF)
#' res <- sig_extract(cn_tally_M$nmf_matrix, 2, nrun = 1)
#'
#' @testexamples
#' expect_s3_class(res, "Signature")
#' @seealso [sig_tally] for getting variation matrix,
#' [sig_estimate] for estimating signature number for [sig_extract], [sig_auto_extract] for
Expand Down
Loading

0 comments on commit fb4558d

Please sign in to comment.