Skip to content

Commit

Permalink
hyp_dots
Browse files Browse the repository at this point in the history
  • Loading branch information
tetomonti committed May 29, 2024
2 parents 1e26e67 + 1aad26a commit 1465315
Showing 1 changed file with 93 additions and 106 deletions.
199 changes: 93 additions & 106 deletions R/hyp_dots.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,113 +17,100 @@
#' @importFrom ggplot2 ggplot aes geom_point labs scale_color_continuous scale_size_continuous guides theme element_text element_blank
#'
#' @keywords internal
.dots_multi_plot <- function(multihyp_data,
top=20,
abrv=50,
size_by=c("genesets", "significance", "overlap", "none"),
pval_cutoff=1,
fdr_cutoff=1,
val=c("fdr", "pval"),
title="")
.dots_multi_plot <- function(
multihyp_data,
top = 20,
abrv = 50,
size_by = c("genesets", "significance", "none"),
pval_cutoff = 1,
fdr_cutoff = 1,
val = c("fdr", "pval"),
title = "")
{
# Default arguments
val <- match.arg(val)
size_by <- match.arg(size_by)

# Count significant genesets across signatures
multihyp_dfs <- lapply(multihyp_data, function(hyp_obj) {
hyp_obj$data %>%
dplyr::filter(pval <= pval_cutoff) %>%
dplyr::filter(fdr <= fdr_cutoff) %>%
dplyr::select(label)
})
# Take top genesets
labels <- names(sort(table(unlist(multihyp_dfs)), decreasing=TRUE))
if (!is.null(top)) labels <- head(labels, top)

# Handle empty dataframes
if (length(labels) == 0) return(ggempty())

# Create a multihyp dataframe
dfs <- lapply(multihyp_data, function(hyp_obj) {
hyp_df <- hyp_obj$data
hyp_df[hyp_df$label %in% labels, c("label", val), drop=FALSE]
})
# merge dataframes ('cbind')
df <- suppressWarnings(Reduce(function(x, y) merge(x, y, by="label", all=TRUE), dfs))
colnames(df) <- c("label", names(dfs))
rownames(df) <- df$label
df <- df[rev(labels), names(dfs)]

# Abbreviate labels
label.abrv <- substr(rownames(df), 1, abrv)
if (any(duplicated(label.abrv))) {
stop("Non-unique labels after abbreviating")
} else {
rownames(df) <- factor(label.abrv, levels=label.abrv)
}
if (val == "pval") {
cutoff <- pval_cutoff
color.label <- "P-Value"
}
if (val == "fdr") {
cutoff <- fdr_cutoff
color.label <- "FDR"
}
df.melted <- reshape2::melt(as.matrix(df))
colnames(df.melted) <- c("label", "signature", "significance")
df.melted$size <- 1

if (size_by == "significance") {
df.melted$size <- df.melted$significance
} else if (size_by == "genesets") {
geneset.sizes <- lapply(multihyp_data, function(hyp_obj) {
hyp_obj$data[, c("label", "geneset")]
}) %>%
do.call(rbind, .) %>%
dplyr::distinct(label, .keep_all=TRUE) %>%
dplyr::pull(geneset, label)
#df.melted$size <- geneset.sizes[df.melted$label]
names(geneset.sizes) <- substr(names(geneset.sizes), 1, abrv)
stopifnot( all(!is.na(match_idx <- match(df.melted$label,names(geneset.sizes)))) )
df.melted$size <- geneset.sizes[match_idx]
} else if (size == "overlap") {
stop( "size_by overlap not implemented yet")
overlap.sizes <- lapply(multihyp_data, function(hyp_obj) {
hyp_obj$data[, c("label", "overlap")]
}) %>%
do.call(rbind, .) %>%
dplyr::distinct(label, .keep_all=TRUE) %>%
dplyr::pull(overlap, label)
#df.melted$size <- overlap.sizes[df.melted$label]
names(overlap.sizes) <- substr(names(overlap.sizes), 1, abrv)
stopifnot( all(!is.na(match_idx <- match(df.melted$label,names(overlap.sizes)))) )
df.melted$size <- overlap.sizes[match_idx]
}
p <- df.melted %>%
dplyr::filter(significance <= cutoff) %>%
ggplot(aes(x = signature, y = label, color = significance, size = size)) +
geom_point() +
scale_color_continuous(low = "#114357", high = "#E53935", trans = .reverselog_trans(10)) +
labs(title = title, color = color.label) +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)
)
if (size_by == "none") {
p <- p + guides(size="none")
} else if (size_by == "significance") {
p <- p + scale_size_continuous(trans=.reverselog_trans(10)) + labs(size="Significance")
} else if (size_by == "genesets" ) {
p <- p + scale_size_continuous(trans=scales::log10_trans()) + labs(size="Genesets\nSize")
} else if (size_by == "overlap" ) {
p <- p + scale_size_continuous(trans=scales::log10_trans()) + labs(size="Overlap\nSize")
} else {
stop("unrecognized 'size_by':", size_by)
}
return(p)
# Default arguments
val <- match.arg(val)
size_by <- match.arg(size_by)

# Count significant genesets across signatures
multihyp_dfs <- lapply(multihyp_data, function(hyp_obj) {
hyp_obj$data %>%
dplyr::filter(pval <= pval_cutoff) %>%
dplyr::filter(fdr <= fdr_cutoff) %>%
dplyr::select(label)
})
# Take top genesets
labels <- names(sort(table(unlist(multihyp_dfs)), decreasing = TRUE))
if (!is.null(top)) labels <- head(labels, top)

# Handle empty dataframes
if (length(labels) == 0) {
return(ggempty())
}
# Create a multihyp dataframe
dfs <- lapply(multihyp_data, function(hyp_obj) {
hyp_df <- hyp_obj$data
hyp_df[hyp_df$label %in% labels, c("label", val), drop = FALSE]
})
df <- suppressWarnings(Reduce(function(x, y) merge(x, y, by = "label", all = TRUE), dfs))
colnames(df) <- c("label", names(dfs))
rownames(df) <- df$label
df <- df[rev(labels), names(dfs)]

# Abbreviate labels
label.abrv <- substr(rownames(df), 1, abrv)
if (any(duplicated(label.abrv))) {
stop("Non-unique labels after abbreviating")
} else {
rownames(df) <- factor(label.abrv, levels = label.abrv)
}
if (val == "pval") {
cutoff <- pval_cutoff
color.label <- "P-Value"
}
if (val == "fdr") {
cutoff <- fdr_cutoff
color.label <- "FDR"
}
df.melted <- reshape2::melt(as.matrix(df))
colnames(df.melted) <- c("label", "signature", "significance")
df.melted$size <- 1

if (size_by == "significance") {
df.melted$size <- df.melted$significance
}
if (size_by == "genesets") {
geneset.sizes <- lapply(multihyp_data, function(hyp_obj) {
hyp_obj$data[, c("label", "geneset")]
}) %>%
do.call(rbind, .) %>%
dplyr::distinct(label, .keep_all = TRUE) %>%
dplyr::pull(geneset, label)
df.melted$size <- geneset.sizes[df.melted$label]
}
p <- df.melted %>%
dplyr::filter(significance <= cutoff) %>%
ggplot(aes(x = signature, y = label, color = significance, size = size)) +
geom_point() +
scale_color_continuous(low = "#114357", high = "#E53935", trans = .reverselog_trans(10)) +
labs(title = title, color = color.label) +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1)
)
if (size_by == "none") {
p <- p + guides(size = "none")
} else if (size_by == "significance") {
p <- p + scale_size_continuous(trans = .reverselog_trans(10)) + labs(size = "Significance")
} else if (size_by == "genesets") {
p <- p + scale_size_continuous(trans = scales::log10_trans()) + labs(size = "Genesets\nSize")
# p <- p + scale_color_continuous(
# high = "#114357", low = "#E53935", trans = scales::log10_trans(),
# guide = guide_colorbar(reverse = TRUE)
#)
}
return(p)
}

#' Plot top enriched genesets
Expand Down

0 comments on commit 1465315

Please sign in to comment.