Skip to content

Commit

Permalink
Merge pull request #53 from montilab/dev
Browse files Browse the repository at this point in the history
Add in latest dev changes
  • Loading branch information
anfederico authored Oct 16, 2023
2 parents 681dda4 + 9fceb9d commit f5f7491
Show file tree
Hide file tree
Showing 8 changed files with 204 additions and 148 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ env:
- R_BUILD_ARGS="--no-build-vignettes --no-manual"
- R_CHECK_ARGS="--no-build-vignettes --no-manual --timings"
- _R_CHECK_TIMINGS_="0"
- R_DEFAULT_INTERNET_TIMEOUT="300"

r:
- release
Expand Down
40 changes: 22 additions & 18 deletions R/ggeplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,26 @@
#' @importFrom ggplot2 qplot aes geom_rug geom_hline geom_vline annotate theme element_text element_blank element_line element_rect
#'
#' @export
ggeplot <- function(n, positions, x_axis, y_axis, title="") {
score <- which.max(abs(y_axis))
qplot(x_axis,
y_axis,
main=title,
ylab="Running Enrichment Score",
xlab="Position in Ranked List of Genes",
geom="line")+
geom_rug(data=data.frame(positions), aes(x=positions), inherit.aes=FALSE)+
geom_hline(yintercept=0) +
geom_vline(xintercept=n/2, linetype="dotted") +
annotate("point", x=x_axis[score], y=y_axis[score], color="red") +
annotate("text", x=x_axis[score]+n/20, y=y_axis[score], label=round(y_axis[score],2)) +
annotate("point", x=x_axis[score], y=y_axis[score], color="red") +
theme(plot.title=element_text(hjust=0.5),
panel.background=element_blank(),
axis.line=element_line(color="black"),
panel.border=element_rect(color="black", fill=NA, size=1))
ggeplot <- function(n, positions, x_axis, y_axis, title = "") {
score <- which.max(abs(y_axis))
DF <- data.frame(x_axis = x_axis, y_axis = y_axis)
ggplot2::ggplot(DF, aes(x = x_axis, y = y_axis)) +
ggplot2::geom_line() +
ggplot2::labs(
title = title,
y = "Running Enrichment Score",
x = "Position in Ranked List of Genes"
) +
ggplot2::geom_rug(data = data.frame(positions), aes(x = positions), inherit.aes = FALSE) +
ggplot2::geom_hline(yintercept = 0) +
ggplot2::geom_vline(xintercept = n / 2, linetype = "dotted") +
ggplot2::annotate("point", x = x_axis[score], y = y_axis[score], color = "red") +
ggplot2::annotate("text", x = x_axis[score] + n / 20, y = y_axis[score], label = round(y_axis[score], 2)) +
ggplot2::annotate("point", x = x_axis[score], y = y_axis[score], color = "red") +
ggplot2::theme(
plot.title = ggplot2::element_text(hjust = 0.5),
panel.background = ggplot2::element_blank(),
axis.line = ggplot2::element_line(color = "black"),
panel.border = ggplot2::element_rect(color = "black", fill = NA, linewidth = 1)
)
}
2 changes: 1 addition & 1 deletion R/ggvenn.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ ggvenn <- function(a, b, ga, gb, title="") {
paste(gb, " (", x.b, ")", sep=""))) %>%

ggplot(aes(x0=x, y0=y, r=c(r.a, r.b), fill=groups)) +
geom_circle(alpha=0.3, size=0.5) +
geom_circle(alpha=0.3, linewidth=0.5) +
coord_fixed() +
theme_void() +
theme(plot.title=element_text(hjust=0.5),
Expand Down
4 changes: 2 additions & 2 deletions R/hype.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ hypeR <- function(signature,
weights <- NULL
signature.type <- "ranked"
}
data <- data.frame(matrix(ncol=7, nrow=0))
colnames(data) <- c("label", "pval", "fdr", "signature", "geneset", "overlap", "score")
data <- data.frame(matrix(ncol=8, nrow=0))
colnames(data) <- c("label", "score", "pval", "fdr", "geneset", "signature", "overlap", "hits")
results <- .ks_enrichment(signature=signature,
genesets=gsets.obj$genesets,
weights=weights,
Expand Down
228 changes: 126 additions & 102 deletions R/ks_enrichment.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,75 +16,84 @@
y,
weights=NULL,
weights.pwr=1,
absolute=FALSE,
absolute=FALSE, # this is not really implemented, should be removed
plotting=FALSE,
plot.title="") {

n.y <- length(y)
err = list(score=0, pval=1, plot=ggempty())
if (n.y < 1 ) return(err)
if (any(y > n.x)) return(err)
if (any(y < 1)) return(err)

x.axis <- y.axis <- NULL
plot.title="")
{
n.y <- length(y)
err <- list(score = 0, pval = 1, leading_edge = NULL, leading_hits = NULL, plot = ggempty())
if (n.y < 1 || any(y > n.x) || any(y < 1) ) {
return(err)
}
x.axis <- y.axis <- NULL
leading_edge <- NULL # recording the x corresponding to the highest y value

## If weights are provided
if ( !is.null(weights) ) {
weights <- abs(weights[y])^weights.pwr

# If weights are provided
if (!is.null(weights)) {
weights <- abs(weights[y])^weights.pwr
Pmis <- rep(1, n.x); Pmis[y] <- 0; Pmis <- cumsum(Pmis); Pmis <- Pmis/(n.x-n.y)
Phit <- rep(0, n.x); Phit[y] <- weights; Phit <- cumsum(Phit); Phit <- Phit/Phit[n.x]
z <- Phit-Pmis

Pmis <- rep(1, n.x); Pmis[y] <- 0; Pmis <- cumsum(Pmis); Pmis <- Pmis/(n.x-n.y)
Phit <- rep(0, n.x); Phit[y] <- weights; Phit <- cumsum(Phit); Phit <- Phit/Phit[n.x]
z <- Phit-Pmis
score <- if (absolute) max(z)-min(z) else z[leading_edge <- which.max(abs(z))]

score <- if (absolute) max(z)-min(z) else z[which.max(abs(z))]

x.axis <- 1:n.x
y.axis <- z
x.axis <- 1:n.x
y.axis <- z
}
## Without weights
else {
y <- sort(y)
n <- n.x*n.y/(n.x + n.y)
hit <- 1/n.y
mis <- 1/n.x

# Without weights
} else {
y <- sort(y)
n <- n.x*n.y/(n.x + n.y)
hit <- 1/n.y
mis <- 1/n.x

Y <- sort(c(y-1, y))
Y <- Y[diff(Y) != 0]
y.match <- match(y, Y)
D <- rep(0, length(Y))
D[y.match] <- (1:n.y)
zero <- which(D == 0)[-1]
D[zero] <- D[zero-1]

z <- D*hit-Y*mis

score <- if (absolute) max(z)-min(z) else z[which.max(abs(z))]

x.axis <- Y
y.axis <- z

if (Y[1] > 0) {
x.axis <- c(0, x.axis)
y.axis <- c(0, y.axis)
}
if (max(Y) < n.x) {
x.axis <- c(x.axis, n.x)
y.axis <- c(y.axis, 0)
}
}
Y <- sort(c(y-1, y)) # append the positions preceding hits
Y <- Y[diff(Y) != 0] # remove repeated position
y.match <- match(y, Y) # find the hits' positions
D <- rep(0, length(Y))
D[y.match] <- (1:n.y)
zero <- which(D == 0)[-1]
D[zero] <- D[zero-1]

# One-sided Kolmogorov–Smirnov test
results <- suppressWarnings(ks.test(1:n.x, y, alternative="less"))
results$statistic <- score # Use the signed statistic

# Enrichment plot
p <- if (plotting) ggeplot(n.x, y, x.axis, y.axis, plot.title) else ggempty()

return(list(score=as.numeric(results$statistic),
pval=results$p.value,
plot=p))
z <- D*hit-Y*mis

score <- if (absolute) max(z)-min(z) else z[leading_edge <- which.max(abs(z))]

x.axis <- Y
y.axis <- z

if (Y[1] > 0) {
x.axis <- c(0, x.axis)
y.axis <- c(0, y.axis)
}
if (max(Y) < n.x) {
x.axis <- c(x.axis, n.x)
y.axis <- c(y.axis, 0)
}
}
leading_edge <- x.axis[leading_edge]
leading_hits <- intersect(x.axis[x.axis <= leading_edge], y)

## One-sided Kolmogorov–Smirnov test
results <- suppressWarnings(ks.test(1:n.x, y, alternative="less"))
results$statistic <- score # Use the signed statistic

## Enrichment plot
p <- if (plotting && n.y > 0) {
ggeplot(n.x, y, x.axis, y.axis, plot.title) +
geom_vline(xintercept = leading_edge, linetype = "dotted", color = "red", linewidth = 0.25)
} else {
ggempty()
}
return(list(
score = as.numeric(results$statistic),
pval = results$p.value,
leading_edge = leading_edge,
leading_hits = leading_hits,
plot = p
))
}

#' Enrichment test via one-sided Kolmogorov–Smirnov test
#'
#' @param signature A vector of ranked symbols
Expand All @@ -96,48 +105,63 @@
#' @return A list of data and plots
#'
#' @keywords internal
.ks_enrichment <- function(signature,
genesets,
weights=NULL,
weights.pwr=1,
absolute=FALSE,
plotting=TRUE) {

if (!is(genesets, "list")) stop("Error: Expected genesets to be a list of gene sets\n")
if (!is.null(weights)) stopifnot(length(signature) == length(weights))

signature <- unique(signature)
genesets <- lapply(genesets, unique)
.ks_enrichment <- function(
signature,
genesets,
weights = NULL,
weights.pwr = 1,
absolute = FALSE,
plotting = TRUE)
{
if (!is(genesets, "list")) stop("Error: Expected genesets to be a list of gene sets\n")
if (!is.null(weights)) stopifnot(length(signature) == length(weights))

results <- mapply(function(geneset, title) {
signature <- unique(signature)
genesets <- lapply(genesets, unique)

ranks <- match(geneset, signature)
ranks <- ranks[!is.na(ranks)]

# Run ks-test
results <- .kstest(n.x=length(signature),
y=ranks,
weights=weights,
weights.pwr=weights.pwr,
absolute=absolute,
plotting=plotting,
plot.title=title)

results[['geneset']] <- length(geneset)
results[['overlap']] <- length(ranks)
return(results)
results <- mapply( function(geneset, title) {
ranks <- match(geneset, signature)
ranks <- ranks[!is.na(ranks)]

}, genesets, names(genesets), USE.NAMES=TRUE, SIMPLIFY=FALSE)
## Run ks-test
results_in <- .kstest(
n.x = length(signature),
y = ranks,
weights = weights,
weights.pwr = weights.pwr,
absolute = absolute,
plotting = plotting,
plot.title = title
)
#results_in[["geneset"]] <- length(geneset)
results_in[["geneset"]] <- length(intersect(geneset, signature))
results_in[["overlap"]] <- length(results_in$leading_hits)
return(results_in)
}, genesets, names(genesets), USE.NAMES = TRUE, SIMPLIFY = FALSE)

results <- do.call(rbind, results)
data <- data.frame(apply(results[,c("score", "pval", "geneset", "overlap")], 2, unlist), stringsAsFactors=FALSE)
data$score <- signif(data$score, 2)
data$pval <- signif(data$pval, 2)
data$fdr <- signif(p.adjust(data$pval, method="fdr"), 2)
data$label <- names(genesets)
data$signature <- length(signature)
data <- data[,c("label", "pval", "fdr", "signature", "geneset", "overlap", "score")]
plots <- results[,"plot"]

return(list(data=data, plots=plots))
results <- do.call(rbind, results)
data <- data.frame(apply(results[, c("score", "pval", "geneset", "overlap")], 2, unlist),
stringsAsFactors = FALSE
)
## add list of genes in the leading edge
data <- data %>%
#dplyr::mutate(hits = sapply(results[, "leading_hits"], function(x) paste(signature[x], collapse = ",")))
dplyr::mutate(hits = sapply(results[, "leading_hits"], function(x) paste(signature[x], collapse = " , ")))
data$score <- signif(data$score, 2)
data$pval <- signif(data$pval, 2)
data$label <- names(genesets)
data$signature <- length(signature)
data$fdr <- signif(p.adjust(data$pval, method = "fdr"), 2)
data <- data %>%
dplyr::relocate(fdr, .after = pval) %>%
dplyr::relocate(signature, .after = geneset) %>%
dplyr::relocate(label) %>%
tibble::remove_rownames() # make sure this is OK
plots <- results[, "plot"]

return(list(
data = data,
plots = plots,
leading_hits = sapply(results[, "leading_hits"], function(x) signature[x])
))
}
12 changes: 6 additions & 6 deletions tests/testthat/test-hyp_dots.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ hyp_dots_tests <- function(hyp_obj, return_obj=FALSE) {
expect_silent(hyp_dots(hyp_obj, size_by="genesets"))
expect_silent(hyp_dots(hyp_obj, size_by="significance"))
expect_silent(hyp_dots(hyp_obj, size_by="none"))
p <- hyp_dots(hyp_obj, "gg")
p <- hyp_dots(hyp_obj)
expect_is(p, "gg")
if (return_obj) return(hyp_obj)
}
Expand All @@ -17,11 +17,11 @@ test_that("hyp_dots() is working", {
testdat <- readRDS(file.path(system.file("extdata", package="hypeR"), "testdat.rds"))
gsets_obj <- testdat$gsets
rgsets_obj <- testdat$rgsets

# Overrepresentation (signature)
signature <- testdat$signature
experiment <- testdat$experiment

hypeR(signature, gsets_obj, test="hypergeometric", background=100) %>%
hyp_dots_tests()
hypeR(signature, rgsets_obj, test="hypergeometric", background=100) %>%
Expand All @@ -31,7 +31,7 @@ test_that("hyp_dots() is working", {
expect_equal(length(p), 3)
expect_equal(names(p), c("Signature 1", "Signature 2", "Signature 3"))
expect_is(p[["Signature 3"]], "gg")

# Enrichment (ranked signature)
signature <- names(testdat$weighted_signature)
experiment <- lapply(testdat$weighted_experiment, names)
Expand All @@ -45,11 +45,11 @@ test_that("hyp_dots() is working", {
expect_equal(length(p), 3)
expect_equal(names(p), c("Signature 1", "Signature 2", "Signature 3"))
expect_is(p[["Signature 3"]], "gg")

# Enrichment (weighted signature)
signature <- testdat$weighted_signature
experiment <- testdat$weighted_experiment

hypeR(signature, gsets_obj, test="kstest") %>%
hyp_dots_tests()
hypeR(signature, rgsets_obj, test="kstest") %>%
Expand Down
Loading

0 comments on commit f5f7491

Please sign in to comment.