diff --git a/CRAN-RELEASE b/CRAN-RELEASE index b191737a..1e217160 100644 --- a/CRAN-RELEASE +++ b/CRAN-RELEASE @@ -1,2 +1,2 @@ -This package was submitted to CRAN on 2020-06-17. -Once it is accepted, delete this file and tag the release (commit 7a1f5431a0). +This package was submitted to CRAN on 2020-08-25. +Once it is accepted, delete this file and tag the release (commit f750974add). diff --git a/NAMESPACE b/NAMESPACE index 906cf7c9..0e80236b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -81,6 +81,7 @@ export(sigprofiler_extract) export(sigprofiler_import) export(sym) export(syms) +export(transform_seg_table) export(use_color_style) exportClasses(CopyNumber) exportClasses(MAF) diff --git a/NEWS.md b/NEWS.md index 88447aca..37201e1f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # sigminer 1.0.12 +- Added `transform_seg_table()`. - Added `show_cn_group_profile()`. - Added `show_cn_freq_circos()`. - `sig_orders` option in `show_sig_profile()` function now can select and order signatures to plot. diff --git a/R/show_cn_circos.R b/R/show_cn_circos.R index cc5d9733..68fd4a09 100644 --- a/R/show_cn_circos.R +++ b/R/show_cn_circos.R @@ -38,7 +38,7 @@ show_cn_circos <- function(data, samples = NULL, show_title = TRUE, chrs = paste0("chr", 1:22), - genome_build = c("hg19", "hg38"), + genome_build = c("hg19", "hg38", "mm10"), col = NULL, side = "inside", ...) 
{ diff --git a/R/show_cn_freq_circos.R b/R/show_cn_freq_circos.R index d2032fa5..b2e9bb42 100644 --- a/R/show_cn_freq_circos.R +++ b/R/show_cn_freq_circos.R @@ -34,7 +34,7 @@ show_cn_freq_circos <- function(data, resolution_factor = 1L, title = c("AMP", "DEL"), chrs = paste0("chr", 1:22), - genome_build = c("hg19", "hg38"), + genome_build = c("hg19", "hg38", "mm10"), cols = NULL, plot_ideogram = TRUE, track_height = 0.5, diff --git a/R/show_cn_group_profile.R b/R/show_cn_group_profile.R index 8459157b..1c572135 100644 --- a/R/show_cn_group_profile.R +++ b/R/show_cn_group_profile.R @@ -44,7 +44,7 @@ show_cn_group_profile <- function(data, fill_area = TRUE, cols = NULL, chrs = paste0("chr", c(1:22, "X")), - genome_build = c("hg19", "hg38"), + genome_build = c("hg19", "hg38", "mm10"), cutoff = 2L, resolution_factor = 1L, force_y_limit = TRUE, diff --git a/R/show_cn_profile.R b/R/show_cn_profile.R index 482160fb..bb4bc1f6 100644 --- a/R/show_cn_profile.R +++ b/R/show_cn_profile.R @@ -31,7 +31,7 @@ #' expect_s3_class(p, "ggplot") show_cn_profile <- function(data, samples = NULL, show_n = NULL, show_title = FALSE, chrs = paste0("chr", 1:22), - genome_build = c("hg19", "hg38"), + genome_build = c("hg19", "hg38", "mm10"), nrow = NULL, ncol = NULL, return_plotlist = FALSE) { stopifnot(is.data.frame(data) | inherits(data, "CopyNumber")) if (is.data.frame(data)) { diff --git a/R/transform_seg_table.R b/R/transform_seg_table.R new file mode 100644 index 00000000..b392badc --- /dev/null +++ b/R/transform_seg_table.R @@ -0,0 +1,111 @@ +#' Transform Copy Number Table +#' +#' @inheritParams get_cn_freq_table +#' @inheritParams tidyr::pivot_wider +#' @param ref_type annotation data type used for constructing matrix. +#' +#' @return a `data.table`. 
+#' @export +#' +#' @examples +#' load(system.file("extdata", "toy_copynumber.RData", +#' package = "sigminer", mustWork = TRUE +#' )) +#' # Compute the mean segVal in each cytoband +#' x <- transform_seg_table(cn, resolution_factor = 1) +#' x +#' # Compute the mean segVal in each half-cytoband +#' x2 <- transform_seg_table(cn, resolution_factor = 2) +#' x2 +#' @testexamples +#' expect_is(x, "data.table") +#' expect_is(x2, "data.table") +transform_seg_table <- function(data, + genome_build = c("hg19", "hg38", "mm10"), + ref_type = c("cytoband", "gene"), + values_fill = NA, + values_fn = function(x, ...) { + round(mean(x, ...))}, + resolution_factor = 1L) { + + stopifnot(is.data.frame(data) | inherits(data, "CopyNumber")) + if (is.data.frame(data)) { + nc_cols <- c("chromosome", "start", "end", "segVal", "sample") + if (!all(nc_cols %in% colnames(data))) { + stop("Invalid input, it must contain columns: ", paste(nc_cols, collapse = " ")) + } + } + + genome_build <- match.arg(genome_build) + if (inherits(data, "CopyNumber")) { + genome_build <- data@genome_build + data <- data@data + } else { + data <- data.table::as.data.table(data) + } + + ref_type <- match.arg(ref_type) + + #data$sample <- factor(data$sample, levels = unique(data$sample)) + data$chromosome <- ifelse(startsWith(data$chromosome, prefix = "chr"), + data$chromosome, + paste0("chr", data$chromosome)) + + if (ref_type == "cytoband") { + annot <- get_genome_annotation( + data_type = "cytobands", + genome_build = genome_build + ) + annot$start <- annot$start + 1L + } else { + if (genome_build == "mm10") { + # Not support for now + annot_file <- system.file("extdata", "mouse_mm10_gene_info.rds", + package = "sigminer", mustWork = TRUE) + } else { + annot_file <- system.file("extdata", paste0("human_", genome_build, "_gene_info.rds"), + package = "sigminer", mustWork = TRUE) + } + + annot <- readRDS(annot_file) + annot <- annot[, c("chrom", "start", "end", "gene_name", "gene_type")] + colnames(annot)[4] <- 
"band" + } + + + data.table::setDT(annot) + ## Control the resolution + if (resolution_factor > 1) { + f <- function(x, y, n, chrom, band) { + helper_create_chunks(x, y, + n = n, + chrom = chrom, + band = paste(band, seq_len(n), sep = "-chunk-") + ) + } + annot <- purrr::pmap_df( + data.frame( + x = annot$start, + y = annot$end, + n = resolution_factor, + chrom = annot$chrom, + band = annot$band + ), + .f = f + ) %>% + data.table::as.data.table() %>% + data.table::setcolorder(c("chrom", "start", "end", "band")) + } + data.table::setkey(annot, chrom, start, end) + merge_dt <- data.table::foverlaps(data, annot, + by.x = c("chromosome", "start", "end") + ) + merge_dt <- merge_dt %>% + dplyr::as_tibble() %>% + dplyr::select(-c("i.start", "i.end")) %>% + na.omit() %>% + tidyr::pivot_wider(names_from = "sample", values_from = "segVal", + values_fill = values_fill, values_fn = values_fn) + colnames(merge_dt)[4] <- "label" + merge_dt %>% data.table::as.data.table() +} diff --git a/_pkgdown.yml b/_pkgdown.yml index aa3b7c52..1499d2f8 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -112,6 +112,7 @@ reference: - title: Copy number analysis and visualization desc: Functions for analyzing copy number data and visualization. contents: + - transform_seg_table - get_cn_ploidy - scoring - show_cn_profile diff --git a/data-raw/mouse_genome.R b/data-raw/mouse_genome.R index fd0d1906..2db49ed8 100644 --- a/data-raw/mouse_genome.R +++ b/data-raw/mouse_genome.R @@ -101,4 +101,19 @@ mm10$width <- NULL transcript.mm10 <- mm10 usethis::use_data(transcript.mm10, overwrite = TRUE) -## Currently, I don't use gene location data, so don't generate it for now. 
+# Gene -------------------------------------------------------------------- + +## mm10 gene +gtf_mm10 <- data.table::fread("data-raw/mm10.annotation.gtf.gz", skip = 5, sep = "\t", header = FALSE) + +gtf_mm10[, `:=`( + gene_name = extract_col(V9, "gene_name"), + gene_id = extract_col(V9, "gene_id"), + gene_type = extract_col(V9, "gene_type") +)] + +gene_mm10 <- gtf_mm10[V3 == "gene", .(V1, V4, V5, V7, gene_name, gene_id, gene_type)] +colnames(gene_mm10)[1:4] <- c("chrom", "start", "end", "strand") + +## Save to extdata +saveRDS(gene_mm10, file = "inst/extdata/mouse_mm10_gene_info.rds") diff --git a/docs/reference/cosine.html b/docs/reference/cosine.html index 870159fe..d76355db 100644 --- a/docs/reference/cosine.html +++ b/docs/reference/cosine.html @@ -151,8 +151,8 @@

Value

a numeric value or matrix.

Examples

-
x <- c( 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 )
-y <- c( 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 )
+    
x <- c(1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+y <- c(0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0)
 z1 <- cosine(x, y)
 z1
 z2 <- cosine(matrix(x), matrix(y))
diff --git a/docs/reference/get_cn_freq_table.html b/docs/reference/get_cn_freq_table.html
index 78ff7c4c..3d0130a3 100644
--- a/docs/reference/get_cn_freq_table.html
+++ b/docs/reference/get_cn_freq_table.html
@@ -131,7 +131,12 @@ 

Get CNV Frequency Table

Get CNV Frequency Table

-
get_cn_freq_table(data, genome_build = "hg19", cutoff = 2L)
+
get_cn_freq_table(
+  data,
+  genome_build = "hg19",
+  cutoff = 2L,
+  resolution_factor = 1L
+)

Arguments

@@ -150,6 +155,12 @@

Arg

+ + + +

copy number value cutoff for splitting data into AMP and DEL. The values equal to cutoff are discarded. Default is 2, you can also set a length-2 vector, e.g. c(2, 2).

resolution_factor

an integer to control the resolution. +When it is 1 (default), compute frequency in each cytoband. +When it is 2, compute frequency in each half cytoband.

diff --git a/docs/reference/index.html b/docs/reference/index.html index b23b279a..ca702fcd 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -659,6 +659,12 @@

+ +

transform_seg_table()

+ +

Transform Copy Number Table

+ +

get_cn_ploidy()

diff --git a/docs/reference/show_cn_circos.html b/docs/reference/show_cn_circos.html index 3e6edade..03473bec 100644 --- a/docs/reference/show_cn_circos.html +++ b/docs/reference/show_cn_circos.html @@ -136,7 +136,7 @@

Show Copy Number Profile in Circos

samples = NULL, show_title = TRUE, chrs = paste0("chr", 1:22), - genome_build = c("hg19", "hg38"), + genome_build = c("hg19", "hg38", "mm10"), col = NULL, side = "inside", ... diff --git a/docs/reference/show_cn_freq_circos.html b/docs/reference/show_cn_freq_circos.html index e85858cf..287f8375 100644 --- a/docs/reference/show_cn_freq_circos.html +++ b/docs/reference/show_cn_freq_circos.html @@ -135,9 +135,10 @@

Show Copy Number Variation Frequency Profile with Circos

data, groups = NULL, cutoff = 2L, + resolution_factor = 1L, title = c("AMP", "DEL"), chrs = paste0("chr", 1:22), - genome_build = c("hg19", "hg38"), + genome_build = c("hg19", "hg38", "mm10"), cols = NULL, plot_ideogram = TRUE, track_height = 0.5, @@ -162,6 +163,12 @@

Arg

copy number value cutoff for splitting data into AMP and DEL. The values equal to cutoff are discarded. Default is 2, you can also set a length-2 vector, e.g. c(2, 2).

+ + + resolution_factor +

an integer to control the resolution. +When it is 1 (default), compute frequency in each cytoband. +When it is 2, compute frequency in each half cytoband.

title diff --git a/docs/reference/show_cn_group_profile.html b/docs/reference/show_cn_group_profile.html index 9b1539e5..e2ffaff8 100644 --- a/docs/reference/show_cn_group_profile.html +++ b/docs/reference/show_cn_group_profile.html @@ -137,8 +137,9 @@

Show Summary Copy Number Profile for Sample Groups

fill_area = TRUE, cols = NULL, chrs = paste0("chr", c(1:22, "X")), - genome_build = c("hg19", "hg38"), + genome_build = c("hg19", "hg38", "mm10"), cutoff = 2L, + resolution_factor = 1L, force_y_limit = TRUE, nrow = NULL, ncol = NULL, @@ -178,6 +179,12 @@

Arg

copy number value cutoff for splitting data into AMP and DEL. The values equal to cutoff are discarded. Default is 2, you can also set a length-2 vector, e.g. c(2, 2).

+ + + resolution_factor +

an integer to control the resolution. +When it is 1 (default), compute frequency in each cytoband. +When it is 2, compute frequency in each half cytoband.

force_y_limit @@ -212,16 +219,20 @@

Examp ss <- unique(cn@data$sample) p2 <- show_cn_group_profile(cn, groups = list(a = ss[1:5], b = ss[6:10])) p2 -p3 <- show_cn_group_profile(cn, groups = list(g1 = ss[1:5], g2 = ss[6:10]), - force_y_limit = c(-1, 1), nrow = 2) +p3 <- show_cn_group_profile(cn, + groups = list(g1 = ss[1:5], g2 = ss[6:10]), + force_y_limit = c(-1, 1), nrow = 2 +) p3 ## Set custom cutoff for custom data data <- cn@data data$segVal <- data$segVal - 2L -p4 <- show_cn_group_profile(data, groups = list(g1 = ss[1:5], g2 = ss[6:10]), - force_y_limit = c(-1, 1), nrow = 2, - cutoff = c(0, 0)) +p4 <- show_cn_group_profile(data, + groups = list(g1 = ss[1:5], g2 = ss[6:10]), + force_y_limit = c(-1, 1), nrow = 2, + cutoff = c(0, 0) +) p4