diff --git a/CHANGELOG.md b/CHANGELOG.md index 33e29c30e..84a30603c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unpublished Version / DEV] +### Enhancements & fixes + +* Generate the tximport output files from all samples at once, since processing samples one by one produces different numbers (see [#553](https://github.com/nf-core/rnaseq/issues/553) and the tximport documentation). First pointed out by @j-andrews7. + ## [[3.0](https://github.com/nf-core/rnaseq/releases/tag/3.0)] - 2020-12-15 ### :warning: Major enhancements diff --git a/bin/deseq2_qc.r b/bin/deseq2_qc.r index e543110da..469102296 100755 --- a/bin/deseq2_qc.r +++ b/bin/deseq2_qc.r @@ -54,7 +54,7 @@ if (is.null(opt$count_file)){ ################################################ ################################################ -count.table <- read.delim(file=opt$count_file,header=TRUE) +count.table <- read.delim(file=opt$count_file,header=TRUE, row.names=NULL) rownames(count.table) <- count.table[,opt$id_col] count.table <- count.table[,opt$count_col:ncol(count.table),drop=FALSE] colnames(count.table) <- gsub(opt$sample_suffix,"",colnames(count.table)) diff --git a/bin/salmon_summarizedexperiment.r b/bin/salmon_summarizedexperiment.r index 18bdfa112..0295976be 100755 --- a/bin/salmon_summarizedexperiment.r +++ b/bin/salmon_summarizedexperiment.r @@ -4,7 +4,7 @@ args = commandArgs(trailingOnly=TRUE) if (length(args) < 2) { - stop("Usage: salmon_se.r ", call.=FALSE) + stop("Usage: salmon_se.r ", call.=FALSE) } coldata = args[1] diff --git a/bin/salmon_tximport.r b/bin/salmon_tximport.r index 51b36f8c2..6b88fc4a0 100755 --- a/bin/salmon_tximport.r +++ b/bin/salmon_tximport.r @@ -10,8 +10,8 @@ coldata = args[1] sample_name = args[3] -prefix = paste(c(sample_name, "salmon"), sep=".") - +#prefix = paste(c(sample_name, "salmon"), sep=".") +prefix = sample_name tx2gene = "salmon_tx2gene.tsv" info = file.info(tx2gene) if 
(info$size == 0){ diff --git a/docs/output.md b/docs/output.md index f0f029924..5baaa7812 100644 --- a/docs/output.md +++ b/docs/output.md @@ -646,14 +646,6 @@ Results generated by MultiQC collate pipeline QC from supported tools i.e. FastQ * `logs/`: Contains the file `salmon_quant.log` giving a record of Salmon's quantification. * `quant.genes.sf`: Salmon _gene_-level quantification of the sample, including feature length, effective length, TPM, and number of reads. * `quant.sf`: Salmon _transcript_-level quantification of the sample, including feature length, effective length, TPM, and number of reads. - * `.salmon.gene_counts.tsv`: Subset of `quant.genes.sf`, only containing the gene id and raw counts. - * `.salmon.gene_tpm.tsv`: Subset of `quant.genes.sf`, only containing the gene id and TPM values. - * `.salmon.gene_counts_scaled.tsv`: Subset of `quant.genes.sf`, only containing the gene id and scaled counts. - * `.salmon.gene_tpm_scaled.tsv`: Subset of `quant.genes.sf`, only containing the gene id and scaled TPM values. - * `.salmon.gene_counts_length_scaled.tsv`: Subset of `quant.genes.sf`, only containing the gene id and length-scaled counts. - * `.salmon.gene_tpm_length_scaled.tsv`:Subset of `quant.genes.sf`, only containing the gene id and length-scaled TPM values. - * `.salmon.transcript_counts.tsv`: Subset of `quant.sf`, only containing the transcript id and raw counts. - * `.salmon.transcript_tpm.tsv`: Subset of `quant.sf`, only containing the transcript id and TPM values. 
diff --git a/modules/local/process/salmon_tximport.nf b/modules/local/process/salmon_tximport.nf index a9e03be45..0ec6f1b72 100644 --- a/modules/local/process/salmon_tximport.nf +++ b/modules/local/process/salmon_tximport.nf @@ -4,11 +4,11 @@ include { saveFiles; getSoftwareName } from './functions' params.options = [:] process SALMON_TXIMPORT { - tag "$meta.id" + //tag "$meta.id" label "process_medium" publishDir "${params.outdir}", mode: params.publish_dir_mode, - saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) } + saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:'') } conda (params.enable_conda ? "bioconda::bioconductor-tximeta=1.8.0" : null) if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) { @@ -18,23 +18,23 @@ process SALMON_TXIMPORT { } input: - tuple val(meta), path("salmon/*") + path ("salmon/*") path tx2gene output: - tuple val(meta), path("*gene_tpm.tsv") , emit: tpm_gene - tuple val(meta), path("*gene_counts.tsv") , emit: counts_gene - tuple val(meta), path("*gene_tpm_length_scaled.tsv") , emit: tpm_gene_length_scaled - tuple val(meta), path("*gene_counts_length_scaled.tsv"), emit: counts_gene_length_scaled - tuple val(meta), path("*gene_tpm_scaled.tsv") , emit: tpm_gene_scaled - tuple val(meta), path("*gene_counts_scaled.tsv") , emit: counts_gene_scaled - tuple val(meta), path("*transcript_tpm.tsv") , emit: tpm_transcript - tuple val(meta), path("*transcript_counts.tsv") , emit: counts_transcript + path("*gene_tpm.tsv") , emit: tpm_gene + path("*gene_counts.tsv") , emit: counts_gene + path("*gene_tpm_length_scaled.tsv") , emit: tpm_gene_length_scaled + path("*gene_counts_length_scaled.tsv"), emit: counts_gene_length_scaled + path("*gene_tpm_scaled.tsv") , emit: tpm_gene_scaled + path("*gene_counts_scaled.tsv") , emit: counts_gene_scaled + 
path("*transcript_tpm.tsv") , emit: tpm_transcript + path("*transcript_counts.tsv") , emit: counts_transcript path "*.version.txt" , emit: version script: // This script is bundled with the pipeline, in nf-core/rnaseq/bin/ """ - salmon_tximport.r NULL salmon $meta.id + salmon_tximport.r NULL salmon salmon.merged Rscript -e "library(tximeta); write(x=as.character(packageVersion('tximeta')), file='bioconductor-tximeta.version.txt')" """ } diff --git a/modules/local/subworkflow/quantify_salmon.nf b/modules/local/subworkflow/quantify_salmon.nf index 3757f7ec8..30574b97c 100644 --- a/modules/local/subworkflow/quantify_salmon.nf +++ b/modules/local/subworkflow/quantify_salmon.nf @@ -30,39 +30,30 @@ workflow QUANTIFY_SALMON { */ SALMON_QUANT ( reads, index, gtf, transcript_fasta, alignment_mode) SALMON_TX2GENE ( SALMON_QUANT.out.results.collect{it[1]}, gtf ) - SALMON_TXIMPORT ( SALMON_QUANT.out.results, SALMON_TX2GENE.out.collect() ) - SALMON_MERGE_COUNTS ( - SALMON_TXIMPORT.out.counts_gene.collect{it[1]}, // [meta, counts]: Collect the second element (counts files) in the channel across all samples - SALMON_TXIMPORT.out.tpm_gene.collect{it[1]}, - SALMON_TXIMPORT.out.counts_gene_length_scaled.collect{it[1]}, // [meta, counts]: Collect the second element (counts files) in the channel across all samples - SALMON_TXIMPORT.out.tpm_gene_length_scaled.collect{it[1]}, - SALMON_TXIMPORT.out.counts_gene_scaled.collect{it[1]}, // [meta, counts]: Collect the second element (counts files) in the channel across all samples - SALMON_TXIMPORT.out.tpm_gene_scaled.collect{it[1]}, - SALMON_TXIMPORT.out.counts_transcript.collect{it[1]}, - SALMON_TXIMPORT.out.tpm_transcript.collect{it[1]}, - ) + SALMON_TXIMPORT ( SALMON_QUANT.out.results.collect{it[1]}, SALMON_TX2GENE.out.collect() ) + SALMON_SE_GENE ( - SALMON_MERGE_COUNTS.out.counts_gene, - SALMON_MERGE_COUNTS.out.tpm_gene, + SALMON_TXIMPORT.out.counts_gene, + SALMON_TXIMPORT.out.tpm_gene, SALMON_TX2GENE.out.collect(), ) 
SALMON_SE_GENE_LENGTH_SCALED ( - SALMON_MERGE_COUNTS.out.counts_gene_length_scaled, - SALMON_MERGE_COUNTS.out.tpm_gene_length_scaled, + SALMON_TXIMPORT.out.counts_gene_length_scaled, + SALMON_TXIMPORT.out.tpm_gene_length_scaled, SALMON_TX2GENE.out.collect(), ) SALMON_SE_GENE_SCALED ( - SALMON_MERGE_COUNTS.out.counts_gene_scaled, - SALMON_MERGE_COUNTS.out.tpm_gene_scaled, + SALMON_TXIMPORT.out.counts_gene_scaled, + SALMON_TXIMPORT.out.tpm_gene_scaled, SALMON_TX2GENE.out.collect(), ) SALMON_SE_TRANSCRIPT ( - SALMON_MERGE_COUNTS.out.counts_transcript, - SALMON_MERGE_COUNTS.out.tpm_transcript, + SALMON_TXIMPORT.out.counts_transcript, + SALMON_TXIMPORT.out.tpm_transcript, SALMON_TX2GENE.out.collect(), ) @@ -80,18 +71,12 @@ workflow QUANTIFY_SALMON { counts_transcript = SALMON_TXIMPORT.out.counts_transcript // channel: [ val(meta), counts ] tximeta_version = SALMON_TXIMPORT.out.version // path: *.version.txt - merged_counts_gene = SALMON_MERGE_COUNTS.out.counts_gene // path: *.gene_counts.tsv - merged_tpm_gene = SALMON_MERGE_COUNTS.out.tpm_gene // path: *.gene_tpm.tsv - merged_counts_gene_length_scaled = SALMON_MERGE_COUNTS.out.counts_gene_length_scaled // path: *.gene_counts.tsv - merged_tpm_gene_length_scaled = SALMON_MERGE_COUNTS.out.tpm_gene_length_scaled // path: *.gene_tpm.tsv - merged_counts_gene_scaled = SALMON_MERGE_COUNTS.out.counts_gene_scaled // path: *.gene_counts.tsv - merged_tpm_gene_scaled = SALMON_MERGE_COUNTS.out.tpm_gene_scaled // path: *.gene_tpm.tsv merged_gene_rds = SALMON_SE_GENE.out.rds // path: *.rds merged_gene_rds_length_scaled = SALMON_SE_GENE_LENGTH_SCALED.out.rds // path: *.rds merged_gene_rds_scaled = SALMON_SE_GENE_SCALED.out.rds // path: *.rds summarizedexperiment_version = SALMON_SE_GENE.out.version // path: *.version.txt - merged_counts_transcript = SALMON_MERGE_COUNTS.out.counts_transcript // path: *.transcript_counts.tsv - merged_tpm_transcript = SALMON_MERGE_COUNTS.out.tpm_transcript // path: *.transcript_tpm.tsv + 
merged_counts_transcript = SALMON_TXIMPORT.out.counts_transcript // path: *.transcript_counts.tsv + merged_tpm_transcript = SALMON_TXIMPORT.out.tpm_transcript // path: *.transcript_tpm.tsv merged_transcript_rds = SALMON_SE_TRANSCRIPT.out.rds // path: *.rds } diff --git a/rnaseq.nf b/rnaseq.nf index 52ec75f84..e5d152c02 100755 --- a/rnaseq.nf +++ b/rnaseq.nf @@ -635,7 +635,7 @@ workflow RNASEQ { if (!params.skip_qc & !params.skip_deseq2_qc) { DESEQ2_QC_SALMON ( - QUANTIFY_SALMON.out.merged_counts_gene_length_scaled, + QUANTIFY_SALMON.out.counts_gene_length_scaled, ch_pca_header_multiqc, ch_clustering_header_multiqc )