Skip to content

Commit

Permalink
fix counts files. tximport needs all samples at once.
Browse files Browse the repository at this point in the history
see #553. Thanks @j-andrews7!
  • Loading branch information
Lorena Pantano committed Jan 22, 2021
1 parent fa4a3e8 commit 9927b15
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 52 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unpublished Version / DEV]

### Enhancements & fixes

* Run tximport on all samples at once when generating output files, since processing samples one by one produces different numbers (see [#553](https://github.com/nf-core/rnaseq/issues/553) and the tximport documentation). First pointed out by @j-andrews7.

## [[3.0](https://github.com/nf-core/rnaseq/releases/tag/3.0)] - 2020-12-15

### :warning: Major enhancements
Expand Down
2 changes: 1 addition & 1 deletion bin/deseq2_qc.r
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ if (is.null(opt$count_file)){
################################################
################################################

count.table <- read.delim(file=opt$count_file,header=TRUE)
count.table <- read.delim(file=opt$count_file,header=TRUE, row.names=NULL)
rownames(count.table) <- count.table[,opt$id_col]
count.table <- count.table[,opt$count_col:ncol(count.table),drop=FALSE]
colnames(count.table) <- gsub(opt$sample_suffix,"",colnames(count.table))
Expand Down
2 changes: 1 addition & 1 deletion bin/salmon_summarizedexperiment.r
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

args = commandArgs(trailingOnly=TRUE)
if (length(args) < 2) {
stop("Usage: salmon_se.r <coldata> <salmon_out>", call.=FALSE)
stop("Usage: salmon_se.r <coldata> <counts> <tpm>", call.=FALSE)
}

coldata = args[1]
Expand Down
4 changes: 2 additions & 2 deletions bin/salmon_tximport.r
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ coldata = args[1]

sample_name = args[3]

prefix = paste(c(sample_name, "salmon"), sep=".")

#prefix = paste(c(sample_name, "salmon"), sep=".")
prefix = sample_name
tx2gene = "salmon_tx2gene.tsv"
info = file.info(tx2gene)
if (info$size == 0){
Expand Down
8 changes: 0 additions & 8 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -646,14 +646,6 @@ Results generated by MultiQC collate pipeline QC from supported tools i.e. FastQ
* `logs/`: Contains the file `salmon_quant.log` giving a record of Salmon's quantification.
* `quant.genes.sf`: Salmon _gene_-level quantification of the sample, including feature length, effective length, TPM, and number of reads.
* `quant.sf`: Salmon _transcript_-level quantification of the sample, including feature length, effective length, TPM, and number of reads.
* `<SAMPLE>.salmon.gene_counts.tsv`: Subset of `quant.genes.sf`, only containing the gene id and raw counts.
* `<SAMPLE>.salmon.gene_tpm.tsv`: Subset of `quant.genes.sf`, only containing the gene id and TPM values.
* `<SAMPLE>.salmon.gene_counts_scaled.tsv`: Subset of `quant.genes.sf`, only containing the gene id and scaled counts.
* `<SAMPLE>.salmon.gene_tpm_scaled.tsv`: Subset of `quant.genes.sf`, only containing the gene id and scaled TPM values.
* `<SAMPLE>.salmon.gene_counts_length_scaled.tsv`: Subset of `quant.genes.sf`, only containing the gene id and length-scaled counts.
* `<SAMPLE>.salmon.gene_tpm_length_scaled.tsv`: Subset of `quant.genes.sf`, only containing the gene id and length-scaled TPM values.
* `<SAMPLE>.salmon.transcript_counts.tsv`: Subset of `quant.sf`, only containing the transcript id and raw counts.
* `<SAMPLE>.salmon.transcript_tpm.tsv`: Subset of `quant.sf`, only containing the transcript id and TPM values.

</details>

Expand Down
24 changes: 12 additions & 12 deletions modules/local/process/salmon_tximport.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ include { saveFiles; getSoftwareName } from './functions'
params.options = [:]

process SALMON_TXIMPORT {
tag "$meta.id"
//tag "$meta.id"
label "process_medium"
publishDir "${params.outdir}",
mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:meta.id) }
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), publish_id:'') }

conda (params.enable_conda ? "bioconda::bioconductor-tximeta=1.8.0" : null)
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
Expand All @@ -18,23 +18,23 @@ process SALMON_TXIMPORT {
}

input:
tuple val(meta), path("salmon/*")
path ("salmon/*")
path tx2gene

output:
tuple val(meta), path("*gene_tpm.tsv") , emit: tpm_gene
tuple val(meta), path("*gene_counts.tsv") , emit: counts_gene
tuple val(meta), path("*gene_tpm_length_scaled.tsv") , emit: tpm_gene_length_scaled
tuple val(meta), path("*gene_counts_length_scaled.tsv"), emit: counts_gene_length_scaled
tuple val(meta), path("*gene_tpm_scaled.tsv") , emit: tpm_gene_scaled
tuple val(meta), path("*gene_counts_scaled.tsv") , emit: counts_gene_scaled
tuple val(meta), path("*transcript_tpm.tsv") , emit: tpm_transcript
tuple val(meta), path("*transcript_counts.tsv") , emit: counts_transcript
path("*gene_tpm.tsv") , emit: tpm_gene
path("*gene_counts.tsv") , emit: counts_gene
path("*gene_tpm_length_scaled.tsv") , emit: tpm_gene_length_scaled
path("*gene_counts_length_scaled.tsv"), emit: counts_gene_length_scaled
path("*gene_tpm_scaled.tsv") , emit: tpm_gene_scaled
path("*gene_counts_scaled.tsv") , emit: counts_gene_scaled
path("*transcript_tpm.tsv") , emit: tpm_transcript
path("*transcript_counts.tsv") , emit: counts_transcript
path "*.version.txt" , emit: version

script: // This script is bundled with the pipeline, in nf-core/rnaseq/bin/
"""
salmon_tximport.r NULL salmon $meta.id
salmon_tximport.r NULL salmon salmon.merged
Rscript -e "library(tximeta); write(x=as.character(packageVersion('tximeta')), file='bioconductor-tximeta.version.txt')"
"""
}
39 changes: 12 additions & 27 deletions modules/local/subworkflow/quantify_salmon.nf
Original file line number Diff line number Diff line change
Expand Up @@ -30,39 +30,30 @@ workflow QUANTIFY_SALMON {
*/
SALMON_QUANT ( reads, index, gtf, transcript_fasta, alignment_mode)
SALMON_TX2GENE ( SALMON_QUANT.out.results.collect{it[1]}, gtf )
SALMON_TXIMPORT ( SALMON_QUANT.out.results, SALMON_TX2GENE.out.collect() )
SALMON_MERGE_COUNTS (
SALMON_TXIMPORT.out.counts_gene.collect{it[1]}, // [meta, counts]: Collect the second element (counts files) in the channel across all samples
SALMON_TXIMPORT.out.tpm_gene.collect{it[1]},
SALMON_TXIMPORT.out.counts_gene_length_scaled.collect{it[1]}, // [meta, counts]: Collect the second element (counts files) in the channel across all samples
SALMON_TXIMPORT.out.tpm_gene_length_scaled.collect{it[1]},
SALMON_TXIMPORT.out.counts_gene_scaled.collect{it[1]}, // [meta, counts]: Collect the second element (counts files) in the channel across all samples
SALMON_TXIMPORT.out.tpm_gene_scaled.collect{it[1]},
SALMON_TXIMPORT.out.counts_transcript.collect{it[1]},
SALMON_TXIMPORT.out.tpm_transcript.collect{it[1]},
)
SALMON_TXIMPORT ( SALMON_QUANT.out.results.collect{it[1]}, SALMON_TX2GENE.out.collect() )


SALMON_SE_GENE (
SALMON_MERGE_COUNTS.out.counts_gene,
SALMON_MERGE_COUNTS.out.tpm_gene,
SALMON_TXIMPORT.out.counts_gene,
SALMON_TXIMPORT.out.tpm_gene,
SALMON_TX2GENE.out.collect(),
)

SALMON_SE_GENE_LENGTH_SCALED (
SALMON_MERGE_COUNTS.out.counts_gene_length_scaled,
SALMON_MERGE_COUNTS.out.tpm_gene_length_scaled,
SALMON_TXIMPORT.out.counts_gene_length_scaled,
SALMON_TXIMPORT.out.tpm_gene_length_scaled,
SALMON_TX2GENE.out.collect(),
)

SALMON_SE_GENE_SCALED (
SALMON_MERGE_COUNTS.out.counts_gene_scaled,
SALMON_MERGE_COUNTS.out.tpm_gene_scaled,
SALMON_TXIMPORT.out.counts_gene_scaled,
SALMON_TXIMPORT.out.tpm_gene_scaled,
SALMON_TX2GENE.out.collect(),
)

SALMON_SE_TRANSCRIPT (
SALMON_MERGE_COUNTS.out.counts_transcript,
SALMON_MERGE_COUNTS.out.tpm_transcript,
SALMON_TXIMPORT.out.counts_transcript,
SALMON_TXIMPORT.out.tpm_transcript,
SALMON_TX2GENE.out.collect(),
)

Expand All @@ -80,18 +71,12 @@ workflow QUANTIFY_SALMON {
counts_transcript = SALMON_TXIMPORT.out.counts_transcript // channel: [ val(meta), counts ]
tximeta_version = SALMON_TXIMPORT.out.version // path: *.version.txt

merged_counts_gene = SALMON_MERGE_COUNTS.out.counts_gene // path: *.gene_counts.tsv
merged_tpm_gene = SALMON_MERGE_COUNTS.out.tpm_gene // path: *.gene_tpm.tsv
merged_counts_gene_length_scaled = SALMON_MERGE_COUNTS.out.counts_gene_length_scaled // path: *.gene_counts.tsv
merged_tpm_gene_length_scaled = SALMON_MERGE_COUNTS.out.tpm_gene_length_scaled // path: *.gene_tpm.tsv
merged_counts_gene_scaled = SALMON_MERGE_COUNTS.out.counts_gene_scaled // path: *.gene_counts.tsv
merged_tpm_gene_scaled = SALMON_MERGE_COUNTS.out.tpm_gene_scaled // path: *.gene_tpm.tsv
merged_gene_rds = SALMON_SE_GENE.out.rds // path: *.rds
merged_gene_rds_length_scaled = SALMON_SE_GENE_LENGTH_SCALED.out.rds // path: *.rds
merged_gene_rds_scaled = SALMON_SE_GENE_SCALED.out.rds // path: *.rds
summarizedexperiment_version = SALMON_SE_GENE.out.version // path: *.version.txt

merged_counts_transcript = SALMON_MERGE_COUNTS.out.counts_transcript // path: *.transcript_counts.tsv
merged_tpm_transcript = SALMON_MERGE_COUNTS.out.tpm_transcript // path: *.transcript_tpm.tsv
merged_counts_transcript = SALMON_TXIMPORT.out.counts_transcript // path: *.transcript_counts.tsv
merged_tpm_transcript = SALMON_TXIMPORT.out.tpm_transcript // path: *.transcript_tpm.tsv
merged_transcript_rds = SALMON_SE_TRANSCRIPT.out.rds // path: *.rds
}
2 changes: 1 addition & 1 deletion rnaseq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,7 @@ workflow RNASEQ {

if (!params.skip_qc & !params.skip_deseq2_qc) {
DESEQ2_QC_SALMON (
QUANTIFY_SALMON.out.merged_counts_gene_length_scaled,
QUANTIFY_SALMON.out.counts_gene_length_scaled,
ch_pca_header_multiqc,
ch_clustering_header_multiqc
)
Expand Down

0 comments on commit 9927b15

Please sign in to comment.