Skip to content

Commit

Permalink
Update fastq_align_star for nf-test and handle transcriptome alignmen…
Browse files Browse the repository at this point in the history
…t better (nf-core#4927)

* Update fastq_align_star for nf-test and handle transcriptome alignment better

* Remove backup file

* Replace test data with RNA-specific that will actually align

* Appease eclint

* Add extra test for when transcriptome BAM is produced but transcriptome is not supplied

* Fix snapshots

* Remove convenience transcriptome creation

* Update subworkflows/nf-core/fastq_align_star/meta.yml

Co-authored-by: Matthias Hörtenhuber <mashehu@users.noreply.github.com>

---------

Co-authored-by: Matthias Hörtenhuber <mashehu@users.noreply.github.com>
  • Loading branch information
2 people authored and jch-13 committed Mar 18, 2024
1 parent 96c180a commit 05b3f76
Show file tree
Hide file tree
Showing 11 changed files with 1,380 additions and 208 deletions.
16 changes: 16 additions & 0 deletions modules/nf-core/umicollapse/tests/main.nf.test.snap

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 38 additions & 20 deletions subworkflows/nf-core/fastq_align_star/main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
include { STAR_ALIGN } from '../../../modules/nf-core/star/align/main'
include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main'
include { STAR_ALIGN } from '../../../modules/nf-core/star/align/main'
include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_GENOME } from '../bam_sort_stats_samtools/main'
include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME } from '../bam_sort_stats_samtools/main'


workflow FASTQ_ALIGN_STAR {

Expand All @@ -11,6 +13,7 @@ workflow FASTQ_ALIGN_STAR {
val_seq_platform // string : sequencing platform
val_seq_center // string : sequencing center
ch_fasta // channel: [ val(meta), path(fasta) ]
ch_transcripts_fasta // channel: [ path(fasta) ]

main:

Expand All @@ -25,25 +28,40 @@ workflow FASTQ_ALIGN_STAR {
//
// Sort, index BAM file and run samtools stats, flagstat and idxstats
//
BAM_SORT_STATS_SAMTOOLS ( STAR_ALIGN.out.bam, ch_fasta )
ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions)
BAM_SORT_STATS_SAMTOOLS_GENOME ( STAR_ALIGN.out.bam, ch_fasta )
ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS_GENOME.out.versions)

//
// Sort, index transcriptome BAM file and run samtools stats, flagstat and idxstats
//
// Only runs when '--quantMode TranscriptomeSAM' is set in args and
// STAR_ALIGN.out.bam_transcript is populated
//

BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME ( STAR_ALIGN.out.bam_transcript, ch_transcripts_fasta.map{[[:], it]} )

emit:

orig_bam = STAR_ALIGN.out.bam // channel: [ val(meta), path(bam) ]
log_final = STAR_ALIGN.out.log_final // channel: [ val(meta), path(log_final) ]
log_out = STAR_ALIGN.out.log_out // channel: [ val(meta), path(log_out) ]
log_progress = STAR_ALIGN.out.log_progress // channel: [ val(meta), path(log_progress) ]
bam_sorted = STAR_ALIGN.out.bam_sorted // channel: [ val(meta), path(bam) ]
bam_transcript = STAR_ALIGN.out.bam_transcript // channel: [ val(meta), path(bam) ]
fastq = STAR_ALIGN.out.fastq // channel: [ val(meta), path(fastq) ]
tab = STAR_ALIGN.out.tab // channel: [ val(meta), path(tab) ]

bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), path(bam) ]
bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), path(bai) ]
stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ]
flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ]
idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ]

versions = ch_versions // channel: [ path(versions.yml) ]
orig_bam = STAR_ALIGN.out.bam // channel: [ val(meta), path(bam) ]
log_final = STAR_ALIGN.out.log_final // channel: [ val(meta), path(log_final) ]
log_out = STAR_ALIGN.out.log_out // channel: [ val(meta), path(log_out) ]
log_progress = STAR_ALIGN.out.log_progress // channel: [ val(meta), path(log_progress) ]
bam_sorted = STAR_ALIGN.out.bam_sorted // channel: [ val(meta), path(bam) ]
fastq = STAR_ALIGN.out.fastq // channel: [ val(meta), path(fastq) ]
tab = STAR_ALIGN.out.tab // channel: [ val(meta), path(tab) ]
orig_bam_transcript = STAR_ALIGN.out.bam_transcript // channel: [ val(meta), path(bam) ]

bam = BAM_SORT_STATS_SAMTOOLS_GENOME.out.bam // channel: [ val(meta), path(bam) ]
bai = BAM_SORT_STATS_SAMTOOLS_GENOME.out.bai // channel: [ val(meta), path(bai) ]
stats = BAM_SORT_STATS_SAMTOOLS_GENOME.out.stats // channel: [ val(meta), path(stats) ]
flagstat = BAM_SORT_STATS_SAMTOOLS_GENOME.out.flagstat // channel: [ val(meta), path(flagstat) ]
idxstats = BAM_SORT_STATS_SAMTOOLS_GENOME.out.idxstats // channel: [ val(meta), path(idxstats) ]

bam_transcript = BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME.out.bam // channel: [ val(meta), path(bam) ]
bai_transcript = BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME.out.bai // channel: [ val(meta), path(bai) ]
stats_transcript = BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME.out.stats // channel: [ val(meta), path(stats) ]
flagstat_transcript = BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME.out.flagstat // channel: [ val(meta), path(flagstat) ]
idxstats_transcript = BAM_SORT_STATS_SAMTOOLS_TRANSCRIPTOME.out.idxstats // channel: [ val(meta), path(idxstats) ]

versions = ch_versions // channel: [ path(versions.yml) ]
}
34 changes: 29 additions & 5 deletions subworkflows/nf-core/fastq_align_star/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ input:
type: file
description: Reference genome fasta file
pattern: "*.{fasta,fa,fna}"
- ch_transcripts_fasta:
type: file
description: Optional reference transcriptome fasta file
pattern: "*.{fasta,fa,fna}"
output:
- orig_bam:
description: |
Expand All @@ -67,7 +71,7 @@ output:
description: |
Sorted BAM file of read alignments (optional)
Structure: [ val(meta), path(bam) ]
- bam_transcript:
- orig_bam_transcript:
description: |
Output BAM file of transcriptome alignment (optional)
Structure: [ val(meta), path(bam) ]
Expand All @@ -79,10 +83,6 @@ output:
description: |
STAR output tab file(s) (optional)
Structure: [ val(meta), path(tab) ]
- stats:
description: |
File containing samtools stats output
Structure: [ val(meta), path(stats) ]
- bam:
description: |
BAM file ordered by samtools
Expand All @@ -91,6 +91,10 @@ output:
description: |
BAI index of the ordered BAM file
Structure: [ val(meta), path(bai) ]
- stats:
description: |
File containing samtools stats output
Structure: [ val(meta), path(stats) ]
- flagstat:
description: |
File containing samtools flagstat output
Expand All @@ -99,6 +103,26 @@ output:
description: |
File containing samtools idxstats output
Structure: [ val(meta), path(idxstats) ]
- bam_transcript:
description: |
Transcriptome-level BAM file ordered by samtools (optional)
Structure: [ val(meta), path(bam) ]
- bai_transcript:
description: |
Transcriptome-level BAI index of the ordered BAM file (optional)
Structure: [ val(meta), path(bai) ]
- stats_transcript:
description: |
Transcriptome-level file containing samtools stats output (optional)
Structure: [ val(meta), path(stats) ]
- flagstat_transcript:
description: |
Transcriptome-level file containing samtools flagstat output (optional)
Structure: [ val(meta), path(flagstat) ]
- idxstats_transcript:
description: |
Transcriptome-level file containing samtools idxstats output (optional)
Structure: [ val(meta), path(idxstats) ]
- versions:
type: file
description: File containing software versions
Expand Down
Loading

0 comments on commit 05b3f76

Please sign in to comment.