Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve minimal usage of Sarek #180

Merged
merged 3 commits into from
Apr 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Piellorieppe is one of the main massifs in the Sarek National Park.
- [#175](https://github.com/nf-core/sarek/pull/175) - Add `Sentieon` documentation
- [#176](https://github.com/nf-core/sarek/pull/176) - Add empty `custom` genome in `genomes.config` to allow genomes that are not in `AWS iGenomes`
- [#179](https://github.com/nf-core/sarek/pull/179) - Add `FreeBayes` germline variant calling
- [#180](https://github.com/nf-core/sarek/pull/180) - Now saving mapped BAMs (and creating TSV) in minimal setting

### Changed - [2.6dev]

Expand Down Expand Up @@ -53,6 +54,7 @@ Piellorieppe is one of the main massifs in the Sarek National Park.
- [#143](https://github.com/nf-core/sarek/pull/143) - Revert `snpEff` cache version to `86` for `GRCh38`
- [#152](https://github.com/nf-core/sarek/pull/152), [#158](https://github.com/nf-core/sarek/pull/158), [#164](https://github.com/nf-core/sarek/pull/164), [#174](https://github.com/nf-core/sarek/pull/174) - Update docs
- [#164](https://github.com/nf-core/sarek/pull/164) - Update `gatk4-spark` from `4.1.4.1` to `4.1.6.0`
- [#180](https://github.com/nf-core/sarek/pull/180) - Improve minimal setting

### Fixed - [2.6dev]

Expand Down
48 changes: 36 additions & 12 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -1179,24 +1179,24 @@ process MergeBamMapped {
set idPatient, idSample, idRun, file(bam) from multipleBam

output:
set idPatient, idSample, file("${idSample}.bam") into mergedBam
set idPatient, idSample, file("${idSample}.bam") into bam_mapped_merged

script:
"""
samtools merge --threads ${task.cpus} ${idSample}.bam ${bam}
"""
}

mergedBam = mergedBam.dump(tag:'Merged BAM')
bam_mapped_merged = bam_mapped_merged.dump(tag:'Merged BAM')

mergedBam = mergedBam.mix(singleBam,singleBamSentieon)
bam_mapped_merged = bam_mapped_merged.mix(singleBam,singleBamSentieon)

(mergedBam, mergedBamForSentieon) = mergedBam.into(2)
(bam_mapped_merged, mergedBamForSentieon) = bam_mapped_merged.into(2)

if (!params.sentieon) mergedBamForSentieon.close()
else mergedBam.close()
else bam_mapped_merged.close()

mergedBam = mergedBam.dump(tag:'BAMs for MD')
bam_mapped_merged = bam_mapped_merged.dump(tag:'BAMs for MD')
mergedBamForSentieon = mergedBamForSentieon.dump(tag:'Sentieon BAMs to Index')

process IndexBamMergedForSentieon {
Expand All @@ -1216,18 +1216,21 @@ process IndexBamMergedForSentieon {
"""
}

(mergedBam, mergedBamToIndex) = mergedBam.into(2)
(bam_mapped_merged, bam_mapped_merged_to_index) = bam_mapped_merged.into(2)

process IndexBamFile {
label 'cpus_8'

tag {idPatient + "-" + idSample}

publishDir "${params.outdir}/Preprocessing/${idSample}/Mapped/${it}", mode: params.publish_dir_mode

input:
set idPatient, idSample, file(bam) from mergedBamToIndex
set idPatient, idSample, file(bam) from bam_mapped_merged_to_index

output:
set idPatient, idSample, file(bam), file("*.bai") into indexedBam
set idPatient, idSample, file(bam), file("*.bai") into bam_mapped_merged_indexed
set idPatient, idSample into tsv_bam_indexed

when: !(params.known_indels)

Expand All @@ -1238,6 +1241,27 @@ process IndexBamFile {
"""
}

// Duplicate the channel: one copy feeds the aggregated TSV, the other the
// per-sample TSV files below.
(tsv_bam_indexed, tsv_bam_indexed_sample) = tsv_bam_indexed.into(2)

// Creating a TSV file to restart from this step
// One line per sample: patient, gender, status, sample, BAM path, BAI path.
// NOTE(review): genderMap/statusMap are defined elsewhere in this file —
// assumed keyed by idPatient and [idPatient, idSample] respectively; confirm.
tsv_bam_indexed.map { idPatient, idSample ->
gender = genderMap[idPatient]
status = statusMap[idPatient, idSample]
// Paths point at the published copies under outdir, not the work dir.
bam = "${params.outdir}/Preprocessing/${idSample}/Mapped/${idSample}.bam"
bai = "${params.outdir}/Preprocessing/${idSample}/Mapped/${idSample}.bam.bai"
"${idPatient}\t${gender}\t${status}\t${idSample}\t${bam}\t${bai}\n"
}.collectFile(
name: 'mapped.tsv', sort: true, storeDir: "${params.outdir}/Preprocessing/TSV"
)

// Additionally write one TSV per sample (mapped_<idSample>.tsv) so a single
// sample can be restarted on its own.
tsv_bam_indexed_sample
.collectFile(storeDir: "${params.outdir}/Preprocessing/TSV") { idPatient, idSample ->
status = statusMap[idPatient, idSample]
gender = genderMap[idPatient]
bam = "${params.outdir}/Preprocessing/${idSample}/Mapped/${idSample}.bam"
bai = "${params.outdir}/Preprocessing/${idSample}/Mapped/${idSample}.bam.bai"
["mapped_${idSample}.tsv", "${idPatient}\t${gender}\t${status}\t${idSample}\t${bam}\t${bai}\n"]
}
// STEP 2: MARKING DUPLICATES

process MarkDuplicates {
Expand All @@ -1252,7 +1276,7 @@ process MarkDuplicates {
}

input:
set idPatient, idSample, file("${idSample}.bam") from mergedBam
set idPatient, idSample, file("${idSample}.bam") from bam_mapped_merged

output:
set idPatient, idSample, file("${idSample}.md.bam"), file("${idSample}.md.bam.bai") into duplicateMarkedBams
Expand Down Expand Up @@ -1750,8 +1774,8 @@ bamQCReport = bamQCReport.dump(tag:'BamQC')
// When using sentieon for mapping, Channel bamRecal is bamRecalSentieon
if (params.sentieon && step == 'mapping') bamRecal = bamRecalSentieon

// When no knownIndels for mapping, Channel bamRecal is indexedBam
bamRecal = (params.known_indels && step == 'mapping') ? bamRecal : indexedBam
// When no knownIndels for mapping, Channel bamRecal is bam_mapped_merged_indexed
bamRecal = (params.known_indels && step == 'mapping') ? bamRecal : bam_mapped_merged_indexed

// When starting with variant calling, Channel bamRecal is inputSample
if (step == 'variantcalling') bamRecal = inputSample
Expand Down