diff --git a/conf/modules.config b/conf/modules.config index dc243fdcb8..79dc66a316 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -52,6 +52,15 @@ process { ] } + withName: 'DRAGMAP_HASHTABLE' { + publishDir = [ + path: { "${params.outdir}/reference/dragmap" }, + enabled: "${params.save_reference}", + pattern: "dragmap" + ] + + } + withName: 'CREATE_INTERVALS_BED' { publishDir = [ enabled: "${params.save_reference}", @@ -256,6 +265,13 @@ process { // Only name sort if Spark for Markduplicates + duplicate marking is not skipped ext.args2 = { params.use_gatk_spark && params.use_gatk_spark.contains('markduplicates') && (!params.skip_tools || (params.skip_tools && !params.skip_tools.contains('markduplicates'))) ? '-n' : '' } ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.findAll(/part_([0-9]+)?/).last()) : "" } + + withName: "DRAGMAP_ALIGN" { + publishDir = [ enabled: false ] + ext.prefix = { params.split_fastq > 1 ? "${meta.id}".concat('.').concat(reads.get(0).name.findAll(/part_([0-9]+)?/).last()) : "${meta.id}" } + } + + withName: 'INDEX_MAPPING' { publishDir = [ enabled: false ] diff --git a/docs/output.md b/docs/output.md index 6916048284..366d47592c 100644 --- a/docs/output.md +++ b/docs/output.md @@ -81,6 +81,12 @@ Such files are intermediate and not kept in the final files delivered to users. Such files are intermediate and not kept in the final files delivered to users. +#### DRAGMAP + +[DRAGMAP](https://github.com/Illumina/dragmap) is an open-source software implementation of the DRAGEN mapper, which the Illumina team created so that we would have an open-source way to produce the same results as their proprietary DRAGEN hardware. + +Such files are intermediate and not kept in the final files delivered to users. 
+ ### Mark Duplicates #### GATK MarkDuplicates diff --git a/modules.json b/modules.json index 3aa090862a..fc1c23f9d2 100644 --- a/modules.json +++ b/modules.json @@ -39,6 +39,12 @@ "deepvariant": { "git_sha": "c450b08a75cda8878876ccbbe42493d6774397bd" }, + "dragmap/align": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "dragmap/hashtable": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, "ensemblvep": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, diff --git a/modules/nf-core/modules/dragmap/align/main.nf b/modules/nf-core/modules/dragmap/align/main.nf new file mode 100644 index 0000000000..9f261cc2e7 --- /dev/null +++ b/modules/nf-core/modules/dragmap/align/main.nf @@ -0,0 +1,64 @@ +process DRAGMAP_ALIGN { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? "bioconda::dragmap=1.2.1 bioconda::samtools=1.14 conda-forge::pigz=2.3.4" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:f7aad9060cde739c95685fc5ff6d6f7e3ec629c8-0': + 'quay.io/biocontainers/mulled-v2-580d344d9d4a496cd403932da8765f9e0187774d:f7aad9060cde739c95685fc5ff6d6f7e3ec629c8-0' }" + + input: + tuple val(meta), path(reads) + path hashmap + val sort_bam + + output: + tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path('*.log'), emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 
'sort' : 'view' + if (meta.single_end) { + """ + dragen-os \\ + -r $hashmap \\ + -1 $reads \\ + --num-threads $task.cpus \\ + $args \\ + 2> ${prefix}.dragmap.log \\ + | samtools $samtools_command -@ $task.cpus $args2 -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dragmap: \$(echo \$(dragen-os --version 2>&1)) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + } else { + """ + dragen-os \\ + -r $hashmap \\ + -1 ${reads[0]} \\ + -2 ${reads[1]} \\ + --num-threads $task.cpus \\ + $args \\ + 2> ${prefix}.dragmap.log \\ + | samtools $samtools_command -@ $task.cpus $args2 -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dragmap: \$(echo \$(dragen-os --version 2>&1)) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/dragmap/align/meta.yml b/modules/nf-core/modules/dragmap/align/meta.yml new file mode 100644 index 0000000000..dcce34fb56 --- /dev/null +++ b/modules/nf-core/modules/dragmap/align/meta.yml @@ -0,0 +1,42 @@ +name: dragmap_align +description: Performs fastq alignment to a reference using DRAGMAP +keywords: + - alignment + - map + - fastq + - bam + - sam +tools: + - dragmap: + description: Dragmap is the Dragen mapper/aligner Open Source Software. + homepage: https://github.com/Illumina/dragmap + documentation: https://github.com/Illumina/dragmap + tool_dev_url: https://github.com/Illumina/dragmap#basic-command-line-usage + doi: "" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. 
+ - hashmap: + type: file + description: DRAGMAP hash table + pattern: "Directory containing DRAGMAP hash table *.{cmp,.bin,.txt}" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" diff --git a/modules/nf-core/modules/dragmap/hashtable/main.nf b/modules/nf-core/modules/dragmap/hashtable/main.nf new file mode 100644 index 0000000000..81333dfdae --- /dev/null +++ b/modules/nf-core/modules/dragmap/hashtable/main.nf @@ -0,0 +1,36 @@ +process DRAGMAP_HASHTABLE { + tag "$fasta" + label 'process_high' + + conda (params.enable_conda ? "bioconda::dragmap=1.2.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/dragmap:1.2.1--hd4ca14e_0': + 'quay.io/biocontainers/dragmap:1.2.1--hd4ca14e_0' }" + + input: + path fasta + + output: + path "dragmap" , emit: hashmap + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir dragmap + dragen-os \\ + --build-hash-table true \\ + --ht-reference $fasta \\ + --output-directory dragmap \\ + $args \\ + --ht-num-threads $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dragmap: \$(echo \$(dragen-os --version 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/dragmap/hashtable/meta.yml b/modules/nf-core/modules/dragmap/hashtable/meta.yml new file mode 100644 index 0000000000..f86a5dbb74 --- /dev/null +++ b/modules/nf-core/modules/dragmap/hashtable/meta.yml @@ -0,0 +1,30 @@ +name: dragmap_hashtable +description: Create DRAGEN hashtable for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - dragmap: + description: Dragmap is the Dragen mapper/aligner Open Source Software. 
+ homepage: https://github.com/Illumina/dragmap + documentation: https://github.com/Illumina/dragmap + tool_dev_url: https://github.com/Illumina/dragmap#basic-command-line-usage + doi: "" + licence: ["GPL v3"] +input: + - fasta: + type: file + description: Input genome fasta file +output: + - hashmap: + type: file + description: DRAGMAP hash table + pattern: "*.{cmp,.bin,.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Emiller88" diff --git a/nextflow_schema.json b/nextflow_schema.json index 5435659174..4e7b60b0cd 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -204,10 +204,11 @@ "fa_icon": "fas fa-puzzle-piece", "enum": [ "bwa-mem", - "bwa-mem2" + "bwa-mem2", + "dragmap" ], "description": "Specify aligner to be used to map reads to reference genome.", - "help_text": "> **WARNING** Current indices for `bwa` in AWS iGenomes are not compatible with `bwa-mem2`.\n> Use `--bwa=false` to have `Sarek` build them automatically.\n\n> **WARNING** BWA-mem2 is in active development\n> Sarek might not be able to require the right amount of resources for it at the moment\n> We recommend to use pre-built indexes", + "help_text": "> **WARNING** Current indices for `bwa` in AWS iGenomes are not compatible with `bwa-mem2` and `dragmap`.\n> Use `--bwa=false` to have `Sarek` build them automatically.\n\n> **WARNING** BWA-mem2 is in active development\n> Sarek might not be able to require the right amount of resources for it at the moment\n> We recommend to use pre-built indexes", "hidden": true }, "markdup_java_options": { diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index abb12dc02d..842ed1aa20 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -10,6 +10,8 @@ include { BWA_INDEX as BWAMEM1_INDEX } from '../../modules/nf-core/modules/bwa/index/main' include { BWAMEM2_INDEX } from 
'../../modules/nf-core/modules/bwamem2/index/main' +include { DRAGMAP_HASHTABLE } from '../../modules/nf-core/modules/dragmap/hashtable/main' +include { CREATE_INTERVALS_BED } from '../../modules/local/create_intervals_bed/main' include { GATK4_CREATESEQUENCEDICTIONARY } from '../../modules/nf-core/modules/gatk4/createsequencedictionary/main' include { MSISENSORPRO_SCAN } from '../../modules/nf-core/modules/msisensorpro/scan/main' include { SAMTOOLS_FAIDX } from '../../modules/nf-core/modules/samtools/faidx/main' @@ -33,8 +35,11 @@ workflow PREPARE_GENOME { BWAMEM1_INDEX(fasta) // If aligner is bwa-mem BWAMEM2_INDEX(fasta) // If aligner is bwa-mem2 + DRAGMAP_HASHTABLE(fasta) // Output is used when aligner is dragmap // if we use mix here, bwa becomes a channel that is comsumed - ch_bwa = params.aligner == "bwa-mem" ? BWAMEM1_INDEX.out.index : BWAMEM2_INDEX.out.index + ch_bwa = params.aligner == "bwa-mem" ? + BWAMEM1_INDEX.out.index : params.aligner == "dragmap" ? + DRAGMAP_HASHTABLE.out.hashmap : BWAMEM2_INDEX.out.index GATK4_CREATESEQUENCEDICTIONARY(fasta) MSISENSORPRO_SCAN(fasta.map{ it -> [[id:it[0].baseName], it] }) diff --git a/tests/test_aligner.yml b/tests/test_aligner.yml index 1ced2a344b..8a1db64484 100644 --- a/tests/test_aligner.yml +++ b/tests/test_aligner.yml @@ -22,3 +22,27 @@ - path: results/reports/qualimap/test/test.recal - path: results/reports/samtools_stats/test/test.md.cram.stats - path: results/reports/samtools_stats/test/test.recal.cram.stats +- name: Run dragmap + command: nextflow run main.nf -profile test,docker --aligner dragmap + tags: + - aligner + - dragmap + - preprocessing + files: + - path: results/multiqc + - path: results/preprocessing/test/markduplicates/test.md.cram + - path: results/preprocessing/test/markduplicates/test.md.cram.crai + - path: results/preprocessing/test/recal_table/test.recal.table + - path: results/preprocessing/test/recalibrated/test.recal.cram + - path: results/preprocessing/test/recalibrated/test.recal.cram.crai + - path: 
results/preprocessing/csv/markduplicates.csv + - path: results/preprocessing/csv/markduplicates_test.csv + - path: results/preprocessing/csv/markduplicates_no_table.csv + - path: results/preprocessing/csv/markduplicates_no_table_test.csv + - path: results/preprocessing/csv/recalibrated.csv + - path: results/preprocessing/csv/recalibrated_test.csv + - path: results/reports/fastqc/test-test_L1 + - path: results/reports/qualimap/test/test.mapped + - path: results/reports/qualimap/test/test.recal + - path: results/reports/samtools_stats/test/test.md.cram.stats + - path: results/reports/samtools_stats/test/test.recal.cram.stats