diff --git a/CHANGELOG.md b/CHANGELOG.md index d4af5dea..af014c0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#395](https://github.com/nf-core/mag/pull/395) - Add support for fast domain-level classification of bins using Tiara, to allow bins to be separated into eukaryotic and prokaryotic-specific processes. - [#422](https://github.com/nf-core/mag/pull/422) - Adds support for normalization of read depth with BBNorm (added by @erikrikarddaniel and @fabianegli) - [#439](https://github.com/nf-core/mag/pull/439) - Adds ability to enter the pipeline at the binning stage by providing a CSV of pre-computed assemblies (by @prototaxites) +- [#459](https://github.com/nf-core/mag/pull/459) - Adds ability to skip damage correction step in the ancient DNA workflow and just run pyDamage (by @jfy133) ### `Changed` diff --git a/conf/modules.config b/conf/modules.config index c1d4569e..b61e95ca 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -521,7 +521,7 @@ process { withName: PYDAMAGE_ANALYZE { ext.prefix = { "${meta.assembler}-${meta.id}" } publishDir = [ - path: {"${params.outdir}/Ancient_DNA/pydamage/analyze/" }, + path: {"${params.outdir}/Ancient_DNA/pydamage/analyze/${meta.assembler}-${meta.id}" }, mode: params.publish_dir_mode ] } @@ -530,7 +530,7 @@ process { ext.prefix = { "${meta.assembler}-${meta.id}" } ext.args = "-t ${params.pydamage_accuracy}" publishDir = [ - path: {"${params.outdir}/Ancient_DNA/pydamage/filter/" }, + path: {"${params.outdir}/Ancient_DNA/pydamage/filter/${meta.assembler}-${meta.id}" }, mode: params.publish_dir_mode ] } diff --git a/docs/output.md b/docs/output.md index 0d805bb8..44ad112d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -641,7 +641,7 @@ Optional, only running when parameter `-profile ancient_dna` is specified. 
### `variant_calling` -Because of aDNA damage, _de novo_ assemblers sometimes struggle to call a correct consensus on the contig sequence. To avoid this situation, the consensus is re-called with a variant calling software using the reads aligned back to the contigs +Because of aDNA damage, _de novo_ assemblers sometimes struggle to call a correct consensus on the contig sequence. To avoid this situation, the consensus is re-called with a variant calling software using the reads aligned back to the contigs, unless `--skip_ancient_damagecorrection` is supplied.
Output files diff --git a/modules.json b/modules.json index 46389ff2..add56e46 100644 --- a/modules.json +++ b/modules.json @@ -176,14 +176,15 @@ "git_sha": "371eff7748d769c2ddc8bd593773523a364a52fe", "installed_by": ["modules"] }, - "tiara/tiara": { - "branch": "master", - "git_sha": "d91e3d3d4806179065b087b91ff36c11976bf233" - }, "seqtk/mergepe": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", "installed_by": ["modules"] + }, + "tiara/tiara": { + "branch": "master", + "git_sha": "d91e3d3d4806179065b087b91ff36c11976bf233", + "installed_by": ["modules"] } } }, diff --git a/nextflow.config b/nextflow.config index d996ffb4..3db1d045 100644 --- a/nextflow.config +++ b/nextflow.config @@ -61,13 +61,14 @@ params { // ancient DNA assembly validation options ancient_dna = false + pydamage_accuracy = 0.5 + skip_ancient_damagecorrection = false freebayes_ploidy = 1 freebayes_min_basequality = 20 freebayes_minallelefreq = 0.33 bcftools_view_high_variant_quality = 30 bcftools_view_medium_variant_quality = 20 bcftools_view_minimal_allelesupport = 3 - pydamage_accuracy = 0.5 // taxonomy options centrifuge_db = null diff --git a/nextflow_schema.json b/nextflow_schema.json index fe35aed0..c0979e3d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -32,7 +32,7 @@ "format": "file-path", "description": "Additional input CSV samplesheet containing information about pre-computed assemblies. When set, both read pre-processing and assembly are skipped and the pipeline begins at the binning stage.", "help_text": "If you have pre-computed assemblies from another source, it is possible to jump straight to the binning stage of the pipeline by supplying these assemblies in a CSV file. This CSV file should have three columns and the following header: `id,group,assembler,fasta`. Short reads must still be supplied in to `--input` in CSV format. 
See [usage docs](https://nf-co.re/mag/usage#input-specifications) for further details.", - "default": null, + "default": "None", "fa_icon": "fas fa-file-csv" }, "outdir": { @@ -740,7 +740,7 @@ "type": "number", "default": 0.5, "description": "Specify single-copy gene score threshold for bin refinement.", - "help_text": "Score threshold for single-copy gene selection algorithm to keep selecting bins, with a value ranging from 0-1.\n\nFor description of scoring algorithm, see: Sieber, Christian M. K., et al. 2018. Nature Microbiology 3 (7): 836–43. https://doi.org/10.1038/s41564-018-0171-1.\n\n> Modifies DAS Tool parameter --score_threshold\n" + "help_text": "Score threshold for single-copy gene selection algorithm to keep selecting bins, with a value ranging from 0-1.\n\nFor description of scoring algorithm, see: Sieber, Christian M. K., et al. 2018. Nature Microbiology 3 (7): 836\u201343. https://doi.org/10.1038/s41564-018-0171-1.\n\n> Modifies DAS Tool parameter --score_threshold\n" }, "postbinning_input": { "type": "string", @@ -780,6 +780,15 @@ "type": "boolean", "description": "Turn on/off the ancient DNA subworfklow" }, + "pydamage_accuracy": { + "type": "number", + "default": 0.5, + "description": "PyDamage accuracy threshold" + }, + "skip_ancient_damagecorrection": { + "type": "boolean", + "description": "deactivate damage correction of ancient contigs using variant and consensus calling" + }, "freebayes_ploidy": { "type": "integer", "default": 1, @@ -809,11 +818,6 @@ "type": "integer", "default": 3, "description": "minimum number of bases supporting the alternative allele" - }, - "pydamage_accuracy": { - "type": "number", - "default": 0.5, - "description": "PyDamage accuracy threshold" } } } diff --git a/subworkflows/local/ancient_dna.nf b/subworkflows/local/ancient_dna.nf index 442a8c1d..de47e49b 100644 --- a/subworkflows/local/ancient_dna.nf +++ b/subworkflows/local/ancient_dna.nf @@ -10,31 +10,44 @@ workflow ANCIENT_DNA_ASSEMBLY_VALIDATION { take: input 
//channel: [val(meta), path(contigs), path(bam), path(bam_index)] main: + ch_versions = Channel.empty() + PYDAMAGE_ANALYZE(input.map {item -> [item[0], item[2], item[3]]}) PYDAMAGE_FILTER(PYDAMAGE_ANALYZE.out.csv) - FAIDX(input.map { item -> [ item[0], item[1] ] }) - freebayes_input = input.join(FAIDX.out.fai) // [val(meta), path(contigs), path(bam), path(bam_index), path(fai)] - FREEBAYES (freebayes_input.map { item -> [item[0], item[2], item[3], [], [], []] }, - freebayes_input.map { item -> item[1] }, - freebayes_input.map { item -> item[4] }, - [], - [], - [] ) - - BCFTOOLS_INDEX_PRE(FREEBAYES.out.vcf) - BCFTOOLS_VIEW(FREEBAYES.out.vcf.join(BCFTOOLS_INDEX_PRE.out.tbi), [], [], []) - BCFTOOLS_INDEX_POST(BCFTOOLS_VIEW.out.vcf) - BCFTOOLS_CONSENSUS(BCFTOOLS_VIEW.out.vcf - .join(BCFTOOLS_INDEX_POST.out.tbi) - .join(input.map { item -> [ item[0], item[1] ] })) + ch_versions = ch_versions.mix(PYDAMAGE_ANALYZE.out.versions.first()) + + if ( params.skip_ancient_damagecorrection ) { + ch_corrected_contigs = Channel.empty() + } + + if ( !params.skip_ancient_damagecorrection ) { + FAIDX(input.map { item -> [ item[0], item[1] ] }) + freebayes_input = input.join(FAIDX.out.fai) // [val(meta), path(contigs), path(bam), path(bam_index), path(fai)] + FREEBAYES (freebayes_input.map { item -> [item[0], item[2], item[3], [], [], []] }, + freebayes_input.map { item -> item[1] }, + freebayes_input.map { item -> item[4] }, + [], + [], + [] ) + + BCFTOOLS_INDEX_PRE(FREEBAYES.out.vcf) + BCFTOOLS_VIEW(FREEBAYES.out.vcf.join(BCFTOOLS_INDEX_PRE.out.tbi), [], [], []) + BCFTOOLS_INDEX_POST(BCFTOOLS_VIEW.out.vcf) + BCFTOOLS_CONSENSUS(BCFTOOLS_VIEW.out.vcf + .join(BCFTOOLS_INDEX_POST.out.tbi) + .join(input.map { item -> [ item[0], item[1] ] })) + + ch_corrected_contigs = BCFTOOLS_CONSENSUS.out.fasta + + ch_versions = ch_versions.mix(FAIDX.out.versions.first()) + ch_versions = ch_versions.mix(FREEBAYES.out.versions.first()) + ch_versions = 
ch_versions.mix(BCFTOOLS_CONSENSUS.out.versions.first()) + } + + - ch_versions = Channel.empty() - ch_versions = PYDAMAGE_ANALYZE.out.versions.first() - ch_versions = ch_versions.mix(FAIDX.out.versions.first()) - ch_versions = ch_versions.mix(FREEBAYES.out.versions.first()) - ch_versions = ch_versions.mix(BCFTOOLS_CONSENSUS.out.versions.first()) emit: - contigs_recalled = BCFTOOLS_CONSENSUS.out.fasta // channel: [ val(meta), path(fasta) ] + contigs_recalled = ch_corrected_contigs // channel: [ val(meta), path(fasta) ] pydamage_results = PYDAMAGE_ANALYZE.out.csv // channel: [ val(meta), path(csv) ] pydamage_filtered_results = PYDAMAGE_FILTER.out.csv // channel: [ val(meta), path(csv) ] versions = ch_versions // channel: [ versions.yml ] diff --git a/workflows/mag.nf b/workflows/mag.nf index d5ea3b1f..61ad3a84 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -647,7 +647,7 @@ workflow MAG { if (!params.skip_binning){ - if (params.ancient_dna) { + if (params.ancient_dna && !params.skip_ancient_damagecorrection) { // contigs_recalled is an empty channel when damage correction is skipped BINNING ( BINNING_PREPARATION.out.grouped_mappings .join(ANCIENT_DNA_ASSEMBLY_VALIDATION.out.contigs_recalled)