diff --git a/CHANGELOG.md b/CHANGELOG.md index c5d38d55..45f016b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ Initial release of nf-core/multiplesequencealign, created with the [nf-core](htt - [[#84](https://github.com/nf-core/multiplesequencealign/issues/84)] - Update Metromap. - [[#139](https://github.com/nf-core/multiplesequencealign/pull/139)] - Add Foldmason. - [[#146](https://github.com/nf-core/multiplesequencealign/pull/146)] - Only show additional process tags when they exists and use the same ubuntu version in all modules. +- [[#145](https://github.com/nf-core/multiplesequencealign/pull/145)] - Add consensus MSA. ### `Fixed` diff --git a/conf/modules.config b/conf/modules.config index 179945ac..541db20c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -122,8 +122,7 @@ withName: "CREATE_TCOFFEETEMPLATE" { ext.prefix = { "${meta.id}" } } - - withName: "CLUSTALO_ALIGN|FAMSA_ALIGN|LEARNMSA_ALIGN|MAFFT|MAGUS_ALIGN|MUSCLE5_SUPER5|REGRESSIVE|TCOFFEE_ALIGN|TCOFFEE3D_ALIGN|FOLDMASON_EASYMSA" { + withName: "CLUSTALO_ALIGN|FAMSA_ALIGN|FOLDMASON_EASYMSA|KALIGN_ALIGN|LEARNMSA_ALIGN|MAFFT|MAGUS_ALIGN|MUSCLE5_SUPER5|REGRESSIVE|TCOFFEE_ALIGN|TCOFFEE3D_ALIGN" { tag = { [ "${meta.id}", @@ -165,7 +164,13 @@ } - withName:"PIGZ_COMPRESS" { + withName: 'CONSENSUS'{ + ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.argstree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } + ext.args = { "-output fasta_aln" } + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + } + + withName:"PIGZ_COMPRESS"{ publishDir = [ path: { "${params.outdir}/alignment/${meta.id}" }, mode: params.publish_dir_mode, diff --git a/docs/images/nf-core-msa_metro_map.png b/docs/images/nf-core-msa_metro_map.png index 7af75339..1b0b36a2 100644 Binary files a/docs/images/nf-core-msa_metro_map.png and b/docs/images/nf-core-msa_metro_map.png differ diff --git a/docs/usage.md b/docs/usage.md index 501459b4..323b99c0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -60,6 +60,8 @@ The available ALIGN methods are listed below (those that accept guide trees are - [MTMALIGN](https://bio.tools/mtm-align) - [FOLDMASON](https://github.com/steineggerlab/foldmason) +Optionally, [M-COFFEE](https://tcoffee.org/Projects/mcoffee/index.html) can combine the output of all alignments into a consensus MSA. This step is disabled by default and can be enabled using the `--build_consensus` parameter. + ### 4. EVALUATE Optionally, the produced MSAs can be evaluated. This step can be skipped using the `--skip_eval` parameter. The evaluations implemented are listed below. 
diff --git a/modules.json b/modules.json index ce9434cc..ccc334ae 100644 --- a/modules.json +++ b/modules.json @@ -101,6 +101,11 @@ "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", "installed_by": ["modules"] }, + "tcoffee/consensus": { + "branch": "master", + "git_sha": "8b8d8daa4b7d75ccfb290fcb721a00cc98e23567", + "installed_by": ["modules"] + }, "tcoffee/irmsd": { "branch": "master", "git_sha": "faf557ba56156ac0e5de76a25c1e3df11c944f59", diff --git a/modules/nf-core/tcoffee/consensus/environment.yml b/modules/nf-core/tcoffee/consensus/environment.yml new file mode 100644 index 00000000..f5c82840 --- /dev/null +++ b/modules/nf-core/tcoffee/consensus/environment.yml @@ -0,0 +1,8 @@ +name: "tcoffee_consensus" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::t-coffee=13.46.0.919e8c6b + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/tcoffee/consensus/main.nf b/modules/nf-core/tcoffee/consensus/main.nf new file mode 100644 index 00000000..666c1ee0 --- /dev/null +++ b/modules/nf-core/tcoffee/consensus/main.nf @@ -0,0 +1,62 @@ +process TCOFFEE_CONSENSUS { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://community.wave.seqera.io/library/t-coffee_pigz:f47b85d70360f1a0': + 'community.wave.seqera.io/library/t-coffee_pigz:6c9b2f8b97ee55e5' }" + + + input: + tuple val(meta) , path(aln) + tuple val(meta2), path(tree) + val(compress) + + output: + tuple val(meta), path("*.{aln,aln.gz}"), emit: alignment + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def tree_args = tree ? "-usetree $tree" : "" + def outfile = compress ? "stdout" : "${prefix}.aln" + def write_output = compress ? 
" | pigz -cp ${task.cpus} > ${prefix}.aln.gz" : "" + """ + export TEMP='./' + t_coffee -aln ${aln} \ + $tree_args \ + $args \ + -thread ${task.cpus} \ + -outfile $outfile \ + $write_output + + if [ -f stdout ] && [ "$compress" = true ]; then + pigz -cp ${task.cpus} < stdout > ${prefix}.aln.gz + rm stdout + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + export TEMP='./' + touch ${prefix}.aln${compress ? '.gz':''} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) + END_VERSIONS + """ +} diff --git a/modules/nf-core/tcoffee/consensus/meta.yml b/modules/nf-core/tcoffee/consensus/meta.yml new file mode 100644 index 00000000..54d60830 --- /dev/null +++ b/modules/nf-core/tcoffee/consensus/meta.yml @@ -0,0 +1,63 @@ +name: tcoffee_consensus +description: Computes a consensus alignment using T_COFFEE +keywords: + - alignment + - MSA + - genomics +tools: + - tcoffee: + description: "A collection of tools for Computing, Evaluating and Manipulating Multiple Alignments of DNA, RNA, Protein Sequences and Structures." + homepage: "http://www.tcoffee.org/Projects/tcoffee/" + documentation: "https://tcoffee.readthedocs.io/en/latest/tcoffee_main_documentation.html" + tool_dev_url: "https://github.com/cbcrg/tcoffee" + doi: "10.1006/jmbi.2000.4042" + licence: ["GPL v3"] + - pigz: + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - aln: + type: file + description: List of multiple sequence alignments in FASTA format to be used to compute the consensus + pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing tree information + e.g. `[ id:'test_tree']` + - tree: + type: file + description: Input guide tree in Newick format + pattern: "*.{dnd}" + + - compress: + type: boolean + description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - alignment: + type: file + description: Consensus alignment in FASTA format, gzip-compressed when compress is true + pattern: "*.{aln,aln.gz}" + +authors: + - "@luisas" +maintainers: + - "@luisas" diff --git a/modules/nf-core/tcoffee/consensus/tests/main.nf.test b/modules/nf-core/tcoffee/consensus/tests/main.nf.test new file mode 100644 index 00000000..78aa567b --- /dev/null +++ b/modules/nf-core/tcoffee/consensus/tests/main.nf.test @@ -0,0 +1,132 @@ +nextflow_process { + + name "Test Process TCOFFEE_CONSENSUS" + script "../main.nf" + process "TCOFFEE_CONSENSUS" + + tag "modules" + tag "modules_nfcore" + tag "tcoffee" + tag "tcoffee/consensus" + tag "tcoffee/align" + tag "pigz" + tag "famsa/guidetree" + tag "famsa/align" + + config "./sequence.config" + + setup { + run("FAMSA_GUIDETREE") { + script "../../../famsa/guidetree/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + run("FAMSA_ALIGN") { + script "../../../famsa/align/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file(params.modules_testdata_base_path + 
"../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + input[1] = [[:],[]] + input[2] = false + + """ + } + } + run("TCOFFEE_ALIGN") { + script "../../../tcoffee/align/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file(params.modules_testdata_base_path + "../../multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + input[1] = [[:],[]] + input[2] = [[:],[],[]] + input[3] = false + + """ + } + } + } + + test("consensus - no tree - uncompressed - seatoxin ") { + + when { + process { + """ + msas = FAMSA_ALIGN.out.alignment.mix(TCOFFEE_ALIGN.out.alignment).groupTuple() + input[0] = msas + input[1] = [[:],[]] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.alignment, + process.out.versions + ).match() + } + ) + } + } + + test("consensus - tree - compressed- seatoxin") { + + when { + process { + """ + msas = FAMSA_ALIGN.out.alignment.mix(TCOFFEE_ALIGN.out.alignment).groupTuple() + input[0] = msas + input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test'], tree]} + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.alignment, + process.out.versions + ).match() + } + ) + } + } + + + test("consensus - stub") { + + options "-stub" + + when { + process { + """ + msas = FAMSA_ALIGN.out.alignment.mix(TCOFFEE_ALIGN.out.alignment).groupTuple() + input[0] = msas + input[1] = [[:],[]] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match()} + ) + } + } + +} diff --git a/modules/nf-core/tcoffee/consensus/tests/main.nf.test.snap b/modules/nf-core/tcoffee/consensus/tests/main.nf.test.snap new file mode 100644 index 00000000..a0fe4f99 --- /dev/null +++ b/modules/nf-core/tcoffee/consensus/tests/main.nf.test.snap @@ -0,0 +1,63 @@ +{ + "consensus - no tree - uncompressed - seatoxin 
": { + "content": [ + [ + [ + { + "id": "test" + }, + "consensus.aln:md5,ed7fb1f7b7a9cd66e9b0c9d60d1b0e52" + ] + ], + [ + "versions.yml:md5,79d4f7ac70fab29f8cd0a18a4d3f76d1" + ] + ], + "timestamp": "2024-09-04T13:17:59.621521" + }, + "consensus - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "consensus.aln:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,79d4f7ac70fab29f8cd0a18a4d3f76d1" + ], + "alignment": [ + [ + { + "id": "test" + }, + "consensus.aln:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,79d4f7ac70fab29f8cd0a18a4d3f76d1" + ] + } + ], + "timestamp": "2024-09-04T13:18:15.69498" + }, + "consensus - tree - compressed- seatoxin": { + "content": [ + [ + [ + { + "id": "test" + }, + "consensus.aln.gz:md5,ed7fb1f7b7a9cd66e9b0c9d60d1b0e52" + ] + ], + [ + "versions.yml:md5,79d4f7ac70fab29f8cd0a18a4d3f76d1" + ] + ], + "timestamp": "2024-09-04T13:18:08.240517" + } +} \ No newline at end of file diff --git a/modules/nf-core/tcoffee/consensus/tests/sequence.config b/modules/nf-core/tcoffee/consensus/tests/sequence.config new file mode 100644 index 00000000..b23494c3 --- /dev/null +++ b/modules/nf-core/tcoffee/consensus/tests/sequence.config @@ -0,0 +1,11 @@ +process { + + withName: "TCOFFEE_ALIGN"{ + ext.prefix = "tcoffee_test" + ext.args = { "-output fasta_aln" } + } + withName: "TCOFFEE_CONSENSUS"{ + ext.args = { "-output fasta_aln" } + ext.prefix = "consensus" + } +} diff --git a/modules/nf-core/tcoffee/consensus/tests/tags.yml b/modules/nf-core/tcoffee/consensus/tests/tags.yml new file mode 100644 index 00000000..f3eb4719 --- /dev/null +++ b/modules/nf-core/tcoffee/consensus/tests/tags.yml @@ -0,0 +1,2 @@ +tcoffee/consensus: + - "modules/nf-core/tcoffee/consensus/**" diff --git a/nextflow.config b/nextflow.config index 8e8fc0dc..7bdc3886 100644 --- a/nextflow.config +++ b/nextflow.config @@ -18,6 +18,9 @@ params { tools = null templates_suffix = ".pdb" + // Alignment + 
build_consensus = false + // Stats skip_stats = false calc_sim = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 1c786209..b9747e9a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -60,6 +60,19 @@ } } }, + "align_options": { + "title": "Alignment options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define extra alignment options.", + "properties": { + "build_consensus": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Build consensus alignment with M-COFFEE." + } + } + }, "stats_options": { "title": "Stats options", "type": "object", @@ -372,6 +385,9 @@ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/align_options" + }, { "$ref": "#/definitions/stats_options" }, diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 4b3dc73f..4b4b9aed 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -19,6 +19,7 @@ include { MUSCLE5_SUPER5 } from '../../modules/nf-core/muscle include { TCOFFEE_ALIGN } from '../../modules/nf-core/tcoffee/align/main' include { TCOFFEE_ALIGN as TCOFFEE3D_ALIGN } from '../../modules/nf-core/tcoffee/align/main' include { TCOFFEE_ALIGN as REGRESSIVE_ALIGN } from '../../modules/nf-core/tcoffee/align/main' +include { TCOFFEE_CONSENSUS as CONSENSUS } from '../../modules/nf-core/tcoffee/consensus/main' include { MTMALIGN_ALIGN } from '../../modules/nf-core/mtmalign/align/main' workflow ALIGN { @@ -316,6 +317,18 @@ workflow ALIGN { ch_versions = ch_versions.mix(FOLDMASON_EASYMSA.out.versions.first()) } + // ----------------- CONSENSUS ------------------ + if(params.build_consensus){ + ch_msa.map{ meta, msa -> [ meta["id"], msa]} + .groupTuple() + .map{ id_meta, msas -> [ ["id": id_meta, "tree":"", "args_tree":"", "args_tree_clean":null, "aligner":"CONSENSUS", "args_aligner":"", "args_aligner_clean":null ], msas ]} + .set{ ch_msa_consensus } + + CONSENSUS(ch_msa_consensus, [[:],[]], compress) + 
ch_msa = ch_msa.mix(CONSENSUS.out.alignment) + ch_versions = ch_versions.mix(CONSENSUS.out.versions.first()) + } + emit: msa = ch_msa // channel: [ val(meta), path(msa) ] diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index 70d74da2..be1746c5 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -142,7 +142,7 @@ workflow MULTIPLESEQUENCEALIGN { // // Align // - compress_during_align = !params.skip_compression && params.skip_eval + compress_during_align = !params.skip_compression && params.skip_eval && !params.build_consensus ALIGN ( ch_seqs, ch_tools,