From b504e5d30a7606b40b65bcb5f12f358b631bd0db Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 24 Apr 2024 09:38:53 +0200 Subject: [PATCH 1/7] Work on centrifuge transfer, problem with kreport producing no output even though there are hits --- conf/modules.config | 2 +- modules.json | 15 +++ .../centrifuge/centrifuge/environment.yml | 7 + modules/nf-core/centrifuge/centrifuge/main.nf | 91 +++++++++++++ .../nf-core/centrifuge/centrifuge/meta.yml | 75 +++++++++++ .../centrifuge/centrifuge/tests/main.nf.test | 106 +++++++++++++++ .../centrifuge/tests/main.nf.test.snap | 125 ++++++++++++++++++ .../centrifuge/centrifuge/tests/tags.yml | 2 + .../centrifuge/kreport/environment.yml | 7 + modules/nf-core/centrifuge/kreport/main.nf | 45 +++++++ modules/nf-core/centrifuge/kreport/meta.yml | 51 +++++++ .../centrifuge/kreport/tests/main.nf.test | 81 ++++++++++++ .../kreport/tests/main.nf.test.snap | 47 +++++++ .../nf-core/centrifuge/kreport/tests/tags.yml | 2 + modules/nf-core/untar/environment.yml | 11 ++ modules/nf-core/untar/main.nf | 63 +++++++++ modules/nf-core/untar/meta.yml | 46 +++++++ modules/nf-core/untar/tests/main.nf.test | 47 +++++++ modules/nf-core/untar/tests/main.nf.test.snap | 42 ++++++ modules/nf-core/untar/tests/tags.yml | 2 + nextflow_schema.json | 4 + workflows/mag.nf | 59 +++------ 22 files changed, 892 insertions(+), 38 deletions(-) create mode 100644 modules/nf-core/centrifuge/centrifuge/environment.yml create mode 100644 modules/nf-core/centrifuge/centrifuge/main.nf create mode 100644 modules/nf-core/centrifuge/centrifuge/meta.yml create mode 100644 modules/nf-core/centrifuge/centrifuge/tests/main.nf.test create mode 100644 modules/nf-core/centrifuge/centrifuge/tests/main.nf.test.snap create mode 100644 modules/nf-core/centrifuge/centrifuge/tests/tags.yml create mode 100644 modules/nf-core/centrifuge/kreport/environment.yml create mode 100644 modules/nf-core/centrifuge/kreport/main.nf create mode 100644 
modules/nf-core/centrifuge/kreport/meta.yml create mode 100644 modules/nf-core/centrifuge/kreport/tests/main.nf.test create mode 100644 modules/nf-core/centrifuge/kreport/tests/main.nf.test.snap create mode 100644 modules/nf-core/centrifuge/kreport/tests/tags.yml create mode 100644 modules/nf-core/untar/environment.yml create mode 100644 modules/nf-core/untar/main.nf create mode 100644 modules/nf-core/untar/meta.yml create mode 100644 modules/nf-core/untar/tests/main.nf.test create mode 100644 modules/nf-core/untar/tests/main.nf.test.snap create mode 100644 modules/nf-core/untar/tests/tags.yml diff --git a/conf/modules.config b/conf/modules.config index 172ff038..20ae66ee 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -222,7 +222,7 @@ process { ] } - withName: CENTRIFUGE { + withName: CENTRIFUGE_CENTRIFUGE { publishDir = [ path: { "${params.outdir}/Taxonomy/centrifuge/${meta.id}" }, mode: params.publish_dir_mode, diff --git a/modules.json b/modules.json index c8023704..470611b7 100644 --- a/modules.json +++ b/modules.json @@ -41,6 +41,16 @@ "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", "installed_by": ["modules"] }, + "centrifuge/centrifuge": { + "branch": "master", + "git_sha": "9a07a1293d9b818d1e06d0f7b58152f74d462012", + "installed_by": ["modules"] + }, + "centrifuge/kreport": { + "branch": "master", + "git_sha": "9a07a1293d9b818d1e06d0f7b58152f74d462012", + "installed_by": ["modules"] + }, "checkm/lineagewf": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", @@ -205,6 +215,11 @@ "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] + }, + "untar": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["modules"] } } }, diff --git a/modules/nf-core/centrifuge/centrifuge/environment.yml b/modules/nf-core/centrifuge/centrifuge/environment.yml new file mode 100644 index 00000000..cf34dc0e --- /dev/null +++ 
b/modules/nf-core/centrifuge/centrifuge/environment.yml @@ -0,0 +1,7 @@ +name: centrifuge_centrifuge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::centrifuge=1.0.4.1 diff --git a/modules/nf-core/centrifuge/centrifuge/main.nf b/modules/nf-core/centrifuge/centrifuge/main.nf new file mode 100644 index 00000000..d9a5653d --- /dev/null +++ b/modules/nf-core/centrifuge/centrifuge/main.nf @@ -0,0 +1,91 @@ +process CENTRIFUGE_CENTRIFUGE { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4.1--hdcf5f25_1' : + 'biocontainers/centrifuge:1.0.4.1--hdcf5f25_1' }" + + input: + tuple val(meta), path(reads) + path db + val save_unaligned + val save_aligned + + output: + tuple val(meta), path('*report.txt') , emit: report + tuple val(meta), path('*results.txt') , emit: results + tuple val(meta), path('*.{sam,tab}') , optional: true, emit: sam + tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_mapped + tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + def unaligned = '' + def aligned = '' + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : '' + aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : '' + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' + aligned = save_aligned ? 
"--al-conc-gz ${prefix}.mapped.fastq.gz" : '' + } + """ + ## we add "-no-name ._" to ensure silly Mac OSX metafiles files aren't included + db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/\\.1.cf\$//'` + + ## make a directory for placing the pipe files in somewhere other than default /tmp + ## otherwise get pipefile name clashes when multiple centrifuge runs on same node + ## use /tmp at the same time + mkdir ./temp + + centrifuge \\ + -x \$db_name \\ + --temp-directory ./temp \\ + -p $task.cpus \\ + $paired \\ + --report-file ${prefix}.report.txt \\ + -S ${prefix}.results.txt \\ + $unaligned \\ + $aligned \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "-U ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + def unaligned = '' + def aligned = '' + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : '' + aligned = save_aligned ? "--al-gz ${prefix}.mapped.fastq.gz" : '' + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' + aligned = save_aligned ? 
"--al-conc-gz ${prefix}.mapped.fastq.gz" : '' + } + """ + touch ${prefix}.report.txt + touch ${prefix}.results.txt + touch ${prefix}.sam + echo | gzip -n > ${prefix}.unmapped.fastq.gz + echo | gzip -n > ${prefix}.mapped.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/centrifuge/centrifuge/meta.yml b/modules/nf-core/centrifuge/centrifuge/meta.yml new file mode 100644 index 00000000..a06104e1 --- /dev/null +++ b/modules/nf-core/centrifuge/centrifuge/meta.yml @@ -0,0 +1,75 @@ +name: centrifuge_centrifuge +description: Classifies metagenomic sequence data +keywords: + - classify + - metagenomics + - fastq + - db +tools: + - centrifuge: + description: Centrifuge is a classifier for metagenomic sequences. + homepage: https://ccb.jhu.edu/software/centrifuge/ + documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml + doi: 10.1101/gr.210641.116 + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - db: + type: directory + description: Path to directory containing centrifuge database files + - save_unaligned: + type: boolean + description: If true unmapped fastq files are saved + - save_aligned: + type: boolean + description: If true mapped fastq files are saved +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - report: + type: file + description: | + File containing a classification summary + pattern: "*.{report.txt}" + - results: + type: file + description: | + File containing classification results + pattern: "*.{results.txt}" + - sam: + type: file + description: | + Optional output file containing read alignments (SAM format )or a table of per-read hit information (TAB)s + pattern: "*.{sam,tab}" + - fastq_unmapped: + type: file + description: Unmapped fastq files + pattern: "*.unmapped.fastq.gz" + - fastq_mapped: + type: file + description: Mapped fastq files + pattern: "*.mapped.fastq.gz" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@sofstam" + - "@jfy133" + - "@sateeshperi" +maintainers: + - "@sofstam" + - "@jfy133" + - "@sateeshperi" diff --git a/modules/nf-core/centrifuge/centrifuge/tests/main.nf.test b/modules/nf-core/centrifuge/centrifuge/tests/main.nf.test new file mode 100644 index 00000000..d83b522a --- /dev/null +++ b/modules/nf-core/centrifuge/centrifuge/tests/main.nf.test @@ -0,0 +1,106 @@ +nextflow_process { + + name "Test Process CENTRIFUGE_CENTRIFUGE" + script "../main.nf" + process "CENTRIFUGE_CENTRIFUGE" + + tag "modules" + tag "modules_nfcore" + tag "centrifuge" + tag "centrifuge/centrifuge" + tag "untar" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz', checkIfExists: true) ] + """ + } + } + } + + test("sarscov2_fastq_se") { + + when { + process { + """ + input[0] = [ [id: 'test', single_end: true], file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + 
file(process.out.report[0][1]).name, + file(process.out.results[0][1]).name, + file(process.out.fastq_mapped[0][1][0]).name, + file(process.out.fastq_unmapped[0][1][0]).name, + ).match() } + ) + } + + } + + test("sarscov2_fastq_pe") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] + ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.report[0][1]).name, + file(process.out.results[0][1]).name, + file(process.out.fastq_mapped[0][1][0]).name, + file(process.out.fastq_unmapped[0][1][0]).name, + ).match() } + ) + } + + } + + test("sarscov2_fastq_se_stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id: 'test'], file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/centrifuge/centrifuge/tests/main.nf.test.snap b/modules/nf-core/centrifuge/centrifuge/tests/main.nf.test.snap new file mode 100644 index 00000000..f8a2ef7b --- /dev/null +++ b/modules/nf-core/centrifuge/centrifuge/tests/main.nf.test.snap @@ -0,0 +1,125 @@ +{ + "sarscov2_fastq_se_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.report.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.results.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + 
"test.mapped.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test.unmapped.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + "versions.yml:md5,1ce028d9f968eca6df31586fe3b77c84" + ], + "fastq_mapped": [ + [ + { + "id": "test" + }, + "test.mapped.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "fastq_unmapped": [ + [ + { + "id": "test" + }, + "test.unmapped.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "report": [ + [ + { + "id": "test" + }, + "test.report.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "results": [ + [ + { + "id": "test" + }, + "test.results.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + [ + { + "id": "test" + }, + "test.sam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,1ce028d9f968eca6df31586fe3b77c84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-02T07:47:36.886757827" + }, + "sarscov2_fastq_se": { + "content": [ + "test.report.txt", + "test.results.txt", + "", + "" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-02T08:22:31.470316024" + }, + "sarscov2_fastq_pe": { + "content": [ + "test.report.txt", + "test.results.txt", + "test.mapped.fastq.1.gz", + "test.unmapped.fastq.1.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-02T08:22:48.866073154" + } +} \ No newline at end of file diff --git a/modules/nf-core/centrifuge/centrifuge/tests/tags.yml b/modules/nf-core/centrifuge/centrifuge/tests/tags.yml new file mode 100644 index 00000000..53444cd2 --- /dev/null +++ b/modules/nf-core/centrifuge/centrifuge/tests/tags.yml @@ -0,0 +1,2 @@ +centrifuge/centrifuge: + - "modules/nf-core/centrifuge/centrifuge/**" diff --git a/modules/nf-core/centrifuge/kreport/environment.yml b/modules/nf-core/centrifuge/kreport/environment.yml new file mode 100644 index 00000000..5c8fb451 
--- /dev/null +++ b/modules/nf-core/centrifuge/kreport/environment.yml @@ -0,0 +1,7 @@ +name: centrifuge_kreport +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::centrifuge=1.0.4.1 diff --git a/modules/nf-core/centrifuge/kreport/main.nf b/modules/nf-core/centrifuge/kreport/main.nf new file mode 100644 index 00000000..25eb7167 --- /dev/null +++ b/modules/nf-core/centrifuge/kreport/main.nf @@ -0,0 +1,45 @@ +process CENTRIFUGE_KREPORT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4.1--hdcf5f25_1' : + 'biocontainers/centrifuge:1.0.4.1--hdcf5f25_1' }" + + input: + tuple val(meta), path(report) + path db + + output: + tuple val(meta), path('*.txt'), emit: kreport + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/\\.1.cf\$//'` + centrifuge-kreport -x \$db_name ${report} > ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + centrifuge: \$( centrifuge --version | sed -n 1p | sed 's/^.*centrifuge-class version //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/centrifuge/kreport/meta.yml b/modules/nf-core/centrifuge/kreport/meta.yml new file mode 100644 index 00000000..5641152b --- /dev/null +++ b/modules/nf-core/centrifuge/kreport/meta.yml @@ -0,0 +1,51 @@ +name: "centrifuge_kreport" +description: Creates Kraken-style 
reports from centrifuge out files +keywords: + - classify + - metagenomics + - fastq + - db + - report + - kraken +tools: + - centrifuge: + description: Centrifuge is a classifier for metagenomic sequences. + homepage: https://ccb.jhu.edu/software/centrifuge/ + documentation: https://ccb.jhu.edu/software/centrifuge/manual.shtml + doi: 10.1101/gr.210641.116 + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - report: + type: file + description: File containing the centrifuge classification report + pattern: "*.{txt}" + - db: + type: directory + description: Path to directory containing centrifuge database files +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - kreport: + type: file + description: | + File containing kraken-style report from centrifuge + out files. 
+ pattern: "*.{txt}" +authors: + - "@sofstam" + - "@jfy133" +maintainers: + - "@sofstam" + - "@jfy133" diff --git a/modules/nf-core/centrifuge/kreport/tests/main.nf.test b/modules/nf-core/centrifuge/kreport/tests/main.nf.test new file mode 100644 index 00000000..6347bd7c --- /dev/null +++ b/modules/nf-core/centrifuge/kreport/tests/main.nf.test @@ -0,0 +1,81 @@ +// nf-core modules test centrifuge/kreport +nextflow_process { + + name "Test Process CENTRIFUGE_KREPORT" + script "../main.nf" + process "CENTRIFUGE_KREPORT" + + tag "modules" + tag "modules_nfcore" + tag "centrifuge" + tag "centrifuge/centrifuge" + tag "centrifuge/kreport" + tag "untar" + + setup { + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = db = [ [], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/delete_me/minigut_cf.tar.gz', checkIfExists: true) ] + """ + } + } + run("CENTRIFUGE_CENTRIFUGE") { + script "../../../centrifuge/centrifuge/main.nf" + process { + """ + input[0] = [ [id: 'test', single_end: true], file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = true + """ + } + } + } + + test("sarscov2_fastq_se") { + + when { + process { + """ + input[0] = CENTRIFUGE_CENTRIFUGE.out.results + input[1] = UNTAR.out.untar.map{it[1]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.kreport[0][1]).name, + ).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = CENTRIFUGE_CENTRIFUGE.out.results + input[1] = UNTAR.out.untar.map{it[1]} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/centrifuge/kreport/tests/main.nf.test.snap b/modules/nf-core/centrifuge/kreport/tests/main.nf.test.snap new file mode 100644 
index 00000000..4e0aaa79 --- /dev/null +++ b/modules/nf-core/centrifuge/kreport/tests/main.nf.test.snap @@ -0,0 +1,47 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,43c766a19f2edf7e05d1a2a0b1816b13" + ], + "kreport": [ + [ + { + "id": "test", + "single_end": true + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,43c766a19f2edf7e05d1a2a0b1816b13" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-02T06:18:36.794405448" + }, + "sarscov2_fastq_se": { + "content": [ + "test.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-02T06:28:20.461891873" + } +} \ No newline at end of file diff --git a/modules/nf-core/centrifuge/kreport/tests/tags.yml b/modules/nf-core/centrifuge/kreport/tests/tags.yml new file mode 100644 index 00000000..a3823d76 --- /dev/null +++ b/modules/nf-core/centrifuge/kreport/tests/tags.yml @@ -0,0 +1,2 @@ +centrifuge/kreport: + - "modules/nf-core/centrifuge/kreport/**" diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 00000000..0c9cbb10 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,11 @@ +name: untar + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.7 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 00000000..8a75bb95 --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,63 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir $prefix + touch ${prefix}/file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 00000000..a9a2110f --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,46 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - untar: + type: directory + description: Directory containing contents of archive + pattern: "*/" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 00000000..2a7c97bf --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,47 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar") }, + ) + } + + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar_onlyfiles") }, + ) + } + + } + +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 00000000..64550292 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,42 @@ +{ + "test_untar_onlyfiles": { + "content": [ + [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ] + 
], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T11:49:41.320643" + }, + "test_untar": { + "content": [ + [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T11:49:33.795172" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 00000000..feb6f15c --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/nextflow_schema.json b/nextflow_schema.json index 33f16acd..2d326075 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -479,11 +479,15 @@ "properties": { "centrifuge_db": { "type": "string", + "format": "file-path", + "pattern": ".*.tar.gz", + "exists": true, "description": "Database for taxonomic binning with centrifuge.", "help_text": "Local directory containing `*.cf` files, or a URL or local path to a downloaded compressed tar archive of a Centrifuge database. E.g. ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz." }, "kraken2_db": { "type": "string", + "format": "file-path", "description": "Database for taxonomic binning with kraken2.", "help_text": "Path to a local directory, archive file, or a URL to compressed tar archive that contains at least the three files `hash.k2d`, `opts.k2d` and `taxo.k2d`. E.g. ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken_8GB_202003.tgz." 
}, diff --git a/workflows/mag.nf b/workflows/mag.nf index 6ec7b132..b5761d28 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -60,8 +60,6 @@ include { NANOLYSE } from '../modules include { FILTLONG } from '../modules/local/filtlong' include { NANOPLOT as NANOPLOT_RAW } from '../modules/local/nanoplot' include { NANOPLOT as NANOPLOT_FILTERED } from '../modules/local/nanoplot' -include { CENTRIFUGE_DB_PREPARATION } from '../modules/local/centrifuge_db_preparation' -include { CENTRIFUGE } from '../modules/local/centrifuge' include { KRAKEN2_DB_PREPARATION } from '../modules/local/kraken2_db_preparation' include { KRAKEN2 } from '../modules/local/kraken2' include { KRONA_DB } from '../modules/local/krona_db' @@ -72,7 +70,6 @@ include { POOL_SINGLE_READS as POOL_LONG_READS } from '../modules include { MEGAHIT } from '../modules/local/megahit' include { SPADES } from '../modules/local/spades' include { SPADESHYBRID } from '../modules/local/spadeshybrid' -include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip' include { QUAST } from '../modules/local/quast' include { QUAST_BINS } from '../modules/local/quast_bins' include { QUAST_BINS_SUMMARY } from '../modules/local/quast_bins_summary' @@ -109,6 +106,7 @@ include { DEPTHS } from '../subworkflows/local/depths' // MODULE: Installed directly from nf-core/modules // include { ARIA2 as ARIA2_UNTAR } from '../modules/nf-core/aria2/main' +include { UNTAR } from '../modules/nf-core/untar/main' include { FASTQC as FASTQC_RAW } from '../modules/nf-core/fastqc/main' include { FASTQC as FASTQC_TRIMMED } from '../modules/nf-core/fastqc/main' include { SEQTK_MERGEPE } from '../modules/nf-core/seqtk/mergepe/main' @@ -117,6 +115,9 @@ include { FASTP } from '../modules/nf-core/fast include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../modules/nf-core/adapterremoval/main' include { CAT_FASTQ } from 
'../modules/nf-core/cat/fastq/main' +include { CENTRIFUGE_CENTRIFUGE } from '../modules/nf-core/centrifuge/centrifuge/main' +include { CENTRIFUGE_KREPORT } from '../modules/nf-core/centrifuge/kreport/main' +include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip' include { PRODIGAL } from '../modules/nf-core/prodigal/main' include { PROKKA } from '../modules/nf-core/prokka/main' include { MMSEQS_DATABASES } from '../modules/nf-core/mmseqs/databases/main' @@ -158,12 +159,6 @@ if (params.gunc_db) { ch_gunc_db = Channel.empty() } -if(params.centrifuge_db){ - ch_centrifuge_db_file = file(params.centrifuge_db, checkIfExists: true) -} else { - ch_centrifuge_db_file = [] -} - if(params.kraken2_db){ ch_kraken2_db_file = file(params.kraken2_db, checkIfExists: true) } else { @@ -460,38 +455,28 @@ workflow MAG { Taxonomic information ================================================================================ */ - if ( !ch_centrifuge_db_file.isEmpty() ) { - if ( ch_centrifuge_db_file.extension in ['gz', 'tgz'] ) { - // Expects to be tar.gz! 
- ch_db_for_centrifuge = CENTRIFUGE_DB_PREPARATION ( ch_centrifuge_db_file ) - .db - .collect() - .map{ - db -> - def db_name = db[0].getBaseName().split('\\.')[0] - [ db_name, db ] - } - } else if ( ch_centrifuge_db_file.isDirectory() ) { - ch_db_for_centrifuge = Channel - .fromPath( "${ch_centrifuge_db_file}/*.cf" ) - .collect() - .map{ - db -> - def db_name = db[0].getBaseName().split('\\.')[0] - [ db_name, db ] - } + + + if ( !params.centrifuge_db ) { + ch_db_for_centrifuge = Channel.empty() + } else { + if ( file(params.centrifuge_db).isDirectory() ) { + ch_db_for_centrifuge = Channel.value(file(params.centrifuge_db, checkIfExists: true)) // value channel: the same DB must be reused for every sample } else { - ch_db_for_centrifuge = Channel.empty() + ch_db_for_centrifuge = UNTAR ( Channel.of([[id: 'db'], file(params.centrifuge_db, checkIfExists: true)])).untar.map{it[1]}.first() // value channel: the same DB must be reused for every sample } - } else { - ch_db_for_centrifuge = Channel.empty() } - CENTRIFUGE ( + CENTRIFUGE_CENTRIFUGE ( ch_short_reads, - ch_db_for_centrifuge + ch_db_for_centrifuge, + false, + false ) - ch_versions = ch_versions.mix(CENTRIFUGE.out.versions.first()) + ch_versions = ch_versions.mix(CENTRIFUGE_CENTRIFUGE.out.versions.first()) + + CENTRIFUGE_KREPORT ( CENTRIFUGE_CENTRIFUGE.out.results, ch_db_for_centrifuge ) // centrifuge-kreport consumes the per-read classification output (-S), not the --report-file summary + ch_versions = ch_versions.mix(CENTRIFUGE_KREPORT.out.versions.first()) if ( !ch_kraken2_db_file.isEmpty() ) { if ( ch_kraken2_db_file.extension in ['gz', 'tgz'] ) { @@ -530,7 +515,7 @@ workflow MAG { KRONA_DB () ch_krona_db = KRONA_DB.out.db } - ch_tax_classifications = CENTRIFUGE.out.results_for_krona.mix(KRAKEN2.out.results_for_krona) + ch_tax_classifications = CENTRIFUGE_KREPORT.out.kreport.mix(KRAKEN2.out.results_for_krona) .
map { classifier, meta, report -> def meta_new = meta + [classifier: classifier] [ meta_new, report ] @@ -1085,7 +1070,7 @@ workflow MAG { } - ch_multiqc_files = ch_multiqc_files.mix(CENTRIFUGE.out.kreport.collect{it[1]}.ifEmpty([])) + ch_multiqc_files = ch_multiqc_files.mix(CENTRIFUGE_KREPORT.out.kreport.collect{it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(KRAKEN2.out.report.collect{it[1]}.ifEmpty([])) if (!params.skip_quast){ From 3bec3e09842a9a44c4f66b610c54da10c3d00daf Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 24 Apr 2024 17:06:39 +0200 Subject: [PATCH 2/7] Start replacing krona --- conf/modules.config | 9 +++ modules.json | 18 ++++- .../krakentools/kreport2krona/environment.yml | 7 ++ .../nf-core/krakentools/kreport2krona/main.nf | 36 +++++++++ .../krakentools/kreport2krona/meta.yml | 40 ++++++++++ modules/nf-core/krona/kronadb/environment.yml | 7 ++ modules/nf-core/krona/kronadb/main.nf | 30 ++++++++ modules/nf-core/krona/kronadb/meta.yml | 26 +++++++ .../krona/ktimporttaxonomy/environment.yml | 7 ++ .../nf-core/krona/ktimporttaxonomy/main.nf | 41 ++++++++++ .../nf-core/krona/ktimporttaxonomy/meta.yml | 45 +++++++++++ workflows/mag.nf | 75 +++++++++++-------- 12 files changed, 309 insertions(+), 32 deletions(-) create mode 100644 modules/nf-core/krakentools/kreport2krona/environment.yml create mode 100644 modules/nf-core/krakentools/kreport2krona/main.nf create mode 100644 modules/nf-core/krakentools/kreport2krona/meta.yml create mode 100644 modules/nf-core/krona/kronadb/environment.yml create mode 100644 modules/nf-core/krona/kronadb/main.nf create mode 100644 modules/nf-core/krona/kronadb/meta.yml create mode 100644 modules/nf-core/krona/ktimporttaxonomy/environment.yml create mode 100644 modules/nf-core/krona/ktimporttaxonomy/main.nf create mode 100644 modules/nf-core/krona/ktimporttaxonomy/meta.yml diff --git a/conf/modules.config b/conf/modules.config index 20ae66ee..1305b66f 100644 --- a/conf/modules.config +++ 
b/conf/modules.config @@ -230,6 +230,15 @@ process { ] } + withName: CENTRIFUGE_KREPORT { + ext.prefix = { "${meta.id}_kreport" } + publishDir = [ + path: { "${params.outdir}/Taxonomy/centrifuge/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.txt" + ] + } + withName: KRAKEN2 { ext.args = '--quiet' publishDir = [ diff --git a/modules.json b/modules.json index 470611b7..0f8a4d12 100644 --- a/modules.json +++ b/modules.json @@ -49,7 +49,8 @@ "centrifuge/kreport": { "branch": "master", "git_sha": "9a07a1293d9b818d1e06d0f7b58152f74d462012", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/centrifuge/kreport/centrifuge-kreport.diff" }, "checkm/lineagewf": { "branch": "master", @@ -151,6 +152,21 @@ "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", "installed_by": ["modules"] }, + "krakentools/kreport2krona": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "krona/kronadb": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "krona/ktimporttaxonomy": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, "maxbin2": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", diff --git a/modules/nf-core/krakentools/kreport2krona/environment.yml b/modules/nf-core/krakentools/kreport2krona/environment.yml new file mode 100644 index 00000000..ea49a77c --- /dev/null +++ b/modules/nf-core/krakentools/kreport2krona/environment.yml @@ -0,0 +1,7 @@ +name: krakentools_kreport2krona +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::krakentools=1.2 diff --git a/modules/nf-core/krakentools/kreport2krona/main.nf b/modules/nf-core/krakentools/kreport2krona/main.nf new file mode 100644 index 00000000..f9f27001 --- /dev/null +++ b/modules/nf-core/krakentools/kreport2krona/main.nf @@ -0,0 
+1,36 @@ +process KRAKENTOOLS_KREPORT2KRONA { + tag "$meta.id" + label 'process_single' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/krakentools:1.2--pyh5e36f6f_0': + 'biocontainers/krakentools:1.2--pyh5e36f6f_0' }" + + input: + tuple val(meta), path(kreport) + + output: + tuple val(meta), path("*.txt"), emit: txt + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + kreport2krona.py \\ + -r ${kreport} \\ + -o ${prefix}.txt \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kreport2krona.py: ${VERSION} + END_VERSIONS + """ +} diff --git a/modules/nf-core/krakentools/kreport2krona/meta.yml b/modules/nf-core/krakentools/kreport2krona/meta.yml new file mode 100644 index 00000000..7a5dda4a --- /dev/null +++ b/modules/nf-core/krakentools/kreport2krona/meta.yml @@ -0,0 +1,40 @@ +name: krakentools_kreport2krona +description: Takes a Kraken report file and prints out a krona-compatible TEXT file +keywords: + - kraken + - krona + - metagenomics + - visualization +tools: + - krakentools: + description: KrakenTools is a suite of scripts to be used for post-analysis of Kraken/KrakenUniq/Kraken2/Bracken results. Please cite the relevant paper if using KrakenTools with any of the listed programs. + homepage: https://github.com/jenniferlu717/KrakenTools + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - kreport: + type: file + description: Kraken report + pattern: "*.{txt,kreport}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - krona: + type: file + description: Krona text-based input file converted from Kraken report + pattern: "*.{txt,krona}" +authors: + - "@MillironX" +maintainers: + - "@MillironX" diff --git a/modules/nf-core/krona/kronadb/environment.yml b/modules/nf-core/krona/kronadb/environment.yml new file mode 100644 index 00000000..1646628f --- /dev/null +++ b/modules/nf-core/krona/kronadb/environment.yml @@ -0,0 +1,7 @@ +name: krona_kronadb +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::krona=2.7.1 diff --git a/modules/nf-core/krona/kronadb/main.nf b/modules/nf-core/krona/kronadb/main.nf new file mode 100644 index 00000000..1d9bf698 --- /dev/null +++ b/modules/nf-core/krona/kronadb/main.nf @@ -0,0 +1,30 @@ +def VERSION='2.7.1' // Version information not provided by tool on CLI + +process KRONA_KRONADB { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/krona:2.7.1--pl526_5' : + 'biocontainers/krona:2.7.1--pl526_5' }" + + output: + path 'taxonomy/taxonomy.tab', emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + ktUpdateTaxonomy.sh \\ + $args \\ + taxonomy/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krona: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/krona/kronadb/meta.yml b/modules/nf-core/krona/kronadb/meta.yml new file mode 100644 index 00000000..0d42bb10 --- /dev/null +++ b/modules/nf-core/krona/kronadb/meta.yml @@ -0,0 +1,26 @@ +name: krona_kronadb +description: KronaTools Update Taxonomy downloads a taxonomy database +keywords: + - database + - taxonomy + - krona +tools: + - krona: + description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files. + homepage: https://github.com/marbl/Krona/wiki/KronaTools + documentation: https://github.com/marbl/Krona/wiki/Installing + doi: 10.1186/1471-2105-12-385 +# There is no input. This module downloads a pre-built taxonomy database for use with Krona Tools. +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - db: + type: file + description: A TAB separated file that contains a taxonomy database. 
+ pattern: "*.{tab}" +authors: + - "@mjakobs" +maintainers: + - "@mjakobs" diff --git a/modules/nf-core/krona/ktimporttaxonomy/environment.yml b/modules/nf-core/krona/ktimporttaxonomy/environment.yml new file mode 100644 index 00000000..1909e15f --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/environment.yml @@ -0,0 +1,7 @@ +name: krona_ktimporttaxonomy +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::krona=2.8 diff --git a/modules/nf-core/krona/ktimporttaxonomy/main.nf b/modules/nf-core/krona/ktimporttaxonomy/main.nf new file mode 100644 index 00000000..5a9f3ff8 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/main.nf @@ -0,0 +1,41 @@ +process KRONA_KTIMPORTTAXONOMY { + tag "${meta.id}" + label 'process_single' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/krona:2.8--pl5262hdfd78af_2' : + 'biocontainers/krona:2.8--pl5262hdfd78af_2' }" + + input: + tuple val(meta), path(report) + path taxonomy, stageAs: 'taxonomy.tab' + + output: + tuple val(meta), path ('*.html'), emit: html + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '2.8' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + TAXONOMY=\$(find -L . 
-name '*.tab' -exec dirname {} \\;) + echo \$TAXONOMY + + ktImportTaxonomy \\ + $args \\ + -o ${prefix}.html \\ + -tax \$TAXONOMY/ \\ + $report + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + krona: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/krona/ktimporttaxonomy/meta.yml b/modules/nf-core/krona/ktimporttaxonomy/meta.yml new file mode 100644 index 00000000..de548210 --- /dev/null +++ b/modules/nf-core/krona/ktimporttaxonomy/meta.yml @@ -0,0 +1,45 @@ +name: krona_ktimporttaxonomy +description: KronaTools Import Taxonomy imports taxonomy classifications and produces an interactive Krona plot. +keywords: + - plot + - taxonomy + - interactive + - html + - visualisation + - krona chart +tools: + - krona: + description: Krona Tools is a set of scripts to create Krona charts from several Bioinformatics tools as well as from text and XML files. + homepage: https://github.com/marbl/Krona/wiki/KronaTools + documentation: http://manpages.ubuntu.com/manpages/impish/man1/ktImportTaxonomy.1.html + doi: 10.1186/1471-2105-12-385 +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test'] + - database: + type: file + description: | + Path to a Krona taxonomy .tab file normally downloaded and generated by + krona/ktUpdateTaxonomy. Custom taxonomy files can have any name, but + must end in `.tab`. + pattern: "*tab" + - report: + type: file + description: "A tab-delimited file with taxonomy IDs and (optionally) query IDs, magnitudes, and scores. Query IDs are taken from column 1, taxonomy IDs from column 2, and scores from column 3. Lines beginning with # will be ignored." + pattern: "*.{tsv}" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - html: + type: file + description: A html file containing an interactive krona plot. 
+ pattern: "*.{html}" +authors: + - "@mjakobs" +maintainers: + - "@mjakobs" diff --git a/workflows/mag.nf b/workflows/mag.nf index b5761d28..70dde6d3 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -62,8 +62,6 @@ include { NANOPLOT as NANOPLOT_RAW } from '../modules include { NANOPLOT as NANOPLOT_FILTERED } from '../modules/local/nanoplot' include { KRAKEN2_DB_PREPARATION } from '../modules/local/kraken2_db_preparation' include { KRAKEN2 } from '../modules/local/kraken2' -include { KRONA_DB } from '../modules/local/krona_db' -include { KRONA } from '../modules/local/krona' include { POOL_SINGLE_READS as POOL_SHORT_SINGLE_READS } from '../modules/local/pool_single_reads' include { POOL_PAIRED_READS } from '../modules/local/pool_paired_reads' include { POOL_SINGLE_READS as POOL_LONG_READS } from '../modules/local/pool_single_reads' @@ -105,25 +103,28 @@ include { DEPTHS } from '../subworkflows/local/depths' // // MODULE: Installed directly from nf-core/modules // -include { ARIA2 as ARIA2_UNTAR } from '../modules/nf-core/aria2/main' -include { UNTAR } from '../modules/nf-core/untar/main' -include { FASTQC as FASTQC_RAW } from '../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_TRIMMED } from '../modules/nf-core/fastqc/main' -include { SEQTK_MERGEPE } from '../modules/nf-core/seqtk/mergepe/main' -include { BBMAP_BBNORM } from '../modules/nf-core/bbmap/bbnorm/main' -include { FASTP } from '../modules/nf-core/fastp/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../modules/nf-core/adapterremoval/main' -include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' -include { CENTRIFUGE_CENTRIFUGE } from '../modules/nf-core/centrifuge/centrifuge/main' -include { CENTRIFUGE_KREPORT } from '../modules/nf-core/centrifuge/kreport/main' -include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip' -include { PRODIGAL } from 
'../modules/nf-core/prodigal/main' -include { PROKKA } from '../modules/nf-core/prokka/main' -include { MMSEQS_DATABASES } from '../modules/nf-core/mmseqs/databases/main' -include { METAEUK_EASYPREDICT } from '../modules/nf-core/metaeuk/easypredict/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { ARIA2 as ARIA2_UNTAR } from '../modules/nf-core/aria2/main' +include { UNTAR } from '../modules/nf-core/untar/main' +include { FASTQC as FASTQC_RAW } from '../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIMMED } from '../modules/nf-core/fastqc/main' +include { SEQTK_MERGEPE } from '../modules/nf-core/seqtk/mergepe/main' +include { BBMAP_BBNORM } from '../modules/nf-core/bbmap/bbnorm/main' +include { FASTP } from '../modules/nf-core/fastp/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../modules/nf-core/adapterremoval/main' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { CENTRIFUGE_CENTRIFUGE } from '../modules/nf-core/centrifuge/centrifuge/main' +include { CENTRIFUGE_KREPORT } from '../modules/nf-core/centrifuge/kreport/main' +include { KRONA_KRONADB } from '../modules/nf-core/krona/kronadb/main' +include { KRONA_KTIMPORTTAXONOMY } from '../modules/nf-core/krona/ktimporttaxonomy/main' +include { KRAKENTOOLS_KREPORT2KRONA as KREPORT2KRONA_CENTRIFUGE } from '../modules/nf-core/krakentools/kreport2krona/main' +include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip' +include { PRODIGAL } from '../modules/nf-core/prodigal/main' +include { PROKKA } from '../modules/nf-core/prokka/main' +include { MMSEQS_DATABASES } from '../modules/nf-core/mmseqs/databases/main' +include { METAEUK_EASYPREDICT } from '../modules/nf-core/metaeuk/easypredict/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from 
'../modules/nf-core/custom/dumpsoftwareversions/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' //////////////////////////////////////////////////// /* -- Create channel for reference databases -- */ @@ -475,7 +476,7 @@ workflow MAG { ) ch_versions = ch_versions.mix(CENTRIFUGE_CENTRIFUGE.out.versions.first()) - CENTRIFUGE_KREPORT ( CENTRIFUGE_CENTRIFUGE.out.report, ch_db_for_centrifuge ) + CENTRIFUGE_KREPORT ( CENTRIFUGE_CENTRIFUGE.out.results, ch_db_for_centrifuge ) ch_versions = ch_versions.mix(CENTRIFUGE_KREPORT.out.versions.first()) if ( !ch_kraken2_db_file.isEmpty() ) { @@ -512,19 +513,31 @@ workflow MAG { if (params.krona_db){ ch_krona_db = ch_krona_db_file } else { - KRONA_DB () - ch_krona_db = KRONA_DB.out.db + KRONA_KRONADB () + ch_krona_db = KRONA_KRONADB.out.db + ch_versions = ch_versions.mix(KRONA_KRONADB.out.versions.first()) } - ch_tax_classifications = CENTRIFUGE_KREPORT.out.kreport.mix(KRAKEN2.out.results_for_krona) - . map { classifier, meta, report -> - def meta_new = meta + [classifier: classifier] - [ meta_new, report ] - } - KRONA ( + + if ( params.centrifuge_db ) { + ch_centrifuge_for_krona = KREPORT2KRONA_CENTRIFUGE ( CENTRIFUGE_KREPORT.out.kreport.dump(tag: 'input_to_convert') ).txt.dump(tag: 'output_from_convert') + ch_versions = ch_versions.mix(KREPORT2KRONA_CENTRIFUGE.out.versions.first()) + } else { + ch_centrifuge_for_krona = Channel.empty() + } + + + ch_tax_classifications = ch_centrifuge_for_krona + .mix(KRAKEN2.out.results_for_krona) + . map { classifier, meta, report -> + def meta_new = meta + [classifier: classifier] + [ meta_new, report ] + } + KRONA_KTIMPORTTAXONOMY ( ch_tax_classifications, ch_krona_db ) - ch_versions = ch_versions.mix(KRONA.out.versions.first()) + ch_versions = ch_versions.mix(KRONA_KTIMPORTTAXONOMY.out.versions.first()) + } /* From 7dce783ca345312bf9d7c2963761e2a785d3c7a9 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 16 May 2024 14:30:34 +0200 Subject: [PATCH 3/7] Actually load centrifuge module --- workflows/mag.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/workflows/mag.nf b/workflows/mag.nf index 56b9ebf9..b68a0bff 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -36,6 +36,7 @@ include { BBMAP_BBNORM } from '../modules/nf-core/bbma include { FASTP } from '../modules/nf-core/fastp/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../modules/nf-core/adapterremoval/main' +include { CENTRIFUGE_CENTRIFUGE } from '../modules/nf-core/centrifuge/centrifuge/main' include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' include { PRODIGAL } from '../modules/nf-core/prodigal/main' include { PROKKA } from '../modules/nf-core/prokka/main' From a344091dd5c5dcf3b5fd61a27e27ad883f65aa19 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 16 May 2024 16:10:59 +0200 Subject: [PATCH 4/7] Fix publishing --- conf/modules.config | 11 ++++++++++- workflows/mag.nf | 48 ++++++++++++++++++++++++++------------------- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 23d0ce77..d6d410d4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -248,7 +248,16 @@ process { ] } - withName: KRONA { + withName: KREPORT2KRONA_CENTRIFUGE { + publishDir = [ + // path: { "${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.txt", + enabled: false + ] + } + + withName: KRONA_KTIMPORTTAXONOMY { publishDir = [ path: { "${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" }, mode: params.publish_dir_mode, diff --git a/workflows/mag.nf b/workflows/mag.nf index b68a0bff..07b77963 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -28,20 +28,25 @@ include { DEPTHS } from '../subworkflows/local/depths' // // MODULE: Installed 
directly from nf-core/modules // -include { ARIA2 as ARIA2_UNTAR } from '../modules/nf-core/aria2/main' -include { FASTQC as FASTQC_RAW } from '../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_TRIMMED } from '../modules/nf-core/fastqc/main' -include { SEQTK_MERGEPE } from '../modules/nf-core/seqtk/mergepe/main' -include { BBMAP_BBNORM } from '../modules/nf-core/bbmap/bbnorm/main' -include { FASTP } from '../modules/nf-core/fastp/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../modules/nf-core/adapterremoval/main' -include { CENTRIFUGE_CENTRIFUGE } from '../modules/nf-core/centrifuge/centrifuge/main' -include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' -include { PRODIGAL } from '../modules/nf-core/prodigal/main' -include { PROKKA } from '../modules/nf-core/prokka/main' -include { MMSEQS_DATABASES } from '../modules/nf-core/mmseqs/databases/main' -include { METAEUK_EASYPREDICT } from '../modules/nf-core/metaeuk/easypredict/main' +include { ARIA2 as ARIA2_UNTAR } from '../modules/nf-core/aria2/main' +include { FASTQC as FASTQC_RAW } from '../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIMMED } from '../modules/nf-core/fastqc/main' +include { SEQTK_MERGEPE } from '../modules/nf-core/seqtk/mergepe/main' +include { BBMAP_BBNORM } from '../modules/nf-core/bbmap/bbnorm/main' +include { FASTP } from '../modules/nf-core/fastp/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../modules/nf-core/adapterremoval/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../modules/nf-core/adapterremoval/main' +include { UNTAR as CENTRIFUGEDB_UNTAR } from '../modules/nf-core/untar/main' +include { CENTRIFUGE_CENTRIFUGE } from '../modules/nf-core/centrifuge/centrifuge/main' +include { CENTRIFUGE_KREPORT } from '../modules/nf-core/centrifuge/kreport/main' +include { KRONA_KRONADB } from 
'../modules/nf-core/krona/kronadb/main' +include { KRONA_KTIMPORTTAXONOMY } from '../modules/nf-core/krona/ktimporttaxonomy/main' +include { KRAKENTOOLS_KREPORT2KRONA as KREPORT2KRONA_CENTRIFUGE } from '../modules/nf-core/krakentools/kreport2krona/main' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { PRODIGAL } from '../modules/nf-core/prodigal/main' +include { PROKKA } from '../modules/nf-core/prokka/main' +include { MMSEQS_DATABASES } from '../modules/nf-core/mmseqs/databases/main' +include { METAEUK_EASYPREDICT } from '../modules/nf-core/metaeuk/easypredict/main' // // MODULE: Local to the pipeline @@ -405,14 +410,15 @@ workflow MAG { ================================================================================ */ - + // Centrifuge if ( !params.centrifuge_db ) { ch_db_for_centrifuge = Channel.empty() } else { if ( file(params.centrifuge_db).isDirectory() ) { ch_db_for_centrifuge = Channel.of(file(params.centrifuge_db, checkIfExists: true)) } else { - ch_db_for_centrifuge = UNTAR ( Channel.of([[id: 'db'], file(params.centrifuge_db, checkIfExists: true)])).untar.map{it[1]} + ch_db_for_centrifuge = CENTRIFUGEDB_UNTAR ( Channel.of([[id: 'db'], file(params.centrifuge_db, checkIfExists: true)])).untar.map{it[1]}.first() + ch_versions = ch_versions.mix(CENTRIFUGEDB_UNTAR.out.versions.first()) } } @@ -427,6 +433,7 @@ workflow MAG { CENTRIFUGE_KREPORT ( CENTRIFUGE_CENTRIFUGE.out.results, ch_db_for_centrifuge ) ch_versions = ch_versions.mix(CENTRIFUGE_KREPORT.out.versions.first()) + // Kraken2 if ( !ch_kraken2_db_file.isEmpty() ) { if ( ch_kraken2_db_file.extension in ['gz', 'tgz'] ) { // Expects to be tar.gz! 
@@ -463,23 +470,24 @@ workflow MAG { } else { KRONA_KRONADB () ch_krona_db = KRONA_KRONADB.out.db - ch_versions = ch_versions.mix(KRONA_KRONADB.out.versions.first()) + ch_versions = ch_versions.mix(KRONA_KRONADB.out.versions) } if ( params.centrifuge_db ) { - ch_centrifuge_for_krona = KREPORT2KRONA_CENTRIFUGE ( CENTRIFUGE_KREPORT.out.kreport.dump(tag: 'input_to_convert') ).txt.dump(tag: 'output_from_convert') + ch_centrifuge_for_krona = KREPORT2KRONA_CENTRIFUGE ( CENTRIFUGE_KREPORT.out.kreport ).txt.map{ meta, files -> ['centrifuge', meta, files] } ch_versions = ch_versions.mix(KREPORT2KRONA_CENTRIFUGE.out.versions.first()) } else { ch_centrifuge_for_krona = Channel.empty() } - + // Join together for Krona ch_tax_classifications = ch_centrifuge_for_krona .mix(KRAKEN2.out.results_for_krona) - . map { classifier, meta, report -> + .map { classifier, meta, report -> def meta_new = meta + [classifier: classifier] [ meta_new, report ] } + KRONA_KTIMPORTTAXONOMY ( ch_tax_classifications, ch_krona_db From da070f7b0d183af66d8bd21de14e0ede03d8655d Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 23 May 2024 13:45:13 +0200 Subject: [PATCH 5/7] Add missing module load --- workflows/mag.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/workflows/mag.nf b/workflows/mag.nf index 07b77963..7594b370 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -43,6 +43,7 @@ include { KRONA_KRONADB } from '../modul include { KRONA_KTIMPORTTAXONOMY } from '../modules/nf-core/krona/ktimporttaxonomy/main' include { KRAKENTOOLS_KREPORT2KRONA as KREPORT2KRONA_CENTRIFUGE } from '../modules/nf-core/krakentools/kreport2krona/main' include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { GUNZIP as GUNZIP_ASSEMBLIES } from '../modules/nf-core/gunzip' include { PRODIGAL } from '../modules/nf-core/prodigal/main' include { PROKKA } from '../modules/nf-core/prokka/main' include { MMSEQS_DATABASES } from '../modules/nf-core/mmseqs/databases/main' From 25b88512618e62f3f2341c7ff575fbfd87c2dd7d Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 23 May 2024 13:54:22 +0200 Subject: [PATCH 6/7] Update docs, add additional file description --- docs/output.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/output.md b/docs/output.md index 88aba227..838fc7c5 100644 --- a/docs/output.md +++ b/docs/output.md @@ -170,9 +170,10 @@ More information on the [Centrifuge](https://ccb.jhu.edu/software/centrifuge/) w Output files - `Taxonomy/centrifuge/[sample]/` - - `report.txt`: Tab-delimited result file. See the [centrifuge manual](https://ccb.jhu.edu/software/centrifuge/manual.shtml#centrifuge-classification-output) for information about the fields - - `kreport.txt`: Classification in the Kraken report format. See the [kraken2 manual](https://github.com/DerrickWood/kraken2/wiki/Manual#output-formats) for more details - - `taxonomy.krona.html`: Interactive pie chart produced by [KronaTools](https://github.com/marbl/Krona/wiki) + - `[sample].kreport.txt`: Classification in the Kraken report format. 
See the [kraken2 manual](https://github.com/DerrickWood/kraken2/wiki/Manual#output-formats) for more details + - `[sample].report.txt`: Tab-delimited result file. See the [centrifuge manual](https://ccb.jhu.edu/software/centrifuge/manual.shtml#centrifuge-classification-output) for information about the fields + - `[sample].results.txt`: Per read taxonomic classification information. See the [centrifuge manual](https://ccb.jhu.edu/software/centrifuge/manual.shtml#centrifuge-classification-output) for more details + - `[sample].html`: Interactive pie chart produced by [KronaTools](https://github.com/marbl/Krona/wiki) From b44e0f5cd7a24e26c041ba427cab0a22ae59cfa4 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 23 May 2024 14:38:49 +0200 Subject: [PATCH 7/7] Apply suggestions from code review --- conf/modules.config | 2 +- nextflow_schema.json | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index d6d410d4..a3be8574 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -250,7 +250,7 @@ process { withName: KREPORT2KRONA_CENTRIFUGE { publishDir = [ - // path: { "${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" }, + path: { "${params.outdir}/Taxonomy/${meta.classifier}/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.txt", enabled: false diff --git a/nextflow_schema.json b/nextflow_schema.json index a09e9fdc..59b8bb11 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -492,7 +492,6 @@ "centrifuge_db": { "type": "string", "format": "file-path", - "pattern": ".*.tar.gz", "exists": true, "description": "Database for taxonomic binning with centrifuge.", "help_text": "Local directory containing `*.cf` files, or a URL or local path to a downloaded compressed tar archive of a Centrifuge database. E.g. ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz."