Merge pull request #497 from willros/local-krona-db
Adds the --krona_db parameter to point at a local copy of the database needed for Krona. Suggested in Issue #404.
jfy133 authored Sep 2, 2023
2 parents 675b727 + 929b54d commit 21c9716
Showing 6 changed files with 31 additions and 8 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Added`

- [#497](https://github.com/nf-core/mag/pull/497) - Adds support for pointing at a local Krona taxonomy database using the parameter `--krona_db` (by @willros)
- [#395](https://github.com/nf-core/mag/pull/395) - Adds support for fast domain-level classification of bins using Tiara, to allow bins to be separated into eukaryotic and prokaryotic-specific processes.
- [#422](https://github.com/nf-core/mag/pull/422) - Adds support for normalization of read depth with BBNorm (added by @erikrikarddaniel and @fabianegli)
- [#439](https://github.com/nf-core/mag/pull/439) - Adds ability to enter the pipeline at the binning stage by providing a CSV of pre-computed assemblies (by @prototaxites)
2 changes: 2 additions & 0 deletions docs/usage.md
@@ -191,6 +191,8 @@ To allow also reproducible bin QC with BUSCO, run BUSCO providing already downlo

For the taxonomic bin classification with [CAT](https://github.com/dutilh/CAT), when running the pipeline with `--cat_db_generate`, the parameter `--save_cat_db` can be used to also save the generated database and allow reproducibility in future runs. Note that when specifying a pre-built database with `--cat_db`, the database currently cannot be saved.
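A hedged example of generating and saving the CAT database for reproducible re-runs (an editor's illustration, not part of this diff; the samplesheet and output paths are placeholders):

```bash
# Build the CAT database on the fly and keep a copy for reproducible re-runs
nextflow run nf-core/mag \
    -profile docker \
    --input samplesheet.csv \
    --outdir results \
    --cat_db_generate \
    --save_cat_db
```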

When visualizing taxonomic classifications with [Krona](https://github.com/marbl/Krona), you can provide a local taxonomy file, such as `taxonomy.tab`, with the `--krona_db` parameter; Krona will then use that file for the visualization. If `--krona_db` is omitted, Krona automatically downloads the taxonomy data it needs.
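A hedged example of supplying a local taxonomy file (again an editor's illustration, not part of the diff): the `ktUpdateTaxonomy.sh` helper from a KronaTools installation is assumed to be available, and all paths are placeholders.

```bash
# Build a local Krona taxonomy database once (writes taxonomy.tab into the given directory)
ktUpdateTaxonomy.sh /data/krona_taxonomy

# ...then point the pipeline at the resulting taxonomy.tab instead of
# letting Krona download it during the run
nextflow run nf-core/mag \
    -profile docker \
    --input samplesheet.csv \
    --outdir results \
    --krona_db /data/krona_taxonomy/taxonomy.tab
```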

The taxonomic classification of bins with GTDB-Tk is not guaranteed to be reproducible, since the placement of bins in the reference tree is non-deterministic. However, the authors of the GTDB-Tk article examined the reproducibility on a set of 100 genomes across 50 trials and did not observe any difference (see [https://doi.org/10.1093/bioinformatics/btz848](https://doi.org/10.1093/bioinformatics/btz848)).

## Core Nextflow arguments
10 changes: 6 additions & 4 deletions modules/local/krona.nf
@@ -8,15 +8,17 @@ process KRONA {

     input:
     tuple val(meta), path(report)
-    path "taxonomy/taxonomy.tab"
+    path(taxonomy_file), stageAs: 'taxonomy.tab'

     output:
-    path "*.html"       , emit: html
-    path "versions.yml" , emit: versions
+    tuple val(meta), path("*.html") , emit: html
+    path "versions.yml"             , emit: versions

     script:
     """
-    ktImportTaxonomy "$report" -tax taxonomy
+    TAXONOMY=\$(find -L . -name '*.tab' -exec dirname {} \\;)
+    ktImportTaxonomy ${report} -tax \$TAXONOMY/
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
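As a reading aid (not part of the commit), here is a hedged sketch of what the new script block resolves to at runtime once a taxonomy file has been staged into the work directory; the report file name is illustrative:

```bash
# The supplied file is staged as ./taxonomy.tab whatever its original name,
# so locate the directory holding any staged *.tab file (typically ".")...
TAXONOMY=$(find -L . -name '*.tab' -exec dirname {} \;)

# ...and pass that directory to ktImportTaxonomy via -tax
ktImportTaxonomy sample1.kraken2.krona -tax $TAXONOMY/
```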
1 change: 1 addition & 0 deletions nextflow.config
@@ -81,6 +81,7 @@ params {
     centrifuge_db = null
     kraken2_db = null
     skip_krona = false
+    krona_db = null
     cat_db = null
     cat_db_generate = false
     cat_official_taxonomy = false
5 changes: 5 additions & 0 deletions nextflow_schema.json
@@ -488,6 +488,11 @@
"description": "Database for taxonomic binning with kraken2.",
"help_text": "The database file must be a compressed tar archive that contains at least the three files `hash.k2d`, `opts.k2d` and `taxo.k2d`. E.g. ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken_8GB_202003.tgz."
},
"krona_db": {
"type": "string",
"description": "Database for taxonomic binning with krona",
"help_text": "Path to `taxonomy.tab` file for Krona, instead of downloading the default file. Point at the `.tab` file."
},
"skip_krona": {
"type": "boolean",
"description": "Skip creating a krona plot for taxonomic binning."
20 changes: 16 additions & 4 deletions workflows/mag.nf
@@ -31,7 +31,7 @@ log.info logo + paramsSummaryLog(workflow) + citation
WorkflowMag.initialise(params, log, hybrid)

// Check input path parameters to see if they exist
-def checkPathParamList = [ params.input, params.multiqc_config, params.phix_reference, params.host_fasta, params.centrifuge_db, params.kraken2_db, params.cat_db, params.gtdb_db, params.lambda_reference, params.busco_reference ]
+def checkPathParamList = [ params.input, params.multiqc_config, params.phix_reference, params.host_fasta, params.centrifuge_db, params.kraken2_db, params.cat_db, params.krona_db, params.gtdb_db, params.lambda_reference, params.busco_reference ]
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }

/*
@@ -189,6 +189,13 @@ if(params.cat_db){
     ch_cat_db_file = Channel.empty()
 }

+if(params.krona_db){
+    ch_krona_db_file = Channel
+        .value(file( "${params.krona_db}" ))
+} else {
+    ch_krona_db_file = Channel.empty()
+}
+
 if(!params.keep_phix) {
     ch_phix_db_file = Channel
         .value(file( "${params.phix_reference}" ))
@@ -474,15 +481,20 @@ workflow MAG {
         ch_versions = ch_versions.mix(KRAKEN2.out.versions.first())

     if (( params.centrifuge_db || params.kraken2_db ) && !params.skip_krona){
-        KRONA_DB ()
+        if (params.krona_db){
+            ch_krona_db = ch_krona_db_file
+        } else {
+            KRONA_DB ()
+            ch_krona_db = KRONA_DB.out.db
+        }
         ch_tax_classifications = CENTRIFUGE.out.results_for_krona.mix(KRAKEN2.out.results_for_krona)
             . map { classifier, meta, report ->
-                def meta_new = meta + [classifer: classifier]
+                def meta_new = meta + [classifier: classifier]
                 [ meta_new, report ]
             }
         KRONA (
             ch_tax_classifications,
-            KRONA_DB.out.db.collect()
+            ch_krona_db
         )
         ch_versions = ch_versions.mix(KRONA.out.versions.first())
     }
