From 036da2cbff1f890b8893f7672c10fa7f7a049593 Mon Sep 17 00:00:00 2001 From: fellen31 Date: Mon, 28 Oct 2024 20:02:09 +0100 Subject: [PATCH] Refactor reference channels --- CHANGELOG.md | 2 + .../{schema_snpdb.json => schema_snp_db.json} | 11 +- assets/schema_vep_plugin_files.json | 20 ++++ modules.json | 3 +- modules/nf-core/cadd/cadd.diff | 8 +- modules/nf-core/cadd/main.nf | 4 +- nextflow_schema.json | 2 +- subworkflows/local/annotate_cadd/main.nf | 8 +- subworkflows/local/prepare_genome.nf | 23 +--- .../local/rank_variants/tests/main.nf.test | 9 +- subworkflows/local/snv_annotation/main.nf | 6 +- .../local/snv_annotation/tests/main.nf.test | 10 +- .../local/utils_nfcore_nallo_pipeline/main.nf | 12 +++ tests/.nftignore | 2 +- tests/samplesheet.nf.test.snap | 3 +- .../samplesheet_multisample_bam.nf.test.snap | 4 +- ...mplesheet_multisample_ont_bam.nf.test.snap | 4 +- workflows/nallo.nf | 102 ++++++++---------- 18 files changed, 117 insertions(+), 116 deletions(-) rename assets/{schema_snpdb.json => schema_snp_db.json} (53%) create mode 100644 assets/schema_vep_plugin_files.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 48d4df78..19156abb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,6 +64,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#435](https://github.com/genomic-medicine-sweden/nallo/pull/435) - Updated and refactored processes and workflows related to variant ranking - [#438](https://github.com/genomic-medicine-sweden/nallo/pull/438) - Updated pipeline tests to use functions in nft-utils instead of checking hardcoded paths - [#440](https://github.com/genomic-medicine-sweden/nallo/pull/440) - Updated hifiasm to 0.20 with new default parameters for telomeres and scaffolding ([#295](https://github.com/genomic-medicine-sweden/nallo/issues/295)) +- [#443](https://github.com/genomic-medicine-sweden/nallo/pull/443) - Refactored reference channel assignments +- 
[#443](https://github.com/genomic-medicine-sweden/nallo/pull/443) - Updated schemas for `vep_plugin_files` and `snp_db` ### `Removed` diff --git a/assets/schema_snpdb.json b/assets/schema_snp_db.json similarity index 53% rename from assets/schema_snpdb.json rename to assets/schema_snp_db.json index 648a5283..4d9141db 100644 --- a/assets/schema_snpdb.json +++ b/assets/schema_snp_db.json @@ -1,8 +1,8 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/schema_gvcfs.json", - "title": "genomic-medicine-sweden/nallo pipeline - params.extra_gvcfs schema", - "description": "Schema for the file provided with params.extra_gvcfs", + "$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/schema_snp_db.json", + "title": "genomic-medicine-sweden/nallo pipeline - params.snp_db schema", + "description": "Schema for the file provided with params.snp_db", "type": "array", "items": { "type": "object", @@ -10,14 +10,13 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces", - "meta": ["id"] + "errorMessage": "Sample must be provided and cannot contain spaces." 
}, "file": { "format": "file-path", "type": "string", "pattern": "^\\S+\\.zip$", - "errorMessage": "gVCF file must be provided, cannot contain spaces and must have extension 'g.vcf.gz' or 'gvcf.gz'" + "errorMessage": "Echtvar database must be provided, cannot contain spaces and must have extension '.zip'" } }, "required": ["sample", "file"] diff --git a/assets/schema_vep_plugin_files.json b/assets/schema_vep_plugin_files.json new file mode 100644 index 00000000..0be393a3 --- /dev/null +++ b/assets/schema_vep_plugin_files.json @@ -0,0 +1,20 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/schema_vep_plugin_files.json", + "title": "genomic-medicine-sweden/nallo pipeline - params.vep_plugin_files schema", + "description": "Schema for the file provided with params.vep_plugin_files", + "type": "array", + "items": { + "type": "object", + "properties": { + "vep_files": { + "format": "file-path", + "type": "string", + "pattern": "^\\S+", + "exists": true, + "errorMessage": "Vep plugin file must be a path and exist." 
+ } + }, + "required": ["vep_files"] + } +} diff --git a/modules.json b/modules.json index 55088401..97bc7115 100644 --- a/modules.json +++ b/modules.json @@ -68,7 +68,8 @@ "cadd": { "branch": "master", "git_sha": "cf3ed075695639b0a0924eb0901146df1996dc08", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/cadd/cadd.diff" }, "cat/fastq": { "branch": "master", diff --git a/modules/nf-core/cadd/cadd.diff b/modules/nf-core/cadd/cadd.diff index 2ee51723..2243f02d 100644 --- a/modules/nf-core/cadd/cadd.diff +++ b/modules/nf-core/cadd/cadd.diff @@ -1,4 +1,6 @@ Changes in module 'nf-core/cadd' +'modules/nf-core/cadd/meta.yml' is unchanged +Changes in 'cadd/main.nf': --- modules/nf-core/cadd/main.nf +++ modules/nf-core/cadd/main.nf @@ -7,13 +7,14 @@ @@ -13,10 +15,12 @@ Changes in module 'nf-core/cadd' input: tuple val(meta), path(vcf) - path(annotation_dir) -+ path(prescored_dir) +- path(annotation_dir) ++ tuple val(meta2), path(annotation_dir) ++ tuple val(meta3), path(prescored_dir) output: tuple val(meta), path("*.tsv.gz"), emit: tsv +'modules/nf-core/cadd/environment.yml' is unchanged ************************************************************ diff --git a/modules/nf-core/cadd/main.nf b/modules/nf-core/cadd/main.nf index 52490c64..d830ff72 100644 --- a/modules/nf-core/cadd/main.nf +++ b/modules/nf-core/cadd/main.nf @@ -13,8 +13,8 @@ process CADD { input: tuple val(meta), path(vcf) - path(annotation_dir) - path(prescored_dir) + tuple val(meta2), path(annotation_dir) + tuple val(meta3), path(prescored_dir) output: tuple val(meta), path("*.tsv.gz"), emit: tsv diff --git a/nextflow_schema.json b/nextflow_schema.json index ec1fa4b2..a459def4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -163,7 +163,7 @@ "pattern": "^\\S+\\.csv$", "format": "file-path", "mimetype": "text/csv", - "schema": "/assets/schema_snpdb.json", + "schema": "/assets/schema_snp_db.json", "description": "A csv file with echtvar databases to 
annotate SNVs with", "exists": true }, diff --git a/subworkflows/local/annotate_cadd/main.nf b/subworkflows/local/annotate_cadd/main.nf index 9a1dc047..a56d1450 100644 --- a/subworkflows/local/annotate_cadd/main.nf +++ b/subworkflows/local/annotate_cadd/main.nf @@ -17,9 +17,9 @@ workflow ANNOTATE_CADD { ch_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_vcf // channel: [mandatory] [ val(meta), path(vcfs) ] ch_index // channel: [mandatory] [ val(meta), path(tbis) ] - ch_header // channel: [mandatory] [ path(txt) ] - ch_cadd_resources // channel: [mandatory] [ path(dir) ] - ch_cadd_prescored // channel: [mandatory] [ path(dir) ] + ch_header // channel: [mandatory] [ val(meta), path(txt) ] + ch_cadd_resources // channel: [mandatory] [ val(meta), path(dir) ] + ch_cadd_prescored // channel: [mandatory] [ val(meta), path(dir) ] main: ch_versions = Channel.empty() @@ -64,7 +64,7 @@ workflow ANNOTATE_CADD { ANNOTATE_INDELS ( ch_annotate_indels_in, - ch_header, + ch_header.map { meta, header -> header }, CADD_TO_REFERENCE_CHRNAMES.out.output.map { meta, txt -> txt } ) ch_versions = ch_versions.mix(ANNOTATE_INDELS.out.versions) diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index f18cf360..23bfe8f0 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -10,7 +10,6 @@ workflow PREPARE_GENOME { gunzip_fasta // bool: should we gunzip fasta ch_vep_cache // channel: [optional] [ val(meta), path(cache) ] split_vep_files // bool: are there vep extra files - ch_vep_extra_files_unsplit // channel: [optional] [ val(meta), path(csv) ] main: ch_versions = Channel.empty() @@ -40,33 +39,13 @@ workflow PREPARE_GENOME { ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions) UNTAR_VEP_CACHE.out.untar - .map { meta, files -> [ files ] } .collect() .set { untarred_vep } - // Read and store paths in the vep_plugin_files file - if ( split_vep_files ) { - ch_vep_extra_files_unsplit - .splitCsv ( 
header:true ) - .map { row -> - path = file(row.vep_files[0]) - if(path.exists()) { - return [path] - } else { - error("\nVep database file ${path} does not exist.") - } - } - .collect() - .set { ch_vep_extra_files } - } else { - ch_vep_extra_files = Channel.value([]) - } - emit: mmi = MINIMAP2_INDEX.out.index.collect() // channel: [ val(meta), path(mmi) ] fai = SAMTOOLS_FAIDX.out.fai.collect() // channel: [ val(meta), path(fai) ] fasta = ch_fasta // channel: [ val(meta), path(fasta) ] - vep_resources = untarred_vep // channel: [ path(cache) ] - vep_extra_files = ch_vep_extra_files // channel: [ path(files) ] + vep_resources = untarred_vep // channel: [ val(meta), path(cache) ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/rank_variants/tests/main.nf.test b/subworkflows/local/rank_variants/tests/main.nf.test index 581a7510..473f16c8 100644 --- a/subworkflows/local/rank_variants/tests/main.nf.test +++ b/subworkflows/local/rank_variants/tests/main.nf.test @@ -20,9 +20,6 @@ nextflow_workflow { file(params.pipelines_testdata_base_path + 'reference/vep_cache_test_data.tar.gz', checkIfExists:true) ] input[3] = true - input[4] = Channel.of([ - file(params.pipelines_testdata_base_path + 'reference/vep_plugin_files.csv', checkIfExists: true) - ]) """ } } @@ -69,9 +66,11 @@ nextflow_workflow { ] input[2] = PREPARE_GENOME.out.fasta input[3] = PREPARE_GENOME.out.fai - input[4] = PREPARE_GENOME.out.vep_resources + input[4] = PREPARE_GENOME.out.vep_resources.map { meta, cache -> cache } input[5] = Channel.value('110') - input[6] = PREPARE_GENOME.out.vep_extra_files + input[6] = Channel.of([ + file(params.pipelines_testdata_base_path + 'reference/vep_plugin_files.csv', checkIfExists: true) + ]).splitCsv(header:true).map { row -> row.vep_files }.collect() input[7] = false input[8] = Channel.value([]) input[9] = null diff --git a/subworkflows/local/snv_annotation/main.nf b/subworkflows/local/snv_annotation/main.nf index 82fd3695..d1e382be 
100644 --- a/subworkflows/local/snv_annotation/main.nf +++ b/subworkflows/local/snv_annotation/main.nf @@ -8,7 +8,7 @@ workflow SNV_ANNOTATION { take: ch_vcf // channel [mandatory] [ val(meta), path(vcf) ] - ch_databases // channel: [mandatory] [ val(meta), path(db) ] + ch_databases // channel: [mandatory] [ path(db) ] ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_vep_cache // channel: [mandatory] [ path(cache) ] @@ -16,8 +16,8 @@ workflow SNV_ANNOTATION { ch_vep_extra_files // channel: [mandatory] [ path(files) ] val_annotate_cadd // bool: [mandatory] ch_cadd_header // channel: [mandatory] [ path(txt) ] - ch_cadd_resources // channel: [mandatory] [ path(annotation) ] - ch_cadd_prescored // channel: [mandatory] [ path(prescored) ] + ch_cadd_resources // channel: [mandatory] [ val(meta), path(annotation) ] + ch_cadd_prescored // channel: [mandatory] [ val(meta), path(prescored) ] main: ch_versions = Channel.empty() diff --git a/subworkflows/local/snv_annotation/tests/main.nf.test b/subworkflows/local/snv_annotation/tests/main.nf.test index 3164b44a..dd5ee5ef 100644 --- a/subworkflows/local/snv_annotation/tests/main.nf.test +++ b/subworkflows/local/snv_annotation/tests/main.nf.test @@ -88,11 +88,11 @@ nextflow_workflow { ] input[2] = GUNZIP.out.gunzip input[3] = SAMTOOLS_FAIDX.out.fai - input[4] = UNTAR.out.untar.map { meta, cache -> cache } + input[4] = UNTAR.out.untar.map { meta, cache -> cache} input[5] = Channel.value('110') - input[6] = [ + input[6] = Channel.of([ file(params.pipelines_testdata_base_path + 'reference/vep_plugin_files.csv', checkIfExists: true) - ] + ]).splitCsv(header:true).map { row -> row.vep_files }.collect() input[7] = false input[8] = Channel.value([]) input[9] = null @@ -132,9 +132,9 @@ nextflow_workflow { input[3] = SAMTOOLS_FAIDX.out.fai input[4] = UNTAR.out.untar.map { meta, cache -> cache } input[5] = Channel.value('110') - input[6] = [ + input[6] = Channel.of([ 
file(params.pipelines_testdata_base_path + 'reference/vep_plugin_files.csv', checkIfExists: true) - ] + ]).splitCsv(header:true).map { row -> row.vep_files }.collect() input[7] = false input[8] = Channel.value([]) input[9] = null diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf index 3061e3c0..5fca3199 100644 --- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf @@ -640,3 +640,15 @@ def findKeyForValue(def valueToFind, Map map) { } return null // Value not found } + +// Utility function to create channels from references +def createReferenceChannelFromPath(param, defaultValue = '') { + return param ? Channel.fromPath(param, checkIfExists: true) + .map { [ [ id: it.simpleName ], it ] } + .collect() : defaultValue +} +// Utility function to create channels from samplesheets +def createReferenceChannelFromSamplesheet(param, schema, defaultValue = '') { + return param ? 
Channel.fromList(samplesheetToList(param, schema)) : defaultValue +} + diff --git a/tests/.nftignore b/tests/.nftignore index 3499afeb..4a4111aa 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -10,7 +10,7 @@ paraphase/**/*.{vcf.gz,tbi,bam,bai,json} phased_variants/**/*.{vcf.gz,tbi} pipeline_info/*.{html,json,txt,yml} qc/cramino/**/*.txt -qc/fastqc/**/*.zip +qc/fastqc/**/*.{zip,html} qc/somalier/**/*.{html,tsv} repeat_annotation/**/*.{vcf.gz,tbi} repeat_calling/**/*.{vcf.gz,tbi,bam,bai} diff --git a/tests/samplesheet.nf.test.snap b/tests/samplesheet.nf.test.snap index d99fa7c1..64dae7b6 100644 --- a/tests/samplesheet.nf.test.snap +++ b/tests/samplesheet.nf.test.snap @@ -439,7 +439,6 @@ "test.ped:md5,bd5cec27ba7337a85cf98e787131e2b5", "HG002_Revio_cramino_aligned_phased.arrow:md5,a76219e9046db32c4b3d6d78425c5d78", "HG002_Revio_cramino_aligned.arrow:md5,a76219e9046db32c4b3d6d78425c5d78", - "HG002_Revio_fastqc.html:md5,1080b519dbbb66f45eee74e311d4922c", "HG002_Revio.mosdepth.global.dist.txt:md5,63701e857361046628f89cb84988ea1d", "HG002_Revio.mosdepth.region.dist.txt:md5,6b46396518979ff9d9771cb8a8fbbab0", "HG002_Revio.mosdepth.summary.txt:md5,311aad293c6d8a646b6dd4edc337845c", @@ -551,6 +550,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-25T13:00:09.69999597" + "timestamp": "2024-10-29T08:07:45.120070133" } } \ No newline at end of file diff --git a/tests/samplesheet_multisample_bam.nf.test.snap b/tests/samplesheet_multisample_bam.nf.test.snap index 21e2178a..94ef5a85 100644 --- a/tests/samplesheet_multisample_bam.nf.test.snap +++ b/tests/samplesheet_multisample_bam.nf.test.snap @@ -563,8 +563,6 @@ "HG002_Revio_B_cramino_aligned_phased.arrow:md5,3bb08ac5958c6cb0801f319066c3a1b2", "HG002_Revio_A_cramino_aligned.arrow:md5,a76219e9046db32c4b3d6d78425c5d78", "HG002_Revio_B_cramino_aligned.arrow:md5,3bb08ac5958c6cb0801f319066c3a1b2", - "HG002_Revio_A_fastqc.html:md5,25f875c3a542ff8590655685bc152658", - 
"HG002_Revio_B_fastqc.html:md5,4b7d698cbe79dbfb4a74e8e7f84891d5", "HG002_Revio_A.mosdepth.global.dist.txt:md5,63701e857361046628f89cb84988ea1d", "HG002_Revio_A.mosdepth.region.dist.txt:md5,6b46396518979ff9d9771cb8a8fbbab0", "HG002_Revio_A.mosdepth.summary.txt:md5,311aad293c6d8a646b6dd4edc337845c", @@ -759,6 +757,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-25T13:01:47.642764093" + "timestamp": "2024-10-29T08:09:35.63908858" } } \ No newline at end of file diff --git a/tests/samplesheet_multisample_ont_bam.nf.test.snap b/tests/samplesheet_multisample_ont_bam.nf.test.snap index 5a25e57d..d8307404 100644 --- a/tests/samplesheet_multisample_ont_bam.nf.test.snap +++ b/tests/samplesheet_multisample_ont_bam.nf.test.snap @@ -402,8 +402,6 @@ "HG002_ONT_B_cramino_aligned_phased.arrow:md5,61af72539e105cec79db7c9b78eb15a7", "HG002_ONT_A_cramino_aligned.arrow:md5,d2a5c81595fa34925ab8f03078487d81", "HG002_ONT_B_cramino_aligned.arrow:md5,61af72539e105cec79db7c9b78eb15a7", - "HG002_ONT_A_fastqc.html:md5,94d86b38a30f846de64b840656663d18", - "HG002_ONT_B_fastqc.html:md5,2ec692ee5acf69717811be481d38f775", "HG002_ONT_A.mosdepth.global.dist.txt:md5,5ae0972357f99aa481a0bf12fb9e0b0b", "HG002_ONT_A.mosdepth.region.dist.txt:md5,023b1c6aeaf8fa5ededd6b711a5cd012", "HG002_ONT_A.mosdepth.summary.txt:md5,c3b664b0983213f73edf3c0d5a0b04a2", @@ -502,6 +500,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-25T13:10:46.10939576" + "timestamp": "2024-10-29T08:11:05.903725502" } } \ No newline at end of file diff --git a/workflows/nallo.nf b/workflows/nallo.nf index 86a79a37..0a15ec4c 100644 --- a/workflows/nallo.nf +++ b/workflows/nallo.nf @@ -1,5 +1,8 @@ include { samplesheetToList } from 'plugin/nf-schema' - +include { + createReferenceChannelFromPath + createReferenceChannelFromSamplesheet +} from '../subworkflows/local/utils_nfcore_nallo_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT 
LOCAL SUBWORKFLOWS @@ -68,46 +71,30 @@ workflow NALLO { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - // Optional input files that has to be set depending on which workflow is run - ch_cadd_header = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect() - ch_cadd_resources = params.cadd_resources ? Channel.fromPath(params.cadd_resources).collect() - : '' - ch_cadd_prescored = params.cadd_prescored ? Channel.fromPath(params.cadd_prescored).collect() - : '' - ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { it -> [ it.simpleName, it ] }.collect() - : '' - ch_tandem_repeats = params.tandem_repeats ? Channel.fromPath(params.tandem_repeats).map{ [ it.simpleName, it ] }.collect() - : Channel.value([[],[]]) - ch_input_bed = params.bed ? Channel.fromPath(params.bed).map{ [ [ id:it.simpleName ] , it ] }.collect() - : Channel.value([[],[]]) - ch_par = params.par_regions ? Channel.fromPath(params.par_regions).map { [ [ id: it.simpleName ], it ] }.collect() - : '' - ch_trgt_bed = params.trgt_repeats ? Channel.fromPath(params.trgt_repeats).map { it -> [ it.simpleName, it ] }.collect() - : '' - ch_variant_catalog = params.variant_catalog ? Channel.fromPath(params.variant_catalog).map { it -> [ it.simpleName, it ] }.collect() - : '' - ch_databases = params.snp_db ? Channel.fromList(samplesheetToList(params.snp_db, 'assets/schema_snpdb.json')).map{ it[1] }.collect() - : '' - ch_variant_consequences_snv = params.variant_consequences_snv ? Channel.fromPath(params.variant_consequences_snv).map { it -> [ it.simpleName, it ] }.collect() - : Channel.value([]) - ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [ [ id:'vep_cache' ], it ] }.collect() - : Channel.value([[],[]]) - ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files).collect() - : '' - ch_expected_xy_bed = params.hificnv_xy ? 
Channel.fromPath(params.hificnv_xy).collect() - : '' - ch_expected_xx_bed = params.hificnv_xx ? Channel.fromPath(params.hificnv_xx).collect() - : '' - ch_exclude_bed = params.hificnv_exclude ? Channel.fromPath(params.hificnv_exclude).collect() - : '' - ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).map { it -> [ it.simpleName, it ] }.collect() - : Channel.value([]) - ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).map { it -> [ it.simpleName, it ] }.collect() - : Channel.value([]) - ch_somalier_sites = params.somalier_sites ? Channel.fromPath(params.somalier_sites).map { [ it.simpleName, it ] }.collect() - : '' - ch_svdb_dbs = params.svdb_dbs ? Channel.fromPath(params.svdb_dbs).map { [ it.simpleName, it ] }.collect() - : '' + // Channels from (optional) input files + // If provided: [[id: 'reference'], [/path/to/reference_full_name.file]] + ch_cadd_header = createReferenceChannelFromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt") + ch_cadd_resources = createReferenceChannelFromPath(params.cadd_resources) + ch_cadd_prescored = createReferenceChannelFromPath(params.cadd_prescored) + ch_fasta = createReferenceChannelFromPath(params.fasta) + ch_tandem_repeats = createReferenceChannelFromPath(params.tandem_repeats, Channel.value([[],[]])) + ch_input_bed = createReferenceChannelFromPath(params.bed, Channel.value([[],[]])) + ch_par = createReferenceChannelFromPath(params.par_regions) + ch_trgt_bed = createReferenceChannelFromPath(params.trgt_repeats) + ch_variant_catalog = createReferenceChannelFromPath(params.variant_catalog) + ch_variant_consequences_snv = createReferenceChannelFromPath(params.variant_consequences_snv) + ch_vep_cache_unprocessed = createReferenceChannelFromPath(params.vep_cache, Channel.value([])) + ch_expected_xy_bed = createReferenceChannelFromPath(params.hificnv_xy) + ch_expected_xx_bed = createReferenceChannelFromPath(params.hificnv_xx) + ch_exclude_bed 
= createReferenceChannelFromPath(params.hificnv_exclude) + ch_reduced_penetrance = createReferenceChannelFromPath(params.reduced_penetrance) + ch_score_config_snv = createReferenceChannelFromPath(params.score_config_snv) + ch_somalier_sites = createReferenceChannelFromPath(params.somalier_sites) + ch_svdb_dbs = createReferenceChannelFromPath(params.svdb_dbs) + + // Channels from (optional) input samplesheets validated by schema + ch_databases = createReferenceChannelFromSamplesheet(params.snp_db, 'assets/schema_snp_db.json') + ch_vep_plugin_files = createReferenceChannelFromSamplesheet(params.vep_plugin_files, 'assets/schema_vep_plugin_files.json', Channel.value([])) // Check parameter that doesn't conform to schema validation here if (params.phaser.matches('hiphase') && params.preset == 'ONT_R10') { error "The HiPhase license only permits analysis of data from PacBio. For details see: https://github.com/PacificBiosciences/HiPhase/blob/main/LICENSE.md" } @@ -124,24 +111,21 @@ workflow NALLO { // // Prepare references // - if(!params.skip_mapping_wf | !params.skip_assembly_wf ) { + if(!params.skip_mapping_wf || !params.skip_assembly_wf ) { PREPARE_GENOME ( ch_fasta, params.fasta.endsWith('.gz'), ch_vep_cache_unprocessed, params.vep_plugin_files, - ch_vep_extra_files_unsplit ) ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) - if(!params.skip_snv_annotation) { - if (params.vep_cache) { - if (params.vep_cache.endsWith("tar.gz")) { - ch_vep_cache = PREPARE_GENOME.out.vep_resources - } else { - ch_vep_cache = Channel.fromPath(params.vep_cache).collect() - } + if(!params.skip_snv_annotation && params.vep_cache) { + if (params.vep_cache.endsWith("tar.gz")) { + ch_vep_cache = PREPARE_GENOME.out.vep_resources + } else { + ch_vep_cache = Channel.fromPath(params.vep_cache).collect() } } @@ -314,9 +298,9 @@ workflow NALLO { CALL_SVS.out.ch_multisample_vcf, fasta, ch_svdb_dbs, - ch_vep_cache, + ch_vep_cache.map { meta, cache -> cache }, params.vep_cache_version, - 
PREPARE_GENOME.out.vep_extra_files + ch_vep_plugin_files.collect() ) ANNOTATE_SVS.out.vcf @@ -377,12 +361,12 @@ workflow NALLO { // SNV_ANNOTATION( SHORT_VARIANT_CALLING.out.combined_bcf, - ch_databases, + ch_databases.map { meta, databases -> databases }.collect(), fasta, fai.map { name, fai -> [ [ id: name ], fai ] }, - ch_vep_cache, + ch_vep_cache.map { meta, cache -> cache }, params.vep_cache_version, - PREPARE_GENOME.out.vep_extra_files, + ch_vep_plugin_files.collect(), (params.cadd_resources && params.cadd_prescored), ch_cadd_header, ch_cadd_resources, @@ -466,7 +450,13 @@ workflow NALLO { .join(SHORT_VARIANT_CALLING.out.snp_calls_vcf) .set { cnv_workflow_in } - CNV(cnv_workflow_in, fasta, ch_expected_xy_bed, ch_expected_xx_bed, ch_exclude_bed) + CNV( + cnv_workflow_in, + fasta, + ch_expected_xy_bed.map { meta, bed -> bed }, + ch_expected_xx_bed.map { meta, bed -> bed }, + ch_exclude_bed.map { meta, bed -> bed } + ) ch_versions = ch_versions.mix(CNV.out.versions) }