nf-cmgg · nvnieuwk · Apr 25, 2024 · Apr 23, 2024 · Apr 23, 2024 · Apr 23, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Changes
 
 1. Updated all tests to use snapshots instead.
+2. Made the pipeline pluggable so that it can be used as part of a meta pipeline.
 
 ## v1.5.1 - Great Geraardsbergen - [March 7 2024]
 

diff --git a/assets/schema_input.json b/assets/schema_input.json
@@ -75,5 +75,11 @@
             }
         },
-        "required": ["sample"]
+        "required": ["sample"],
+        "dependentRequired": {
+            "truth_tbi": ["truth_vcf"],
+            "truth_bed": ["truth_vcf"],
+            "tbi": ["gvcf"],
+            "crai": ["cram"]
+        }
     }
 }
diff --git a/main.nf b/main.nf
@@ -11,23 +11,93 @@ nextflow.enable.dsl = 2
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
+    GENOME PARAMETER VALUES
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-include { GERMLINE                } from './workflows/germline'
-include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_cmgg_germline_pipeline'
-include { PIPELINE_COMPLETION     } from './subworkflows/local/utils_cmgg_germline_pipeline'
-
 include { getGenomeAttribute      } from './subworkflows/local/utils_cmgg_germline_pipeline'
 
+params.fasta                = getGenomeAttribute('fasta')
+params.fai                  = getGenomeAttribute('fai')
+params.dict                 = getGenomeAttribute('dict')
+params.strtablefile         = getGenomeAttribute('strtablefile')
+params.sdf                  = getGenomeAttribute('sdf')
+params.dbsnp                = getGenomeAttribute('dbsnp')
+params.dbsnp_tbi            = getGenomeAttribute('dbsnp_tbi')
+params.vep_cache            = getGenomeAttribute('vep_cache')
+params.dbnsfp               = getGenomeAttribute('dbnsfp')
+params.dbnsfp_tbi           = getGenomeAttribute('dbnsfp_tbi')
+params.spliceai_indel       = getGenomeAttribute('spliceai_indel')
+params.spliceai_indel_tbi   = getGenomeAttribute('spliceai_indel_tbi')
+params.spliceai_snv         = getGenomeAttribute('spliceai_snv')
+params.spliceai_snv_tbi     = getGenomeAttribute('spliceai_snv_tbi')
+params.mastermind           = getGenomeAttribute('mastermind')
+params.mastermind_tbi       = getGenomeAttribute('mastermind_tbi')
+params.eog                  = getGenomeAttribute('eog')
+params.eog_tbi              = getGenomeAttribute('eog_tbi')
+params.alphamissense        = getGenomeAttribute('alphamissense')
+params.alphamissense_tbi    = getGenomeAttribute('alphamissense_tbi')
+params.vcfanno_resources    = getGenomeAttribute('vcfanno_resources')
+params.vcfanno_config       = getGenomeAttribute('vcfanno_config')
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    GENOME PARAMETER VALUES
+    VALIDATE INPUTS
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
+//
+// Check for dependencies between parameters
+//
+
+if(params.dbsnp_tbi && !params.dbsnp){
+    error("Please specify the dbsnp VCF with --dbsnp VCF")
+}
+
+if (params.annotate) {
+    // Check if a genome is given
+    if (!params.genome) { error("A genome should be supplied for annotation (use --genome)") }
+
+    // Check if the VEP versions were given
+    if (!params.vep_version) { error("A VEP version should be supplied for annotation (use --vep_version)") }
+    if (!params.vep_cache_version) { error("A VEP cache version should be supplied for annotation (use --vep_cache_version)") }
+
+    // Check if a species is entered
+    if (!params.species) { error("A species should be supplied for annotation (use --species)") }
 
+    // Check if all vcfanno files are supplied when vcfanno should be used
+    if (params.vcfanno && (!params.vcfanno_config || !params.vcfanno_resources)) {
+        error("A TOML file and resource files should be supplied when using vcfanno (use --vcfanno_config and --vcfanno_resources)")
+    }
+}
+
+callers = params.callers.tokenize(",") // split the comma-separated --callers value into a list of caller names
+for(caller in callers) { // abort on the first caller that is not listed in GlobalVariables.availableCallers
+    if(!(caller in GlobalVariables.availableCallers)) { error("\"${caller}\" is not a supported caller, please use one or more of these instead: ${GlobalVariables.availableCallers}")}
+}
+
+if (params.output_suffix && callers.size() > 1) {
+    error("Cannot use --output_suffix with more than one caller")
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    CONFIG FILES
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+multiqc_logo     = params.multiqc_logo   ?: "$projectDir/assets/CMGG_logo.png" // use the logo bundled under assets/ when --multiqc_logo is not set
+
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+include { GERMLINE                } from './workflows/germline'
+include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_cmgg_germline_pipeline'
+include { PIPELINE_COMPLETION     } from './subworkflows/local/utils_cmgg_germline_pipeline'
 
 //
 // WORKFLOW: Run main analysis pipeline depending on type of input
@@ -44,7 +114,69 @@ workflow NFCMGG_GERMLINE {
     // WORKFLOW: Run pipeline
     //
     GERMLINE (
-        samplesheet
+        // Input channels
+        samplesheet,
+
+        // File inputs
+        params.fasta,
+        params.fai,
+        params.dict,
+        params.strtablefile,
+        params.sdf,
+        params.dbsnp,
+        params.dbsnp_tbi,
+        params.vep_cache,
+        params.dbnsfp,
+        params.dbnsfp_tbi,
+        params.spliceai_indel,
+        params.spliceai_indel_tbi,
+        params.spliceai_snv,
+        params.spliceai_snv_tbi,
+        params.mastermind,
+        params.mastermind_tbi,
+        params.eog,
+        params.eog_tbi,
+        params.alphamissense,
+        params.alphamissense_tbi,
+        params.vcfanno_resources,
+        params.vcfanno_config,
+        params.multiqc_config,
+        multiqc_logo,
+        params.multiqc_methods_description,
+        params.roi,
+        params.somalier_sites,
+        params.vcfanno_lua,
+        params.updio_common_cnvs,
+        params.automap_repeats,
+        params.automap_panel,
+        params.outdir,
+
+        // Boolean inputs
+        params.dragstr,
+        params.annotate,
+        params.vcfanno,
+        params.only_call,
+        params.only_merge,
+        params.filter,
+        params.normalize,
+        params.add_ped,
+        params.gemini,
+        params.validate,
+        params.updio,
+        params.automap,
+        params.vep_dbnsfp,
+        params.vep_spliceai,
+        params.vep_mastermind,
+        params.vep_eog,
+        params.vep_alphamissense,
+
+        // Value inputs
+        params.genome,
+        params.species,
+        params.vep_cache_version,
+        params.vep_chunk_size,
+        params.scatter_count,
+        params.callers.tokenize(",")
     )
 
     emit:

diff --git a/nextflow.config b/nextflow.config
@@ -265,32 +265,6 @@ if (!params.igenomes_ignore) {
     params.genomes = [:]
 }
 
-// Get iGenomes reference path if it doesn't already exist
-params {
-    fasta                = fasta                ?: getGenomeAttribute('fasta')
-    fai                  = fai                  ?: getGenomeAttribute('fai')
-    dict                 = dict                 ?: getGenomeAttribute('dict')
-    strtablefile         = strtablefile         ?: getGenomeAttribute('strtablefile')
-    sdf                  = sdf                  ?: getGenomeAttribute('sdf')
-    dbsnp                = dbsnp                ?: getGenomeAttribute('dbsnp')
-    dbsnp_tbi            = dbsnp_tbi            ?: getGenomeAttribute('dbsnp_tbi')
-    vep_cache            = vep_cache            ?: getGenomeAttribute('vep_cache')
-    dbnsfp               = dbnsfp               ?: getGenomeAttribute('dbnsfp')
-    dbnsfp_tbi           = dbnsfp_tbi           ?: getGenomeAttribute('dbnsfp_tbi')
-    spliceai_indel       = spliceai_indel       ?: getGenomeAttribute('spliceai_indel')
-    spliceai_indel_tbi   = spliceai_indel_tbi   ?: getGenomeAttribute('spliceai_indel_tbi')
-    spliceai_snv         = spliceai_snv         ?: getGenomeAttribute('spliceai_snv')
-    spliceai_snv_tbi     = spliceai_snv_tbi     ?: getGenomeAttribute('spliceai_snv_tbi')
-    mastermind           = mastermind           ?: getGenomeAttribute('mastermind')
-    mastermind_tbi       = mastermind_tbi       ?: getGenomeAttribute('mastermind_tbi')
-    eog                  = eog                  ?: getGenomeAttribute('eog')
-    eog_tbi              = eog_tbi              ?: getGenomeAttribute('eog_tbi')
-    alphamissense        = alphamissense        ?: getGenomeAttribute('alphamissense')
-    alphamissense_tbi    = alphamissense_tbi    ?: getGenomeAttribute('alphamissense_tbi')
-    vcfanno_resources    = vcfanno_resources    ?: getGenomeAttribute('vcfanno_resources')
-    vcfanno_config       = vcfanno_config       ?: getGenomeAttribute('vcfanno_config')
-}
-
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
 // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
@@ -372,16 +346,3 @@ def check_max(obj, type) {
         }
     }
 }
-
-//
-// Get attribute from genome config file e.g. fasta
-//
-def getGenomeAttribute(attribute) {
-    if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
-        if (params.genomes[ params.genome ].containsKey(attribute)) {
-            return params.genomes[ params.genome ][ attribute ]
-        }
-    }
-    return null
-}
-
diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -16,7 +16,6 @@
                     "type": "string",
                     "format": "file-path",
                     "exists": true,
-                    "schema": "assets/schema_input.json",
                     "pattern": "^\\S+\\.(csv|tsv|yaml|yml|json)$",
                     "description": "Path to comma-separated file containing information about the samples in the experiment.",
                     "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with samples, and a header row. See [usage docs](./usage.md).",
@@ -459,29 +458,6 @@
                     "default": true,
                     "fa_icon": "fas fa-check-square",
                     "hidden": true
-                },
-                "validationShowHiddenParams": {
-                    "type": "boolean",
-                    "fa_icon": "far fa-eye-slash",
-                    "description": "Show all params when using `--help`",
-                    "hidden": true,
-                    "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
-                },
-                "validationLenientMode": {
-                    "type": "boolean",
-                    "hidden": true,
-                    "description": "Lenient mode for parameter validation"
-                },
-                "validationFailUnrecognisedParams": {
-                    "type": "boolean",
-                    "hidden": true,
-                    "description": "Fail on unrecognised parameters"
-                },
-                "validationSchemaIgnoreParams": {
-                    "type": "string",
-                    "default": "genomes,igenomes_base,test_data",
-                    "hidden": true,
-                    "description": "Comma-separated list of parameters to ignore when validating against the schema"
                 }
             }
         },

diff --git a/subworkflows/local/cram_call_gatk4/main.nf b/subworkflows/local/cram_call_gatk4/main.nf
@@ -10,13 +10,14 @@ include { VCF_CONCAT_BCFTOOLS           } from '../vcf_concat_bcftools/main'
 
 workflow CRAM_CALL_GATK4 {
     take:
-        ch_input             // channel: [mandatory] [ val(meta), path(cram), path(crai), path(bed) ] => sample CRAM files and their indexes with the split bed files
-        ch_fasta             // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference
-        ch_fai               // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index
-        ch_dict              // channel: [mandatory] [ val(meta), path(dict) ] => sequence dictionary
-        ch_strtablefile      // channel: [optional]  [ val(meta), path(strtablefile) ] => STR table file
-        ch_dbsnp             // channel: [optional]  [ path(dbsnp) ] => The VCF containing the dbsnp variants
-        ch_dbsnp_tbi         // channel: [optional]  [ path(dbsnp_tbi) ] => The index of the dbsnp VCF
+        ch_input            // channel: [mandatory] [ val(meta), path(cram), path(crai), path(bed) ] => sample CRAM files and their indexes with the split bed files
+        ch_fasta            // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference
+        ch_fai              // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index
+        ch_dict             // channel: [mandatory] [ val(meta), path(dict) ] => sequence dictionary
+        ch_strtablefile     // channel: [optional]  [ val(meta), path(strtablefile) ] => STR table file
+        ch_dbsnp            // channel: [optional]  [ path(dbsnp) ] => The VCF containing the dbsnp variants
+        ch_dbsnp_tbi        // channel: [optional]  [ path(dbsnp_tbi) ] => The index of the dbsnp VCF
+        dragstr             // boolean: create a DragSTR model and run haplotypecaller with it
 
     main:
 
@@ -26,7 +27,7 @@ workflow CRAM_CALL_GATK4 {
     // Generate DRAGSTR models (if --dragstr is specified)
     //
 
-    if (params.dragstr) {
+    if (dragstr) {
 
         ch_input
             .map { meta, cram, crai, bed ->

diff --git a/subworkflows/local/cram_call_genotype_gatk4/main.nf b/subworkflows/local/cram_call_genotype_gatk4/main.nf
@@ -8,14 +8,19 @@ include { VCF_FILTER_BCFTOOLS           } from '../vcf_filter_bcftools/main'
 
 workflow CRAM_CALL_GENOTYPE_GATK4 {
     take:
-        ch_input             // channel: [mandatory] [ val(meta), path(cram), path(crai), path(bed) ] => sample CRAM files and their indexes with the split bed files
-        ch_gvcfs             // channel: [mandatory] [ val(meta), path(gvcf), path(tbi) ] => earlier called GVCFs with their indices
-        ch_fasta             // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference
-        ch_fai               // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index
-        ch_dict              // channel: [mandatory] [ val(meta), path(dict) ] => sequence dictionary
-        ch_strtablefile      // channel: [optional]  [ path(strtablefile) ] => STR table file
-        ch_dbsnp             // channel: [optional]  [ path(dbsnp) ] => The VCF containing the dbsnp variants
-        ch_dbsnp_tbi         // channel: [optional]  [ path(dbsnp_tbi) ] => The index of the dbsnp VCF
+        ch_input            // channel: [mandatory] [ val(meta), path(cram), path(crai), path(bed) ] => sample CRAM files and their indexes with the split bed files
+        ch_gvcfs            // channel: [mandatory] [ val(meta), path(gvcf), path(tbi) ] => earlier called GVCFs with their indices
+        ch_fasta            // channel: [mandatory] [ val(meta), path(fasta) ] => fasta reference
+        ch_fai              // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index
+        ch_dict             // channel: [mandatory] [ val(meta), path(dict) ] => sequence dictionary
+        ch_strtablefile     // channel: [optional]  [ path(strtablefile) ] => STR table file
+        ch_dbsnp            // channel: [optional]  [ path(dbsnp) ] => The VCF containing the dbsnp variants
+        ch_dbsnp_tbi        // channel: [optional]  [ path(dbsnp_tbi) ] => The index of the dbsnp VCF
+        dragstr             // boolean: create a DragSTR model and run haplotypecaller with it
+        only_call           // boolean: only run the variant calling
+        only_merge          // boolean: run until the family merging
+        filter              // boolean: filter the VCFs
+        scatter_count       // integer: the amount of times the VCFs should be scattered
 
     main:
 
@@ -30,7 +35,8 @@ workflow CRAM_CALL_GENOTYPE_GATK4 {
         ch_dict,
         ch_strtablefile,
         ch_dbsnp,
-        ch_dbsnp_tbi
+        ch_dbsnp_tbi,
+        dragstr
     )
     ch_versions = ch_versions.mix(CRAM_CALL_GATK4.out.versions)
     ch_reports  = ch_reports.mix(CRAM_CALL_GATK4.out.reports)
@@ -42,23 +48,25 @@ workflow CRAM_CALL_GENOTYPE_GATK4 {
         }
         .mix(CRAM_CALL_GATK4.out.gvcfs)
 
-    if(!params.only_call) {
+    if(!only_call) {
 
         GVCF_JOINT_GENOTYPE_GATK4(
             ch_gvcfs_ready,
             ch_fasta,
             ch_fai,
             ch_dict,
             ch_dbsnp,
-            ch_dbsnp_tbi
+            ch_dbsnp_tbi,
+            only_merge,
+            scatter_count
         )
         ch_versions = ch_versions.mix(GVCF_JOINT_GENOTYPE_GATK4.out.versions)
 
     }
 
-    if(!params.only_call && !params.only_merge) {
+    if(!only_call && !only_merge) {
 
-        if(params.filter) {
+        if(filter) {
             VCF_FILTER_BCFTOOLS(
                 GVCF_JOINT_GENOTYPE_GATK4.out.vcfs,
                 true

diff --git a/subworkflows/local/cram_call_vardictjava/main.nf b/subworkflows/local/cram_call_vardictjava/main.nf
@@ -18,6 +18,7 @@ workflow CRAM_CALL_VARDICTJAVA {
         ch_fai               // channel: [mandatory] [ val(meta), path(fai) ] => fasta reference index
         ch_dbsnp             // channel: [optional]  [ path(vcf) ] => the dbnsp vcf file
         ch_dbsnp_tbi         // channel: [optional]  [ path(tbi) ] => the dbsnp vcf index file
+        filter               // boolean: filter the VCFs
 
     main:
         ch_versions = Channel.empty()
@@ -106,7 +107,7 @@ workflow CRAM_CALL_VARDICTJAVA {
             VCF_CONCAT_BCFTOOLS.out.vcfs.set { ch_dbsnp_annotated }
         }
 
-        if(params.filter) {
+        if(filter) {
             VCF_FILTER_BCFTOOLS(
                 ch_dbsnp_annotated,
                 false