From 036da2cbff1f890b8893f7672c10fa7f7a049593 Mon Sep 17 00:00:00 2001 From: fellen31 Date: Mon, 28 Oct 2024 20:02:09 +0100 Subject: [PATCH] Refactor reference channels --- CHANGELOG.md | 2 + .../{schema_snpdb.json => schema_snp_db.json} | 11 +- assets/schema_vep_plugin_files.json | 20 ++++ modules.json | 3 +- modules/nf-core/cadd/cadd.diff | 8 +- modules/nf-core/cadd/main.nf | 4 +- nextflow_schema.json | 2 +- subworkflows/local/annotate_cadd/main.nf | 8 +- subworkflows/local/prepare_genome.nf | 23 +--- .../local/rank_variants/tests/main.nf.test | 9 +- subworkflows/local/snv_annotation/main.nf | 6 +- .../local/snv_annotation/tests/main.nf.test | 10 +- .../local/utils_nfcore_nallo_pipeline/main.nf | 12 +++ tests/.nftignore | 2 +- tests/samplesheet.nf.test.snap | 3 +- .../samplesheet_multisample_bam.nf.test.snap | 4 +- ...mplesheet_multisample_ont_bam.nf.test.snap | 4 +- workflows/nallo.nf | 102 ++++++++---------- 18 files changed, 117 insertions(+), 116 deletions(-) rename assets/{schema_snpdb.json => schema_snp_db.json} (53%) create mode 100644 assets/schema_vep_plugin_files.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 48d4df78..19156abb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,6 +64,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#435](https://github.com/genomic-medicine-sweden/nallo/pull/435) - Updated and refactored processes and workflows related to variant ranking - [#438](https://github.com/genomic-medicine-sweden/nallo/pull/438) - Updated pipeline tests to use functions in nft-utils instead of checking hardcoded paths - [#440](https://github.com/genomic-medicine-sweden/nallo/pull/440) - Updated hifiasm to 0.20 with new default parameters for telomeres and scaffolding ([#295](https://github.com/genomic-medicine-sweden/nallo/issues/295)) +- [#443](https://github.com/genomic-medicine-sweden/nallo/pull/443) - Refactored reference channel assignments +- 
[#443](https://github.com/genomic-medicine-sweden/nallo/pull/443) - Updated schemas for `vep_plugin_files` and `snp_db` ### `Removed` diff --git a/assets/schema_snpdb.json b/assets/schema_snp_db.json similarity index 53% rename from assets/schema_snpdb.json rename to assets/schema_snp_db.json index 648a5283..4d9141db 100644 --- a/assets/schema_snpdb.json +++ b/assets/schema_snp_db.json @@ -1,8 +1,8 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/schema_gvcfs.json", - "title": "genomic-medicine-sweden/nallo pipeline - params.extra_gvcfs schema", - "description": "Schema for the file provided with params.extra_gvcfs", + "$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/schema_snp_db.json", + "title": "genomic-medicine-sweden/nallo pipeline - params.snp_db schema", + "description": "Schema for the file provided with params.snp_db", "type": "array", "items": { "type": "object", @@ -10,14 +10,13 @@ "sample": { "type": "string", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces", - "meta": ["id"] + "errorMessage": "Sample must be provided and cannot contain spaces." 
}, "file": { "format": "file-path", "type": "string", "pattern": "^\\S+\\.zip$", - "errorMessage": "gVCF file must be provided, cannot contain spaces and must have extension 'g.vcf.gz' or 'gvcf.gz'" + "errorMessage": "Echtvar database must be provided, cannot contain spaces and must have extension '.zip'" } }, "required": ["sample", "file"] diff --git a/assets/schema_vep_plugin_files.json b/assets/schema_vep_plugin_files.json new file mode 100644 index 00000000..0be393a3 --- /dev/null +++ b/assets/schema_vep_plugin_files.json @@ -0,0 +1,20 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/schema_vep_plugin_files.json", + "title": "genomic-medicine-sweden/nallo pipeline - params.vep_plugin_files schema", + "description": "Schema for the file provided with params.vep_plugin_files", + "type": "array", + "items": { + "type": "object", + "properties": { + "vep_files": { + "format": "file-path", + "type": "string", + "pattern": "^\\S+", + "exists": true, + "errorMessage": "Vep plugin file must be a path and exist." 
+ } + }, + "required": ["vep_files"] + } +} diff --git a/modules.json b/modules.json index 55088401..97bc7115 100644 --- a/modules.json +++ b/modules.json @@ -68,7 +68,8 @@ "cadd": { "branch": "master", "git_sha": "cf3ed075695639b0a0924eb0901146df1996dc08", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/cadd/cadd.diff" }, "cat/fastq": { "branch": "master", diff --git a/modules/nf-core/cadd/cadd.diff b/modules/nf-core/cadd/cadd.diff index 2ee51723..2243f02d 100644 --- a/modules/nf-core/cadd/cadd.diff +++ b/modules/nf-core/cadd/cadd.diff @@ -1,4 +1,6 @@ Changes in module 'nf-core/cadd' +'modules/nf-core/cadd/meta.yml' is unchanged +Changes in 'cadd/main.nf': --- modules/nf-core/cadd/main.nf +++ modules/nf-core/cadd/main.nf @@ -7,13 +7,14 @@ @@ -13,10 +15,12 @@ Changes in module 'nf-core/cadd' input: tuple val(meta), path(vcf) - path(annotation_dir) -+ path(prescored_dir) +- path(annotation_dir) ++ tuple val(meta2), path(annotation_dir) ++ tuple val(meta3), path(prescored_dir) output: tuple val(meta), path("*.tsv.gz"), emit: tsv +'modules/nf-core/cadd/environment.yml' is unchanged ************************************************************ diff --git a/modules/nf-core/cadd/main.nf b/modules/nf-core/cadd/main.nf index 52490c64..d830ff72 100644 --- a/modules/nf-core/cadd/main.nf +++ b/modules/nf-core/cadd/main.nf @@ -13,8 +13,8 @@ process CADD { input: tuple val(meta), path(vcf) - path(annotation_dir) - path(prescored_dir) + tuple val(meta2), path(annotation_dir) + tuple val(meta3), path(prescored_dir) output: tuple val(meta), path("*.tsv.gz"), emit: tsv diff --git a/nextflow_schema.json b/nextflow_schema.json index ec1fa4b2..a459def4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -163,7 +163,7 @@ "pattern": "^\\S+\\.csv$", "format": "file-path", "mimetype": "text/csv", - "schema": "/assets/schema_snpdb.json", + "schema": "/assets/schema_snp_db.json", "description": "A csv file with echtvar databases to 
annotate SNVs with", "exists": true }, diff --git a/subworkflows/local/annotate_cadd/main.nf b/subworkflows/local/annotate_cadd/main.nf index 9a1dc047..a56d1450 100644 --- a/subworkflows/local/annotate_cadd/main.nf +++ b/subworkflows/local/annotate_cadd/main.nf @@ -17,9 +17,9 @@ workflow ANNOTATE_CADD { ch_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_vcf // channel: [mandatory] [ val(meta), path(vcfs) ] ch_index // channel: [mandatory] [ val(meta), path(tbis) ] - ch_header // channel: [mandatory] [ path(txt) ] - ch_cadd_resources // channel: [mandatory] [ path(dir) ] - ch_cadd_prescored // channel: [mandatory] [ path(dir) ] + ch_header // channel: [mandatory] [ val(meta), path(txt) ] + ch_cadd_resources // channel: [mandatory] [ val(meta), path(dir) ] + ch_cadd_prescored // channel: [mandatory] [ val(meta), path(dir) ] main: ch_versions = Channel.empty() @@ -64,7 +64,7 @@ workflow ANNOTATE_CADD { ANNOTATE_INDELS ( ch_annotate_indels_in, - ch_header, + ch_header.map { meta, header -> header }, CADD_TO_REFERENCE_CHRNAMES.out.output.map { meta, txt -> txt } ) ch_versions = ch_versions.mix(ANNOTATE_INDELS.out.versions) diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index f18cf360..23bfe8f0 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -10,7 +10,6 @@ workflow PREPARE_GENOME { gunzip_fasta // bool: should we gunzip fasta ch_vep_cache // channel: [optional] [ val(meta), path(cache) ] split_vep_files // bool: are there vep extra files - ch_vep_extra_files_unsplit // channel: [optional] [ val(meta), path(csv) ] main: ch_versions = Channel.empty() @@ -40,33 +39,13 @@ workflow PREPARE_GENOME { ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions) UNTAR_VEP_CACHE.out.untar - .map { meta, files -> [ files ] } .collect() .set { untarred_vep } - // Read and store paths in the vep_plugin_files file - if ( split_vep_files ) { - ch_vep_extra_files_unsplit - .splitCsv ( 
header:true ) - .map { row -> - path = file(row.vep_files[0]) - if(path.exists()) { - return [path] - } else { - error("\nVep database file ${path} does not exist.") - } - } - .collect() - .set { ch_vep_extra_files } - } else { - ch_vep_extra_files = Channel.value([]) - } - emit: mmi = MINIMAP2_INDEX.out.index.collect() // channel: [ val(meta), path(mmi) ] fai = SAMTOOLS_FAIDX.out.fai.collect() // channel: [ val(meta), path(fai) ] fasta = ch_fasta // channel: [ val(meta), path(fasta) ] - vep_resources = untarred_vep // channel: [ path(cache) ] - vep_extra_files = ch_vep_extra_files // channel: [ path(files) ] + vep_resources = untarred_vep // channel: [ val(meta), path(cache) ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/rank_variants/tests/main.nf.test b/subworkflows/local/rank_variants/tests/main.nf.test index 581a7510..473f16c8 100644 --- a/subworkflows/local/rank_variants/tests/main.nf.test +++ b/subworkflows/local/rank_variants/tests/main.nf.test @@ -20,9 +20,6 @@ nextflow_workflow { file(params.pipelines_testdata_base_path + 'reference/vep_cache_test_data.tar.gz', checkIfExists:true) ] input[3] = true - input[4] = Channel.of([ - file(params.pipelines_testdata_base_path + 'reference/vep_plugin_files.csv', checkIfExists: true) - ]) """ } } @@ -69,9 +66,11 @@ nextflow_workflow { ] input[2] = PREPARE_GENOME.out.fasta input[3] = PREPARE_GENOME.out.fai - input[4] = PREPARE_GENOME.out.vep_resources + input[4] = PREPARE_GENOME.out.vep_resources.map { meta, cache -> cache } input[5] = Channel.value('110') - input[6] = PREPARE_GENOME.out.vep_extra_files + input[6] = Channel.of([ + file(params.pipelines_testdata_base_path + 'reference/vep_plugin_files.csv', checkIfExists: true) + ]).splitCsv(header:true).map { row -> row.vep_files }.collect() input[7] = false input[8] = Channel.value([]) input[9] = null diff --git a/subworkflows/local/snv_annotation/main.nf b/subworkflows/local/snv_annotation/main.nf index 82fd3695..d1e382be 
100644 --- a/subworkflows/local/snv_annotation/main.nf +++ b/subworkflows/local/snv_annotation/main.nf @@ -8,7 +8,7 @@ workflow SNV_ANNOTATION { take: ch_vcf // channel [mandatory] [ val(meta), path(vcf) ] - ch_databases // channel: [mandatory] [ val(meta), path(db) ] + ch_databases // channel: [mandatory] [ path(db) ] ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_vep_cache // channel: [mandatory] [ path(cache) ] @@ -16,8 +16,8 @@ workflow SNV_ANNOTATION { ch_vep_extra_files // channel: [mandatory] [ path(files) ] val_annotate_cadd // bool: [mandatory] ch_cadd_header // channel: [mandatory] [ path(txt) ] - ch_cadd_resources // channel: [mandatory] [ path(annotation) ] - ch_cadd_prescored // channel: [mandatory] [ path(prescored) ] + ch_cadd_resources // channel: [mandatory] [ val(meta), path(annotation) ] + ch_cadd_prescored // channel: [mandatory] [ val(meta), path(prescored) ] main: ch_versions = Channel.empty() diff --git a/subworkflows/local/snv_annotation/tests/main.nf.test b/subworkflows/local/snv_annotation/tests/main.nf.test index 3164b44a..dd5ee5ef 100644 --- a/subworkflows/local/snv_annotation/tests/main.nf.test +++ b/subworkflows/local/snv_annotation/tests/main.nf.test @@ -88,11 +88,11 @@ nextflow_workflow { ] input[2] = GUNZIP.out.gunzip input[3] = SAMTOOLS_FAIDX.out.fai - input[4] = UNTAR.out.untar.map { meta, cache -> cache } + input[4] = UNTAR.out.untar.map { meta, cache -> cache} input[5] = Channel.value('110') - input[6] = [ + input[6] = Channel.of([ file(params.pipelines_testdata_base_path + 'reference/vep_plugin_files.csv', checkIfExists: true) - ] + ]).splitCsv(header:true).map { row -> row.vep_files }.collect() input[7] = false input[8] = Channel.value([]) input[9] = null @@ -132,9 +132,9 @@ nextflow_workflow { input[3] = SAMTOOLS_FAIDX.out.fai input[4] = UNTAR.out.untar.map { meta, cache -> cache } input[5] = Channel.value('110') - input[6] = [ + input[6] = Channel.of([ 
file(params.pipelines_testdata_base_path + 'reference/vep_plugin_files.csv', checkIfExists: true) - ] + ]).splitCsv(header:true).map { row -> row.vep_files }.collect() input[7] = false input[8] = Channel.value([]) input[9] = null diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf index 3061e3c0..5fca3199 100644 --- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf @@ -640,3 +640,15 @@ def findKeyForValue(def valueToFind, Map map) { } return null // Value not found } + +// Utility function to create channels from references +def createReferenceChannelFromPath(param, defaultValue = '') { + return param ? Channel.fromPath(param, checkIfExists: true) + .map { [ [ id: it.simpleName ], it ] } + .collect() : defaultValue +} +// Utility function to create channels from samplesheets +def createReferenceChannelFromSamplesheet(param, schema, defaultValue = '') { + return param ? 
Channel.fromList(samplesheetToList(param, schema)) : defaultValue +} + diff --git a/tests/.nftignore b/tests/.nftignore index 3499afeb..4a4111aa 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -10,7 +10,7 @@ paraphase/**/*.{vcf.gz,tbi,bam,bai,json} phased_variants/**/*.{vcf.gz,tbi} pipeline_info/*.{html,json,txt,yml} qc/cramino/**/*.txt -qc/fastqc/**/*.zip +qc/fastqc/**/*.{zip,html} qc/somalier/**/*.{html,tsv} repeat_annotation/**/*.{vcf.gz,tbi} repeat_calling/**/*.{vcf.gz,tbi,bam,bai} diff --git a/tests/samplesheet.nf.test.snap b/tests/samplesheet.nf.test.snap index d99fa7c1..64dae7b6 100644 --- a/tests/samplesheet.nf.test.snap +++ b/tests/samplesheet.nf.test.snap @@ -439,7 +439,6 @@ "test.ped:md5,bd5cec27ba7337a85cf98e787131e2b5", "HG002_Revio_cramino_aligned_phased.arrow:md5,a76219e9046db32c4b3d6d78425c5d78", "HG002_Revio_cramino_aligned.arrow:md5,a76219e9046db32c4b3d6d78425c5d78", - "HG002_Revio_fastqc.html:md5,1080b519dbbb66f45eee74e311d4922c", "HG002_Revio.mosdepth.global.dist.txt:md5,63701e857361046628f89cb84988ea1d", "HG002_Revio.mosdepth.region.dist.txt:md5,6b46396518979ff9d9771cb8a8fbbab0", "HG002_Revio.mosdepth.summary.txt:md5,311aad293c6d8a646b6dd4edc337845c", @@ -551,6 +550,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-25T13:00:09.69999597" + "timestamp": "2024-10-29T08:07:45.120070133" } } \ No newline at end of file diff --git a/tests/samplesheet_multisample_bam.nf.test.snap b/tests/samplesheet_multisample_bam.nf.test.snap index 21e2178a..94ef5a85 100644 --- a/tests/samplesheet_multisample_bam.nf.test.snap +++ b/tests/samplesheet_multisample_bam.nf.test.snap @@ -563,8 +563,6 @@ "HG002_Revio_B_cramino_aligned_phased.arrow:md5,3bb08ac5958c6cb0801f319066c3a1b2", "HG002_Revio_A_cramino_aligned.arrow:md5,a76219e9046db32c4b3d6d78425c5d78", "HG002_Revio_B_cramino_aligned.arrow:md5,3bb08ac5958c6cb0801f319066c3a1b2", - "HG002_Revio_A_fastqc.html:md5,25f875c3a542ff8590655685bc152658", - 
"HG002_Revio_B_fastqc.html:md5,4b7d698cbe79dbfb4a74e8e7f84891d5", "HG002_Revio_A.mosdepth.global.dist.txt:md5,63701e857361046628f89cb84988ea1d", "HG002_Revio_A.mosdepth.region.dist.txt:md5,6b46396518979ff9d9771cb8a8fbbab0", "HG002_Revio_A.mosdepth.summary.txt:md5,311aad293c6d8a646b6dd4edc337845c", @@ -759,6 +757,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-25T13:01:47.642764093" + "timestamp": "2024-10-29T08:09:35.63908858" } } \ No newline at end of file diff --git a/tests/samplesheet_multisample_ont_bam.nf.test.snap b/tests/samplesheet_multisample_ont_bam.nf.test.snap index 5a25e57d..d8307404 100644 --- a/tests/samplesheet_multisample_ont_bam.nf.test.snap +++ b/tests/samplesheet_multisample_ont_bam.nf.test.snap @@ -402,8 +402,6 @@ "HG002_ONT_B_cramino_aligned_phased.arrow:md5,61af72539e105cec79db7c9b78eb15a7", "HG002_ONT_A_cramino_aligned.arrow:md5,d2a5c81595fa34925ab8f03078487d81", "HG002_ONT_B_cramino_aligned.arrow:md5,61af72539e105cec79db7c9b78eb15a7", - "HG002_ONT_A_fastqc.html:md5,94d86b38a30f846de64b840656663d18", - "HG002_ONT_B_fastqc.html:md5,2ec692ee5acf69717811be481d38f775", "HG002_ONT_A.mosdepth.global.dist.txt:md5,5ae0972357f99aa481a0bf12fb9e0b0b", "HG002_ONT_A.mosdepth.region.dist.txt:md5,023b1c6aeaf8fa5ededd6b711a5cd012", "HG002_ONT_A.mosdepth.summary.txt:md5,c3b664b0983213f73edf3c0d5a0b04a2", @@ -502,6 +500,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-25T13:10:46.10939576" + "timestamp": "2024-10-29T08:11:05.903725502" } } \ No newline at end of file diff --git a/workflows/nallo.nf b/workflows/nallo.nf index 86a79a37..0a15ec4c 100644 --- a/workflows/nallo.nf +++ b/workflows/nallo.nf @@ -1,5 +1,8 @@ include { samplesheetToList } from 'plugin/nf-schema' - +include { + createReferenceChannelFromPath + createReferenceChannelFromSamplesheet +} from '../subworkflows/local/utils_nfcore_nallo_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT 
LOCAL SUBWORKFLOWS @@ -68,46 +71,30 @@ workflow NALLO { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - // Optional input files that has to be set depending on which workflow is run - ch_cadd_header = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect() - ch_cadd_resources = params.cadd_resources ? Channel.fromPath(params.cadd_resources).collect() - : '' - ch_cadd_prescored = params.cadd_prescored ? Channel.fromPath(params.cadd_prescored).collect() - : '' - ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { it -> [ it.simpleName, it ] }.collect() - : '' - ch_tandem_repeats = params.tandem_repeats ? Channel.fromPath(params.tandem_repeats).map{ [ it.simpleName, it ] }.collect() - : Channel.value([[],[]]) - ch_input_bed = params.bed ? Channel.fromPath(params.bed).map{ [ [ id:it.simpleName ] , it ] }.collect() - : Channel.value([[],[]]) - ch_par = params.par_regions ? Channel.fromPath(params.par_regions).map { [ [ id: it.simpleName ], it ] }.collect() - : '' - ch_trgt_bed = params.trgt_repeats ? Channel.fromPath(params.trgt_repeats).map { it -> [ it.simpleName, it ] }.collect() - : '' - ch_variant_catalog = params.variant_catalog ? Channel.fromPath(params.variant_catalog).map { it -> [ it.simpleName, it ] }.collect() - : '' - ch_databases = params.snp_db ? Channel.fromList(samplesheetToList(params.snp_db, 'assets/schema_snpdb.json')).map{ it[1] }.collect() - : '' - ch_variant_consequences_snv = params.variant_consequences_snv ? Channel.fromPath(params.variant_consequences_snv).map { it -> [ it.simpleName, it ] }.collect() - : Channel.value([]) - ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [ [ id:'vep_cache' ], it ] }.collect() - : Channel.value([[],[]]) - ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files).collect() - : '' - ch_expected_xy_bed = params.hificnv_xy ? 
Channel.fromPath(params.hificnv_xy).collect() - : '' - ch_expected_xx_bed = params.hificnv_xx ? Channel.fromPath(params.hificnv_xx).collect() - : '' - ch_exclude_bed = params.hificnv_exclude ? Channel.fromPath(params.hificnv_exclude).collect() - : '' - ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).map { it -> [ it.simpleName, it ] }.collect() - : Channel.value([]) - ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).map { it -> [ it.simpleName, it ] }.collect() - : Channel.value([]) - ch_somalier_sites = params.somalier_sites ? Channel.fromPath(params.somalier_sites).map { [ it.simpleName, it ] }.collect() - : '' - ch_svdb_dbs = params.svdb_dbs ? Channel.fromPath(params.svdb_dbs).map { [ it.simpleName, it ] }.collect() - : '' + // Channels from (optional) input files + // If provided: [[id: 'reference'], [/path/to/reference_full_name.file]] + ch_cadd_header = createReferenceChannelFromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt") + ch_cadd_resources = createReferenceChannelFromPath(params.cadd_resources) + ch_cadd_prescored = createReferenceChannelFromPath(params.cadd_prescored) + ch_fasta = createReferenceChannelFromPath(params.fasta) + ch_tandem_repeats = createReferenceChannelFromPath(params.tandem_repeats, Channel.value([[],[]])) + ch_input_bed = createReferenceChannelFromPath(params.bed, Channel.value([[],[]])) + ch_par = createReferenceChannelFromPath(params.par_regions) + ch_trgt_bed = createReferenceChannelFromPath(params.trgt_repeats) + ch_variant_catalog = createReferenceChannelFromPath(params.variant_catalog) + ch_variant_consequences_snv = createReferenceChannelFromPath(params.variant_consequences_snv) + ch_vep_cache_unprocessed = createReferenceChannelFromPath(params.vep_cache, Channel.value([])) + ch_expected_xy_bed = createReferenceChannelFromPath(params.hificnv_xy) + ch_expected_xx_bed = createReferenceChannelFromPath(params.hificnv_xx) + ch_exclude_bed 
= createReferenceChannelFromPath(params.hificnv_exclude) + ch_reduced_penetrance = createReferenceChannelFromPath(params.reduced_penetrance) + ch_score_config_snv = createReferenceChannelFromPath(params.score_config_snv) + ch_somalier_sites = createReferenceChannelFromPath(params.somalier_sites) + ch_svdb_dbs = createReferenceChannelFromPath(params.svdb_dbs) + + // Channels from (optional) input samplesheets validated by schema + ch_databases = createReferenceChannelFromSamplesheet(params.snp_db, 'assets/schema_snp_db.json') + ch_vep_plugin_files = createReferenceChannelFromSamplesheet(params.vep_plugin_files, 'assets/schema_vep_plugin_files.json', Channel.value([])) // Check parameter that doesn't conform to schema validation here if (params.phaser.matches('hiphase') && params.preset == 'ONT_R10') { error "The HiPhase license only permits analysis of data from PacBio. For details see: https://github.com/PacificBiosciences/HiPhase/blob/main/LICENSE.md" } @@ -124,24 +111,21 @@ workflow NALLO { // // Prepare references // - if(!params.skip_mapping_wf | !params.skip_assembly_wf ) { + if(!params.skip_mapping_wf || !params.skip_assembly_wf ) { PREPARE_GENOME ( ch_fasta, params.fasta.endsWith('.gz'), ch_vep_cache_unprocessed, params.vep_plugin_files, - ch_vep_extra_files_unsplit ) ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) - if(!params.skip_snv_annotation) { - if (params.vep_cache) { - if (params.vep_cache.endsWith("tar.gz")) { - ch_vep_cache = PREPARE_GENOME.out.vep_resources - } else { - ch_vep_cache = Channel.fromPath(params.vep_cache).collect() - } + if(!params.skip_snv_annotation && params.vep_cache) { + if (params.vep_cache.endsWith("tar.gz")) { + ch_vep_cache = PREPARE_GENOME.out.vep_resources + } else { + ch_vep_cache = Channel.fromPath(params.vep_cache).collect() } } @@ -314,9 +298,9 @@ workflow NALLO { CALL_SVS.out.ch_multisample_vcf, fasta, ch_svdb_dbs, - ch_vep_cache, + ch_vep_cache.map { meta, cache -> cache }, params.vep_cache_version, - 
PREPARE_GENOME.out.vep_extra_files + ch_vep_plugin_files.collect() ) ANNOTATE_SVS.out.vcf @@ -377,12 +361,12 @@ workflow NALLO { // SNV_ANNOTATION( SHORT_VARIANT_CALLING.out.combined_bcf, - ch_databases, + ch_databases.map { meta, databases -> databases }.collect(), fasta, fai.map { name, fai -> [ [ id: name ], fai ] }, - ch_vep_cache, + ch_vep_cache.map { meta, cache -> cache }, params.vep_cache_version, - PREPARE_GENOME.out.vep_extra_files, + ch_vep_plugin_files.collect(), (params.cadd_resources && params.cadd_prescored), ch_cadd_header, ch_cadd_resources, @@ -466,7 +450,13 @@ workflow NALLO { .join(SHORT_VARIANT_CALLING.out.snp_calls_vcf) .set { cnv_workflow_in } - CNV(cnv_workflow_in, fasta, ch_expected_xy_bed, ch_expected_xx_bed, ch_exclude_bed) + CNV( + cnv_workflow_in, + fasta, + ch_expected_xy_bed.map { meta, bed -> bed }, + ch_expected_xx_bed.map { meta, bed -> bed }, + ch_exclude_bed.map { meta, bed -> bed } + ) ch_versions = ch_versions.mix(CNV.out.versions) }