Skip to content

Commit

Permalink
Refactor reference channels
Browse files Browse the repository at this point in the history
  • Loading branch information
fellen31 committed Oct 29, 2024
1 parent 79b836b commit 036da2c
Show file tree
Hide file tree
Showing 18 changed files with 117 additions and 116 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#435](https://github.com/genomic-medicine-sweden/nallo/pull/435) - Updated and refactored processes and workflows related to variant ranking
- [#438](https://github.com/genomic-medicine-sweden/nallo/pull/438) - Updated pipeline tests to use functions in nft-utils instead of checking hardcoded paths
- [#440](https://github.com/genomic-medicine-sweden/nallo/pull/440) - Updated hifiasm to 0.20 with new default parameters for telomeres and scaffolding ([#295](https://github.com/genomic-medicine-sweden/nallo/issues/295))
- [#443](https://github.com/genomic-medicine-sweden/nallo/pull/443) - Refactored reference channel assignments
- [#443](https://github.com/genomic-medicine-sweden/nallo/pull/443) - Updated schemas for `vep_plugin_files` and `snp_db`

### `Removed`

Expand Down
11 changes: 5 additions & 6 deletions assets/schema_snpdb.json → assets/schema_snp_db.json
Original file line number Diff line number Diff line change
@@ -1,23 +1,22 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/schema_gvcfs.json",
"title": "genomic-medicine-sweden/nallo pipeline - params.extra_gvcfs schema",
"description": "Schema for the file provided with params.extra_gvcfs",
"$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/schema_snp_db.json",
"title": "genomic-medicine-sweden/nallo pipeline - params.snp_db schema",
"description": "Schema for the file provided with params.snp_db",
"type": "array",
"items": {
"type": "object",
"properties": {
"sample": {
"type": "string",
"pattern": "^\\S+$",
"errorMessage": "Sample name must be provided and cannot contain spaces",
"meta": ["id"]
"errorMessage": "Sample must be provided and cannot contain spaces."
},
"file": {
"format": "file-path",
"type": "string",
"pattern": "^\\S+\\.zip$",
"errorMessage": "gVCF file must be provided, cannot contain spaces and must have extension 'g.vcf.gz' or 'gvcf.gz'"
"errorMessage": "Echtvar database must be provided, cannot contain spaces and must have extension '.zip'"
}
},
"required": ["sample", "file"]
Expand Down
20 changes: 20 additions & 0 deletions assets/schema_vep_plugin_files.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/schema_vep_plugin_files.json",
"title": "genomic-medicine-sweden/nallo pipeline - params.vep_plugin_files schema",
"description": "Schema for the file provided with params.vep_plugin_files",
"type": "array",
"items": {
"type": "object",
"properties": {
"vep_files": {
"format": "file-path",
"type": "string",
"pattern": "^\\S+",
"exists": true,
"errorMessage": "Vep plugin file must be a path and exist."
}
},
"required": ["vep_files"]
}
}
3 changes: 2 additions & 1 deletion modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@
"cadd": {
"branch": "master",
"git_sha": "cf3ed075695639b0a0924eb0901146df1996dc08",
"installed_by": ["modules"]
"installed_by": ["modules"],
"patch": "modules/nf-core/cadd/cadd.diff"
},
"cat/fastq": {
"branch": "master",
Expand Down
8 changes: 6 additions & 2 deletions modules/nf-core/cadd/cadd.diff

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions modules/nf-core/cadd/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@
"pattern": "^\\S+\\.csv$",
"format": "file-path",
"mimetype": "text/csv",
"schema": "/assets/schema_snpdb.json",
"schema": "/assets/schema_snp_db.json",
"description": "A csv file with echtvar databases to annotate SNVs with",
"exists": true
},
Expand Down
8 changes: 4 additions & 4 deletions subworkflows/local/annotate_cadd/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ workflow ANNOTATE_CADD {
ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_vcf // channel: [mandatory] [ val(meta), path(vcfs) ]
ch_index // channel: [mandatory] [ val(meta), path(tbis) ]
ch_header // channel: [mandatory] [ path(txt) ]
ch_cadd_resources // channel: [mandatory] [ path(dir) ]
ch_cadd_prescored // channel: [mandatory] [ path(dir) ]
ch_header // channel: [mandatory] [ val(meta), path(txt) ]
ch_cadd_resources // channel: [mandatory] [ val(meta), path(dir) ]
ch_cadd_prescored // channel: [mandatory] [ val(meta), path(dir) ]

main:
ch_versions = Channel.empty()
Expand Down Expand Up @@ -64,7 +64,7 @@ workflow ANNOTATE_CADD {

ANNOTATE_INDELS (
ch_annotate_indels_in,
ch_header,
ch_header.map { meta, header -> header },
CADD_TO_REFERENCE_CHRNAMES.out.output.map { meta, txt -> txt }
)
ch_versions = ch_versions.mix(ANNOTATE_INDELS.out.versions)
Expand Down
23 changes: 1 addition & 22 deletions subworkflows/local/prepare_genome.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ workflow PREPARE_GENOME {
gunzip_fasta // bool: should we gunzip fasta
ch_vep_cache // channel: [optional] [ val(meta), path(cache) ]
split_vep_files // bool: are there vep extra files
ch_vep_extra_files_unsplit // channel: [optional] [ val(meta), path(csv) ]

main:
ch_versions = Channel.empty()
Expand Down Expand Up @@ -40,33 +39,13 @@ workflow PREPARE_GENOME {
ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions)

UNTAR_VEP_CACHE.out.untar
.map { meta, files -> [ files ] }
.collect()
.set { untarred_vep }

// Read and store paths in the vep_plugin_files file
if ( split_vep_files ) {
ch_vep_extra_files_unsplit
.splitCsv ( header:true )
.map { row ->
path = file(row.vep_files[0])
if(path.exists()) {
return [path]
} else {
error("\nVep database file ${path} does not exist.")
}
}
.collect()
.set { ch_vep_extra_files }
} else {
ch_vep_extra_files = Channel.value([])
}

emit:
mmi = MINIMAP2_INDEX.out.index.collect() // channel: [ val(meta), path(mmi) ]
fai = SAMTOOLS_FAIDX.out.fai.collect() // channel: [ val(meta), path(fai) ]
fasta = ch_fasta // channel: [ val(meta), path(fasta) ]
vep_resources = untarred_vep // channel: [ path(cache) ]
vep_extra_files = ch_vep_extra_files // channel: [ path(files) ]
vep_resources = untarred_vep // channel: [ val(meta), path(cache) ]
versions = ch_versions // channel: [ versions.yml ]
}
9 changes: 4 additions & 5 deletions subworkflows/local/rank_variants/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@ nextflow_workflow {
file(params.pipelines_testdata_base_path + 'reference/vep_cache_test_data.tar.gz', checkIfExists:true)
]
input[3] = true
input[4] = Channel.of([
file(params.pipelines_testdata_base_path + 'reference/vep_plugin_files.csv', checkIfExists: true)
])
"""
}
}
Expand Down Expand Up @@ -69,9 +66,11 @@ nextflow_workflow {
]
input[2] = PREPARE_GENOME.out.fasta
input[3] = PREPARE_GENOME.out.fai
input[4] = PREPARE_GENOME.out.vep_resources
input[4] = PREPARE_GENOME.out.vep_resources.map { meta, cache -> cache }
input[5] = Channel.value('110')
input[6] = PREPARE_GENOME.out.vep_extra_files
input[6] = Channel.of([
file(params.pipelines_testdata_base_path + 'reference/vep_plugin_files.csv', checkIfExists: true)
]).splitCsv(header:true).map { row -> row.vep_files }.collect()
input[7] = false
input[8] = Channel.value([])
input[9] = null
Expand Down
6 changes: 3 additions & 3 deletions subworkflows/local/snv_annotation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@ workflow SNV_ANNOTATION {

take:
ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
ch_databases // channel: [mandatory] [ val(meta), path(db) ]
ch_databases // channel: [mandatory] [ path(db) ]
ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_vep_cache // channel: [mandatory] [ path(cache) ]
val_vep_cache_version // string: [mandatory] default: 110
ch_vep_extra_files // channel: [mandatory] [ path(files) ]
val_annotate_cadd // bool: [mandatory]
ch_cadd_header // channel: [mandatory] [ path(txt) ]
ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
ch_cadd_prescored // channel: [mandatory] [ path(prescored) ]
ch_cadd_resources // channel: [mandatory] [ val(meta), path(annotation) ]
ch_cadd_prescored // channel: [mandatory] [ val(meta), path(prescored) ]

main:
ch_versions = Channel.empty()
Expand Down
10 changes: 5 additions & 5 deletions subworkflows/local/snv_annotation/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,11 @@ nextflow_workflow {
]
input[2] = GUNZIP.out.gunzip
input[3] = SAMTOOLS_FAIDX.out.fai
input[4] = UNTAR.out.untar.map { meta, cache -> cache }
input[4] = UNTAR.out.untar.map { meta, cache -> cache}
input[5] = Channel.value('110')
input[6] = [
input[6] = Channel.of([
file(params.pipelines_testdata_base_path + 'reference/vep_plugin_files.csv', checkIfExists: true)
]
]).splitCsv(header:true).map { row -> row.vep_files }.collect()
input[7] = false
input[8] = Channel.value([])
input[9] = null
Expand Down Expand Up @@ -132,9 +132,9 @@ nextflow_workflow {
input[3] = SAMTOOLS_FAIDX.out.fai
input[4] = UNTAR.out.untar.map { meta, cache -> cache }
input[5] = Channel.value('110')
input[6] = [
input[6] = Channel.of([
file(params.pipelines_testdata_base_path + 'reference/vep_plugin_files.csv', checkIfExists: true)
]
]).splitCsv(header:true).map { row -> row.vep_files }.collect()
input[7] = false
input[8] = Channel.value([])
input[9] = null
Expand Down
12 changes: 12 additions & 0 deletions subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -640,3 +640,15 @@ def findKeyForValue(def valueToFind, Map map) {
}
return null // Value not found
}

// Build a reference channel from a path parameter.
// - param:        path (or glob) handed to Channel.fromPath; files must exist.
// - defaultValue: returned unchanged when `param` is falsy (defaults to '').
// Each emitted path is paired with a meta map whose `id` is the path's
// simpleName, and the resulting items are gathered with collect().
def createReferenceChannelFromPath(param, defaultValue = '') {
    if (!param) {
        return defaultValue
    }
    return Channel.fromPath(param, checkIfExists: true)
        .map { reference -> [ [ id: reference.simpleName ], reference ] }
        .collect()
}
// Build a reference channel from a samplesheet parameter.
// - param:        samplesheet passed to samplesheetToList together with `schema`.
// - schema:       schema passed through to samplesheetToList.
// - defaultValue: returned unchanged when `param` is falsy (defaults to '').
// The list produced by samplesheetToList is turned into a channel with Channel.fromList.
def createReferenceChannelFromSamplesheet(param, schema, defaultValue = '') {
    if (!param) {
        return defaultValue
    }
    def entries = samplesheetToList(param, schema)
    return Channel.fromList(entries)
}

2 changes: 1 addition & 1 deletion tests/.nftignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ paraphase/**/*.{vcf.gz,tbi,bam,bai,json}
phased_variants/**/*.{vcf.gz,tbi}
pipeline_info/*.{html,json,txt,yml}
qc/cramino/**/*.txt
qc/fastqc/**/*.zip
qc/fastqc/**/*.{zip,html}
qc/somalier/**/*.{html,tsv}
repeat_annotation/**/*.{vcf.gz,tbi}
repeat_calling/**/*.{vcf.gz,tbi,bam,bai}
Expand Down
3 changes: 1 addition & 2 deletions tests/samplesheet.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,6 @@
"test.ped:md5,bd5cec27ba7337a85cf98e787131e2b5",
"HG002_Revio_cramino_aligned_phased.arrow:md5,a76219e9046db32c4b3d6d78425c5d78",
"HG002_Revio_cramino_aligned.arrow:md5,a76219e9046db32c4b3d6d78425c5d78",
"HG002_Revio_fastqc.html:md5,1080b519dbbb66f45eee74e311d4922c",
"HG002_Revio.mosdepth.global.dist.txt:md5,63701e857361046628f89cb84988ea1d",
"HG002_Revio.mosdepth.region.dist.txt:md5,6b46396518979ff9d9771cb8a8fbbab0",
"HG002_Revio.mosdepth.summary.txt:md5,311aad293c6d8a646b6dd4edc337845c",
Expand Down Expand Up @@ -551,6 +550,6 @@
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-25T13:00:09.69999597"
"timestamp": "2024-10-29T08:07:45.120070133"
}
}
4 changes: 1 addition & 3 deletions tests/samplesheet_multisample_bam.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -563,8 +563,6 @@
"HG002_Revio_B_cramino_aligned_phased.arrow:md5,3bb08ac5958c6cb0801f319066c3a1b2",
"HG002_Revio_A_cramino_aligned.arrow:md5,a76219e9046db32c4b3d6d78425c5d78",
"HG002_Revio_B_cramino_aligned.arrow:md5,3bb08ac5958c6cb0801f319066c3a1b2",
"HG002_Revio_A_fastqc.html:md5,25f875c3a542ff8590655685bc152658",
"HG002_Revio_B_fastqc.html:md5,4b7d698cbe79dbfb4a74e8e7f84891d5",
"HG002_Revio_A.mosdepth.global.dist.txt:md5,63701e857361046628f89cb84988ea1d",
"HG002_Revio_A.mosdepth.region.dist.txt:md5,6b46396518979ff9d9771cb8a8fbbab0",
"HG002_Revio_A.mosdepth.summary.txt:md5,311aad293c6d8a646b6dd4edc337845c",
Expand Down Expand Up @@ -759,6 +757,6 @@
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-25T13:01:47.642764093"
"timestamp": "2024-10-29T08:09:35.63908858"
}
}
4 changes: 1 addition & 3 deletions tests/samplesheet_multisample_ont_bam.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -402,8 +402,6 @@
"HG002_ONT_B_cramino_aligned_phased.arrow:md5,61af72539e105cec79db7c9b78eb15a7",
"HG002_ONT_A_cramino_aligned.arrow:md5,d2a5c81595fa34925ab8f03078487d81",
"HG002_ONT_B_cramino_aligned.arrow:md5,61af72539e105cec79db7c9b78eb15a7",
"HG002_ONT_A_fastqc.html:md5,94d86b38a30f846de64b840656663d18",
"HG002_ONT_B_fastqc.html:md5,2ec692ee5acf69717811be481d38f775",
"HG002_ONT_A.mosdepth.global.dist.txt:md5,5ae0972357f99aa481a0bf12fb9e0b0b",
"HG002_ONT_A.mosdepth.region.dist.txt:md5,023b1c6aeaf8fa5ededd6b711a5cd012",
"HG002_ONT_A.mosdepth.summary.txt:md5,c3b664b0983213f73edf3c0d5a0b04a2",
Expand Down Expand Up @@ -502,6 +500,6 @@
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-25T13:10:46.10939576"
"timestamp": "2024-10-29T08:11:05.903725502"
}
}
Loading

0 comments on commit 036da2c

Please sign in to comment.